From bf01b8d5a605ea98a6732dd139c9a80b5ef43388 Mon Sep 17 00:00:00 2001 From: Lovesh Harchandani Date: Tue, 21 Feb 2017 13:53:42 +0530 Subject: [PATCH 001/100] Bugfix, choosing a free port to run client in a test (#70) * using the correct mechanism to choose random port * show node registry only when it is non empty and do not show the message of missing pool transaction file on startup (#68) * update help message for 'missing genesis txns file' * Revert "update help message for 'missing genesis txns file'" This reverts commit 4b15c4b8641263646be7122f7a9355f6f5ed7b19. * update help message for 'missing genesis txns file' * show help info aoubt when trigger * Revert "show help info aoubt when trigger" This reverts commit 730e74c1a68e40af73037a4771311e12723d12fa. * show 'missing genesis txn' help text only when 'connect test' is trigger * show node registery only if available Signed-off-by: Lovesh Harchandani Sign-off-executed-by: toktar Approved-at: h-master --- plenum/cli/cli.py | 9 --------- plenum/common/script_helper.py | 4 ++-- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/plenum/cli/cli.py b/plenum/cli/cli.py index 47931c48ce..263fac6b77 100644 --- a/plenum/cli/cli.py +++ b/plenum/cli/cli.py @@ -259,15 +259,6 @@ def __init__(self, looper, basedirpath, nodeReg=None, cliNodeReg=None, if nodeReg: self.print("Node registry loaded.") self.showNodeRegistry() - else: - msg = """ - The information required to connect this client to the nodes cannot be found. - This is an error. To correct the error, get the file containing genesis transactions - (the file name is `{}`) from the github repository and place it in directory - `{}`. 
The github url is {}.\n""".format(self.config.poolTransactionsFile, - self.config.baseDir, - self.githubUrl) - self.print(msg) self.print("Type 'help' for more information.") self.print("Running {} {}\n".format(self.properName, diff --git a/plenum/common/script_helper.py b/plenum/common/script_helper.py index 43448f4c77..a2a529d46d 100644 --- a/plenum/common/script_helper.py +++ b/plenum/common/script_helper.py @@ -1,5 +1,4 @@ import os -import random from jsonpickle import json @@ -7,6 +6,7 @@ from plenum.client.client import Client from plenum.client.wallet import Wallet from plenum.common.eventually import eventually +from plenum.common.port_dispenser import genHa from plenum.common.raet import initLocalKeep, getLocalVerKey, getLocalPubKey from plenum.common.signer_simple import SimpleSigner from plenum.common.txn import TXN_TYPE, TARGET_NYM, DATA, NODE_IP, \ @@ -250,7 +250,7 @@ def changeHA(looper, config, nodeName, nodeSeed, newNodeHA, stewardWallet.addIdentifier(signer=stewardSigner) # prepare client to submit change ha request to sovrin - randomClientPort = random.randint(9700, 9799) + _, randomClientPort = genHa() client = Client(stewardName, ha=('0.0.0.0', randomClientPort), config=config) looper.add(client) From 391c6fca763fbc92939186715de5a3f9fe235f9b Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Tue, 28 Mar 2017 02:03:05 +0300 Subject: [PATCH 002/100] Feature Added: Jenkins (#100) Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- Jenkinsfile | 82 ++++++++++++++++++++++++++++++++++++++++++ README.md | 4 ++- ci/ubuntu.dockerfile | 26 ++++++++++++++ ci/windows.dockerfile | 50 ++++++++++++++++++++++++++ plenum/__init__.py | 9 ----- plenum/__metadata__.py | 6 +--- runner.py | 20 +++++++---- setup.py | 5 +-- 8 files changed, 178 insertions(+), 24 deletions(-) create mode 100644 Jenkinsfile create mode 100644 ci/ubuntu.dockerfile create mode 100644 ci/windows.dockerfile diff --git a/Jenkinsfile b/Jenkinsfile new 
file mode 100644 index 0000000000..e5ace55ada --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,82 @@ +#!groovy + +@Library('SovrinHelpers') _ + +def name = 'plenum' + +def testUbuntu = { + try { + echo 'Ubuntu Test: Checkout csm' + checkout scm + + echo 'Ubuntu Test: Build docker image' + orientdb.start() + + def testEnv = dockerHelpers.build(name) + + testEnv.inside('--network host') { + echo 'Ubuntu Test: Install dependencies' + testHelpers.installDeps() + + echo 'Ubuntu Test: Test' + sh 'python runner.py --pytest \"python -m pytest\" --output "test-result.txt"' + } + } + finally { + echo 'Ubuntu Test: Cleanup' + orientdb.stop() + step([$class: 'WsCleanup']) + } +} + +def testWindows = { + echo 'TODO: Implement me' + + /* win2016 for now (03-23-2017) is not supported by Docker for Windows + * (Hyper-V version), so we can't use linux containers + * https://github.com/docker/for-win/issues/448#issuecomment-276328342 + * + * possible solutions: + * - use host-installed OrientDB (trying this one) + * - wait until Docker support will be provided for win2016 + */ + + //try { + // echo 'Windows Test: Checkout csm' + // checkout scm + + // echo 'Windows Test: Build docker image' + // dockerHelpers.buildAndRunWindows(name, testHelpers.installDepsWindowsCommands() + ["cd C:\\test && python -m pytest -k orientdb --junit-xml=C:\\testOrig\\$testFile"] /*testHelpers.testJunitWindowsCommands()*/) + // junit 'test-result.xml' + //} + //finally { + // echo 'Windows Test: Cleanup' + // step([$class: 'WsCleanup']) + //} +} + +def testWindowsNoDocker = { + try { + echo 'Windows No Docker Test: Checkout csm' + checkout scm + + echo 'Windows No Docker Test: drop orientdb databases' + orientdb.cleanupWindows() + + testHelpers.createVirtualEnvAndExecute({ python, pip -> + echo 'Windows No Docker Test: Install dependencies' + testHelpers.installDepsBat(python, pip) + + echo 'Windows No Docker Test: Test' + bat "${python} runner.py --pytest \"${python} -m pytest\" --output \"test-result.txt\"" 
+ }) + } + finally { + echo 'Windows No Docker Test: Cleanup' + step([$class: 'WsCleanup']) + } +} + + + +testAndPublish(name, [ubuntu: testUbuntu, windows: testWindowsNoDocker, windowsNoDocker: testWindowsNoDocker]) diff --git a/README.md b/README.md index bb66ca23c2..a9413717a5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ -# Plenum Byzantine Fault Tolerant Protocol +# Plenum Byzantine Fault Tolerant Protocol + +[![Build Status](https://jenkins.evernym.com/buildStatus/icon?job=Plenum/stable)](https://jenkins.evernym.com/job/Plenum/job/stable/) Plenum makes extensive use of coroutines and the async/await keywords in Python, and as such, requires Python version 3.5.0 or later. Plenum also diff --git a/ci/ubuntu.dockerfile b/ci/ubuntu.dockerfile new file mode 100644 index 0000000000..52dfbc2da9 --- /dev/null +++ b/ci/ubuntu.dockerfile @@ -0,0 +1,26 @@ +# Development +FROM ubuntu:16.04 + +ARG uid=1000 + +# Install environment +RUN apt-get update -y +RUN apt-get install -y \ + git \ + wget \ + python3.5 \ + python3-pip \ + python-setuptools \ + python3-nacl +RUN pip3 install -U \ + pip \ + setuptools \ + virtualenv +RUN useradd -ms /bin/bash -u $uid sovrin +USER sovrin +RUN virtualenv -p python3.5 /home/sovrin/test +USER root +RUN ln -sf /home/sovrin/test/bin/python /usr/local/bin/python +RUN ln -sf /home/sovrin/test/bin/pip /usr/local/bin/pip +USER sovrin +WORKDIR /home/sovrin diff --git a/ci/windows.dockerfile b/ci/windows.dockerfile new file mode 100644 index 0000000000..2f52f2610a --- /dev/null +++ b/ci/windows.dockerfile @@ -0,0 +1,50 @@ +FROM microsoft/windowsservercore + +LABEL Description="plenum" Vendor="Evernym" + +# Install environment + +# Python +RUN powershell.exe -Command \ + $ErrorActionPreference = 'Stop'; \ + wget https://www.python.org/ftp/python/3.5.1/python-3.5.1.exe -OutFile c:\python-3.5.1.exe; \ + Start-Process c:\python-3.5.1.exe -ArgumentList '/quiet InstallAllUsers=1 PrependPath=1' -Wait; \ + Remove-Item c:\python-3.5.1.exe -Force + 
+# Chocolate +RUN powershell.exe -Command \ + $ErrorActionPreference = 'Stop'; \ + "iwr https://chocolatey.org/install.ps1 -UseBasicParsing | iex" + +# Git (using Chocolate) +RUN powershell.exe -Command \ + $ErrorActionPreference = 'Stop'; \ + choco install git -ArgumentList '-params /GitAndUnixTllsOnPath' -y -Wait + +# PIP deps +RUN powershell.exe -Command \ + $ErrorActionPreference = 'Stop'; \ + pip install -U pip pytest + + +# MS Visual C++ Build Tools (using Chocolate) +RUN powershell.exe -Command \ + $ErrorActionPreference = 'Stop'; \ + choco install microsoft-build-tools -y -Wait + + +# unzip using chocolate +RUN powershell.exe -Command \ + $ErrorActionPreference = 'Stop'; \ + choco install unzip -y -Wait + + +# orientdb +#RUN powershell.exe -Command \ +# $ErrorActionPreference = 'Stop'; \ +# wget http://mkt.orientdb.com/CE-2217-windows -OutFile c:\orientdb-community.zip; \ +# unzip c:\orientdb-community.zip +# Start-Process c:\python-3.5.1.exe -ArgumentList '/quiet InstallAllUsers=1 PrependPath=1' -Wait; \ +# Remove-Item c:\orientdb-community.zip -Force; \ +# Remove-Item orientdb-community* -Force; \ + diff --git a/plenum/__init__.py b/plenum/__init__.py index 452f8a2627..3c32ce7d20 100644 --- a/plenum/__init__.py +++ b/plenum/__init__.py @@ -6,14 +6,5 @@ import sys -import plenum -from plenum.common.pkg_util import check_deps - if sys.version_info < (3, 5, 0): raise ImportError("Python 3.5.0 or later required.") - - -import importlib -from .__metadata__ import * - -check_deps(plenum) diff --git a/plenum/__metadata__.py b/plenum/__metadata__.py index 77a47a0a1f..a397681c3b 100644 --- a/plenum/__metadata__.py +++ b/plenum/__metadata__.py @@ -1,13 +1,9 @@ """ plenum package metadata """ -__version_info__ = (0, 2) +__version_info__ = (0, 3) __version__ = '.'.join(map(str, __version_info__)) __author__ = "Evernym, Inc." 
__license__ = "Apache 2.0" __all__ = ['__version_info__', '__version__', '__author__', '__license__'] - -__dependencies__ = { - "ledger": ">=0.0.34" -} diff --git a/runner.py b/runner.py index 779756154b..4f65534bec 100644 --- a/runner.py +++ b/runner.py @@ -1,12 +1,13 @@ import os import re import sys +import argparse -def run(): - log("Preparing test suite") +def run(pytest, output_file): + log("Preparing test suite with {}".format(pytest)) testListFile = "test_list.txt" - os.system('pytest --collect-only > {}'.format(testListFile)) + os.system('{} --collect-only > {}'.format(pytest, testListFile)) log("Reading collected modules file") collectedData = open(testListFile).read() os.remove(testListFile) @@ -37,7 +38,7 @@ def run(): for test in testList: # testRep = '{}.rep'.format(test.split("/")[-1]) log("Going to run {}".format(test)) - r = os.system('pytest -k "{}" > {}'.format(test, testRep)) + r = os.system('{} -k "{}" > {}'.format(pytest, test, testRep)) reportLines = open(testRep).readlines() output = ''.join(reportLines) pas = passPat.search(output) @@ -110,9 +111,9 @@ def run(): for fm, fn in allErrorTests: log('{}:{}'.format(fm, fn)) - if failureData: + if failureData and output_file: log("Writing failure data in Test-Report.txt") - with open('../Test-Report.txt', 'w') as f: + with open(output_file, 'w') as f: f.write(summaryMsg) f.write(''.join(failureData)) @@ -128,5 +129,10 @@ def log(msg): if __name__ == "__main__": - r = run() + parser = argparse.ArgumentParser() + parser.add_argument('--pytest', type=str, help='pytest instance', default='pytest') + parser.add_argument('--output', type=str, help='result file', default='../Test-Report.txt') + parser.add_argument('--nooutput', help='no result file', action="store_true") + args = parser.parse_args() + r = run(pytest=args.pytest, output_file=args.output if not args.nooutput else None) sys.exit(0 if r == 0 else 1) diff --git a/setup.py b/setup.py index ff2ba62300..fbc91626e3 100644 --- a/setup.py +++ 
b/setup.py @@ -61,8 +61,9 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['raet', 'jsonpickle', 'portalocker==0.5.7', - 'prompt_toolkit==0.57', 'pyorient', 'pygments', 'ledger', + install_requires=['ledger', + 'raet', 'jsonpickle', 'portalocker==0.5.7', + 'prompt_toolkit==0.57', 'pyorient', 'pygments', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', 'sortedcontainers', 'psutil'], extras_require={ From 31ff5697dcf137d1fdb2b90ad353b1663182e340 Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Tue, 28 Mar 2017 12:14:28 +0300 Subject: [PATCH 003/100] Hotfix: Deps (#102) Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- plenum/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plenum/__init__.py b/plenum/__init__.py index 3c32ce7d20..9b81930b04 100644 --- a/plenum/__init__.py +++ b/plenum/__init__.py @@ -5,6 +5,10 @@ from __future__ import absolute_import, division, print_function import sys +import plenum if sys.version_info < (3, 5, 0): raise ImportError("Python 3.5.0 or later required.") + +import importlib +from .__metadata__ import * From fe689750299acea2ae1f86ffbfc48c4fcaefd1e6 Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Thu, 6 Apr 2017 14:20:30 +0300 Subject: [PATCH 004/100] Versioning fix (#112) Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e9d993307e..a46ef33cd5 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['ledger', + install_requires=['ledger==0.2.2', 'raet', 'jsonpickle', 'portalocker==0.5.7', 'prompt_toolkit==0.57', 'pyorient', 'pygments', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', From 46b1c21fa28b3cd2fd902536371d529a8327e37f Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Fri, 21 Apr 2017 20:11:23 
+0300 Subject: [PATCH 005/100] Stable build (#146) * proposed abstraction for network interface * initial commit for zstack * test in progress * fixing problem with verify key location * change in remote * test passing in promiscuous mode * adding linger time but it does not help * using disconnect on remote and changing from old style to new style coroutines * completing test * comments * removed sleeps * refactoring tests and KITZStack * making basic node tests work * fixing tests in test_node_connection.py * changes for pool transactions * more changes * fixing more tests * fixing more tests * adding whitelist message * adding whitelist message for node catchup tests * added ELEC tag to logs for troubleshooting * fixing a bug where a node with a primary was nominating itself during re-elections * in the middle * patching non-test node and client objects to make script tests work * added dummy placeholders to allow tests to run until abstraction of networking is complete * all tests except one work * changes in scripts * changes for endpoint stack * commenting a log message which makes some tests run slow * fixes in scripts * not printing some logs on CLI * creating node params file * digest recalculation prevention * checking for port in use for both tcp and udp * ensured new log entries didn't show up in CLI * printing exception with the log * beginning to abolish key sharing * changes for key sharing * fixing a bug with req reply store and some tests * log messages * fixing logs * removing accidentally committed code * add pattern for hidden files to gitignore * adding disconnect detection * fixing bug for 3 phase messages prior to stable checkpoint * 'gc'ing requests only when checkpoint achieved on protocol instances * fixing bug * resolving some race conditions around checkpoints * add option for suppressing stdout logging * removing linger and handling exception while sending message through listener * closing auth, stopping prodables when logging error 
results in BlowUp and upgrading tests * Consistency proof request should specify the target seqNo * skipping a test and raising timeout * skipping test * skipping test * skipping test * skipping test * initial refactoring to move secure transport abstraction into a different project * minor changes * moved some modules to stp * moving node and client stacks * fixed some improper imports * more changes * fixed keystroke error * not calling sorted container's method from a newer api, resolving a bug in catchup when transactions are requested after timeout * [SOV-768] Fixed port not available exception getting ignored and tests failing on windows and linux based system * Removed unused import * flexible timeout and catching exception * Fixed error for port not available exception on windows * Removed extra lines at the end of exceptions file * Moved common logic related to transport to stp * using new names from stp * changes for stp * removing redundant close in orientdb socket * adding tests for catchup scenario and 3 phase message outside water mark * fixing import in test * Fix 'infinite' looping on receiving messages when their amount is very high (#97) * fix 'infinite' looping on receiving messages when their amount is very high * move quotas to config * make zstack get params from config only once * add test for quota * increase message quotas to 100 * Feature Added: Jenkins (#99) * Moved util methods to NetworkInterfaces; fixes in raet stack * fix key generation * Hotfix: Deps (#101) * - Added a custom jsonpickle handler for enums to avoid the issue https://github.com/jsonpickle/jsonpickle/issues/135 in jsonpickle 0.9.2 which is the canonical version for Ubuntu 16.04. (#98) - Corrected Cli.restoreWalletByPath method. 
* Merge branches 'stp' and 'zmq' of github.com:evernym/plenum into stp * handling infinite loop problem and fixing test delay * fixing import * bugfix in test * raising timeout * merge * change in ordered * move connection related exceptions to stp project * fix imports of moved exceptions * fixed dependencies * temporarily disabling tests on Windows * renamed init_plenum_raet_keep to init_plenum_keys * removing unused code and changing script mode * Introduced failing of the pytest session in case any unexpected warnings have been made (#104) * changes to troubleshoot testing for coro not awaited * added support for warning checking across all tests * fixed scope for warnfilters fixture * - Updated the warnings filter. Made the rules for ResourceWarning: "unclosed file", ResourceWarning: "unclosed.*socket\.socket" and RuntimeWarning: "coroutine.*was never awaited" global since they are made by the garbage collector in unpredictable points. - Sorted the rules in the warnings filter alphabetically. * Specialized the warnings filter rule for RuntimeWarning about a coroutine was never awaited (to "msgAll" coroutine). * Added an explicit call of the garbage collector to the tear-down phase of "warncheck" fixture in order to ensure that warnings will be generated and recorded for all the unclosed resources. * Fixed the issue with that a coroutine "msgAll" object was never awaited. Removed the corresponding "ignore" rule from the warnings filter. * - Removed the rules not being actuated currently from the warnings filter. - Replaced use of line numbers with use of message patterns in the warnings filter rules. * Corrected the message pattern for a rule from the warnings filter. * Added an "ignore" rule for ResourceWarning: "unclosed event loop" to the warnings filter. * Returned back the warnings filter rules for DeprecationWarnings in jsonpickle package. Replaced use of line numbers with use of message patterns in these rules. 
* adding warning * removing looper and checking while closing orientdb * increasing a timeout * replacing warn with warning methods * fixed stp dependency * fixed tests * skip failing test, created SOV-881. * changing logs to print alias * setting resource limit for tests * handling exception while raising resource limits * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring (#106) * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring * removed unwanted error code, refactored list keyrings command so that it correctly works for agent cli * removed unused imports * making CLI accept hex seed of size 64 * move logging to stp repo * mitigating key error in pool manager * changing timeouts so the disconnect check happens twice * handling unavailability of resource module * adding timeout to test utility method * Versioning fix (#113) * [Closes SOV-905] Bug Fixed: Orientdb is not installing with the latest build 0.3.16 * moving error codes to stp * upgrading version of stp * Stp (#116) * use common stp interface for changing of node's HA * get rid of explicitly calling zmq and raet; move all transport stuff into stp interfaces. * fix tests; increment stp version * add AuthMode enum instead of auto parameter * fixed testConnectWithoutKeySharingFails test * increased test timeout * Plenum test runner fix (#115) * Fixed a bug in plenum test runner with an incorrect test results summary pattern. Previously it had been expected that the line with the test results summary must begin with equality signs and end with them. But this line is padded with equality signs only if it is shorter than 80 characters. Now the test results summary patterns don't require equality signs. Instead they may match only the last line in the file. * Corrected test results summary patterns in plenum test runner. 
* fix import of OP_FIELD_NAME * up version of stp to 0.1.24 * Agent issuer wallet restoration (#117) * supplied keys parameter as True to wallet encoding and decoding functions so that dictionary keys which are custom objects restore properly, otherwise it used to be restored as string as opposed to actual object it was before persistence * minor changes in list keyrings to show issuer wallet for agent as well * minor change to print base wallet first and then any other related wallets * up stp version to 0.1.26 * skip tests in test_new_node_catchup.py * scheduling primary selection * Skip failing tests (#122) * skip test testChangeNodeHaForNonPrimary due to SOV-941 * skip test testProtocolInstanceCannotBecomeActiveWithLessThanFourServers due to SOV-940 * parametrize test timeouts (#120) * rename expectedWait to expectedNodeInterconnectionTime * add signature for other time expect functions * use named timeouts in conftest * move totalConnections from waits to utils * use named timeout in ensureElectionsDone * use float instead of int for seconds * use default args of ensureElectionsDone where it is possible * use named argument for timeouts * use named timeout in helper * use names for parameters * rename 'timeoutPerReq' of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies to 'customTimeoutPerRequest' to emphasize the fact that there is a default one * use default timeout instead of custom timeout of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies where it can be used; use named argument for timeouts * add comments for two functions with similar names * rename checkSufficientRepliesRecvd to checkSufficientRepliesReceived * rename checkSufficientRepliesForRequests to waitForSufficientRepliesForRequests * add 'returns' to docstrings * fix passing of args to waits.expectedElectionTimeout * use waitForSufficientRepliesForRequests and its default timeout instead of checkSufficientRepliesReceived everywhere it is 
possible * update doc lines * create waitForViewChange and use it in ensureView * replace usages of checkViewNoForNodes with eventually by waitForViewChange * add waits.expectedNodeToNodeMessageDeliveryTime and use it in sendMsgAndCheck * rename checkMsg to checkMessageReceived * rename sendMsgAndCheck to sendMessageAndCheckDelivery * add docstring to sendMessageAndCheckDelivery * remove unused helper function * add expectedGetReadyTimeout and use it in checkPoolReady * rename overrideTimeout parameter to customTimeout in checkNodesConnected * use default timeout of checkNodesConnected * create expectedPoolLedgerCheck and expectedClientConnectionTimeout waits and use them * add todo for ensureDisconnectedToNodes * update waits.expectedPoolLedgerCheck * add todo for checkNodesParticipating * add requestIds parameter for waitForSufficientRepliesForRequests * update docstring of waitForSufficientRepliesForRequests * fix waitForSufficientRepliesForRequests * remove unused imports from test_log_filtering * use named timeout in test_status_command * use waits.expectedTransactionExecutionTime in testTransactions * refactor testTransactions * use waitRequestSuccess and waitBalanceChange in test_cli_with_auction_plugin, move them to test.cli.helper * use named timeout in test_basic_client_commands * use named timeout in helper.checkRequest * create waitClientConnected and use it instead of checkClientConnected with eventually * add docstrings * rename checkNodeStarted to waitNodeStarted and use named timeout 'expectedNodeStartUpTimeout' in it * rename expectedGetReadyTimeout to expectedPoolGetReadyTimeout * rename checkAllNodesStarted to waitAllNodesStarted * fix default value of customTimeout of checkPoolReady * create waitAllNodesUp and use it instead of checkAllNodesUp with eventually * create waitReplyCount and use instead of checkReplyCount and eventually * use named timeouts in test_client * use named timeouts in some more tests * add basic implementation for 
waits.expectedCatchupTime and use it * make expectedCatchupTime get custom ConsistencyProofsTimeout as parameter * use named timeout in testNodeDiscardMessageFromUnknownView * use waits.expectedElectionTimeout and rename timeout arg to customTimeout in checkEveryNodeHasAtMostOnePrimary * rename timeout argument of plenum/test/node_catchup/test_discard_view_no.py to customTimeout and make it use named timeouts from waits as default * update timeouts in testNodeDiscardMessageFromUnknownView * create waits.expectedRequestStashingTime and use it * add TODO to test_catchup_delayed_nodes * create waitNodeLedgersEquality and use it instead of direct usage of checkNodeLedgersEquality * use waits.expectedPoolLedgerCheck in waitNodeLedgersEquality * use named timeout in testOrderingCase2 * add waits.expectedNominationTimeout and use it * use named timeout in some more tests * add missing empty lines * update waits * add 'TODO[slow-factor]' * update timeouts in the tests * fix testTestNodeDelay and missed import * skip testMultipleRequests test * skip testStatusAfterClientAdded test * fix testInstChangeWithLowerRatioThanDelta * fix test_new_node_catchup.py * fix testChangeHaPersistsPostNodesRestart * fix testAdd2NewNodes * increase expectedElectionTimeout timeout * rename logger.warn => logger.warning * tune timeouts in the tests * make sections in waits.py * add --repeat for runner.py * increase expectedCatchupTime timeout * improve runner.py * tune the test timeouts * skip some catchup tests * parametrize test timeouts * rm eventually.py * fix testQueueingReqFromFutureView, testNumOfPrePrepareWithFPlusOneFaults, testNumOfPrepareWithFPlusOneFaults * fix testProtocolInstanceCannotBecomeActiveWithLessThanFourServers * tune propagate and preprepare test timeouts * skip testNumOf*WithFPlusOneFaults * fixed plenum for the latest stp (#127) * fixed plenum for the latest stp * increment stp version * archive runner.py output for all agents (#107) * archive runner.py results * 
using env variable NODE_NAME instead of func param for artifacts * configured archiveArtifacts to allow empty/no archive * Do view change if a primary is disconnected (#128) * check whether function is a partial function in getCallableName * add tests for view change when primary goes down * start view change if primary went offline * use startViewChangeIfPrimaryWentOffline instead of doElectionIfNeeded * Unclosed file warnings (#124) * Removed "ignore" rule for ResourceWarning about an unclosed file from the warnings filter. * Fixed some causes of ResourceWarnings about unclosed files. * - Fixed some causes of ResourceWarnings about unclosed files. - Corrected plenum.common.txn_util.updateGenesisPoolTxnFile function. * - Fixed the remaining causes of ResourceWarnings about unclosed files. - Removed TimeAndSizeRotatingFileHandler class which is not used anymore (the class with the same name from stp is now used instead). * Updated stp-dev dependency to the new version. * Reverted update of stp-dev dependency. * Skipped the tests in plenum.test.test_log_rotation module since they require stp-dev dependency of a newer version (0.1.28 or higher). 
* build * build * build * build Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- .gitignore | 3 + .venv/lib64 | 1 + Jenkinsfile | 19 +- README.md | 8 +- examples/new_client.py | 2 +- examples/simple_client.py | 2 +- examples/simple_node.py | 2 +- examples/stack_message_loss.py | 83 -- plenum/__init__.py | 3 + plenum/cli/__main__.py | 2 +- plenum/cli/cli.py | 279 +++--- plenum/cli/command.py | 13 +- plenum/cli/constants.py | 2 +- plenum/cli/helper.py | 2 +- plenum/client/client.py | 71 +- plenum/client/pool_manager.py | 10 +- plenum/client/wallet.py | 18 +- plenum/common/batched.py | 117 +++ plenum/common/config_util.py | 1 - plenum/common/crypto.py | 36 - plenum/common/did_method.py | 2 +- plenum/common/error.py | 2 +- plenum/common/error_codes.py | 1 - plenum/common/eventually.py | 155 --- plenum/common/exceptions.py | 47 +- plenum/common/jsonpickle_util.py | 29 + plenum/common/keygen_utils.py | 51 + plenum/common/ledger_manager.py | 54 +- plenum/common/log.py | 238 ----- .../logging/TimeAndSizeRotatingFileHandler.py | 41 - plenum/common/looper.py | 302 ------ plenum/common/message_processor.py | 61 ++ plenum/common/motor.py | 4 +- plenum/common/pkg_util.py | 2 +- plenum/common/plugin_helper.py | 6 +- plenum/common/port_dispenser.py | 76 -- plenum/common/raet.py | 159 ---- plenum/common/ratchet.py | 91 -- plenum/common/request.py | 38 +- plenum/common/script_helper.py | 31 +- plenum/common/signer.py | 23 - plenum/common/signer_did.py | 11 +- plenum/common/signer_simple.py | 5 +- plenum/common/signing.py | 4 +- plenum/common/stack_manager.py | 35 +- plenum/common/stacked.py | 898 ------------------ plenum/common/stacks.py | 138 +++ plenum/common/test_network_setup.py | 55 +- plenum/common/throttler.py | 2 +- plenum/common/transaction_store.py | 2 +- plenum/common/txn_util.py | 52 +- plenum/common/types.py | 19 +- plenum/common/util.py | 224 ++--- plenum/common/verifier.py | 2 +- plenum/config.py | 11 +- 
.../persistence/client_req_rep_store_file.py | 50 +- plenum/persistence/client_txn_log.py | 3 + plenum/persistence/orientdb_graph_store.py | 2 +- plenum/persistence/orientdb_hash_store.py | 5 +- plenum/persistence/orientdb_store.py | 13 +- plenum/server/client_authn.py | 4 +- plenum/server/has_action_queue.py | 3 +- plenum/server/monitor.py | 28 +- plenum/server/node.py | 230 +++-- plenum/server/notifier_plugin_manager.py | 2 +- .../plugin_firebase_stats_consumer.py | 2 +- .../plugin/stats_consumer/stats_publisher.py | 2 +- plenum/server/plugin_loader.py | 2 +- plenum/server/pool_manager.py | 42 +- plenum/server/primary_decider.py | 2 +- plenum/server/primary_elector.py | 106 +-- plenum/server/primary_selector.py | 11 +- plenum/server/propagator.py | 18 +- plenum/server/replica.py | 210 ++-- .../test/blacklist/test_blacklist_client.py | 6 +- ..._blacklist_node_on_multiple_nominations.py | 6 +- ...t_node_on_multiple_primary_declarations.py | 6 +- plenum/test/checkpoints/conftest.py | 6 +- plenum/test/checkpoints/helper.py | 5 +- .../checkpoints/test_basic_checkpointing.py | 9 +- .../test_discard_old_checkpoint_messages.py | 5 +- .../test_message_outside_watermark.py | 58 ++ .../test_message_outside_watermark1.py | 36 + .../checkpoints/test_stable_checkpoint.py | 14 +- .../checkpoints/test_stable_checkpoint1.py | 24 + plenum/test/cli/conftest.py | 24 +- plenum/test/cli/helper.py | 105 +- plenum/test/cli/test_basic_client_commands.py | 7 +- plenum/test/cli/test_basic_node_commands.py | 4 +- plenum/test/cli/test_cli_client_ip_port.py | 8 +- plenum/test/cli/test_cli_startup.py | 6 +- .../cli/test_cli_with_auction_req_plugin.py | 130 ++- .../test/cli/test_cli_with_bank_req_plugin.py | 87 +- plenum/test/cli/test_log_filtering.py | 16 +- .../test/cli/test_save_and_restore_wallet.py | 7 +- plenum/test/cli/test_status_command.py | 25 +- plenum/test/client/test_client.py | 157 ++- .../test/client/test_client_request_nack.py | 7 +- plenum/test/client/test_client_retry.py | 74 
+- plenum/test/common/test_throttler.py | 6 +- plenum/test/conftest.py | 216 +++-- plenum/test/helper.py | 246 +++-- plenum/test/instances/test_commit_digest.py | 12 +- ...come_active_with_less_than_four_servers.py | 27 +- .../test_msgs_from_slow_instances.py | 8 +- plenum/test/instances/test_multiple_commit.py | 12 +- .../test_multiple_instance_change_msgs.py | 20 +- .../instances/test_multiple_pre_prepare.py | 20 +- .../test/instances/test_multiple_prepare.py | 20 +- .../test/instances/test_pre_prepare_digest.py | 12 +- plenum/test/instances/test_prepare_digest.py | 12 +- plenum/test/malicious_behaviors_client.py | 2 +- plenum/test/malicious_behaviors_node.py | 8 +- plenum/test/monitoring/conftest.py | 10 +- plenum/test/monitoring/test_avg_latency.py | 13 +- .../test_instance_change_with_Delta.py | 34 +- .../test_instance_change_with_req_Lambda.py | 18 +- .../test_monitoring_params_with_zfn.py | 2 +- .../monitoring/test_post_monitoring_stats.py | 25 +- plenum/test/monitoring/test_throughput.py | 11 +- plenum/test/node_catchup/conftest.py | 45 +- plenum/test/node_catchup/helper.py | 37 +- .../test_catchup_delayed_nodes.py | 23 +- .../node_catchup/test_catchup_scenarios.py | 11 +- ...test_catchup_while_new_request_incoming.py | 53 ++ .../test/node_catchup/test_discard_view_no.py | 26 +- .../node_catchup/test_new_node_catchup.py | 39 +- ..._node_reject_invalid_txn_during_catchup.py | 27 +- .../test_node_request_consistency_proof.py | 30 +- .../test_node_request_missing_transactions.py | 20 +- .../test/node_request/node_request_helper.py | 30 +- ...st_num_of_commit_with_f_plus_one_faults.py | 12 +- ..._ordering_when_pre_prepare_not_received.py | 15 +- .../test_order/test_request_ordering_1.py | 11 +- .../test_order/test_request_ordering_2.py | 18 +- .../test_non_primary_sends_a_pre_prepare.py | 12 +- ...m_of_pre_prepare_with_f_plus_one_faults.py | 34 +- ...st_primary_sends_preprepare_of_high_num.py | 15 +- ...t_num_of_prepare_with_f_plus_one_faults.py | 21 +- 
.../test_num_of_sufficient_prepare.py | 2 +- .../plugin_auction_req_processor.py | 2 +- .../plugin_bank_req_processor.py | 2 +- .../test_auction_req_processor_plugin.py | 19 +- .../test_auction_req_validation_plugin.py | 7 +- .../plugin/test_bank_req_processor_plugin.py | 29 +- .../plugin/test_bank_req_validation_plugin.py | 14 +- plenum/test/pool_transactions/conftest.py | 12 +- plenum/test/pool_transactions/helper.py | 54 +- .../pool_transactions/test_adding_stewards.py | 9 +- ...t_change_ha_persists_post_nodes_restart.py | 16 +- .../test_client_change_ha.py | 5 +- .../test_client_with_pool_txns.py | 32 +- .../test_multiple_clients.py | 41 + .../test_nodes_with_pool_txns.py | 70 +- .../pool_transactions/test_suspend_node.py | 13 +- .../test_primary_election_case1.py | 15 +- .../test_primary_election_case2.py | 10 +- .../test_primary_election_case4.py | 9 +- .../test_primary_election_case5.py | 4 +- .../test_primary_election_contested.py | 20 +- ...test_primary_election_with_clear_winner.py | 14 +- .../test_primary_election_with_tie.py | 22 +- .../primary_election/test_primary_forfeit.py | 2 +- .../test_primary_selection.py | 13 +- .../test_propagate_recvd_after_request.py | 13 +- .../test_propagate_recvd_before_request.py | 20 +- plenum/test/raet/__init__.py | 0 plenum/test/raet/helper.py | 45 - plenum/test/raet/test_communication.py | 198 ---- .../test/raet/test_raet_comm_with_one_key.py | 103 -- ...y_marked_suspicious_for_sending_prepare.py | 7 +- .../test_replica_reject_same_pre_prepare.py | 25 +- plenum/test/script/helper.py | 23 +- .../script/test_add_unregistered_remote.py | 55 -- .../test/script/test_bootstrap_test_node.py | 2 +- .../script/test_change_non_primary_node_ha.py | 6 +- .../script/test_change_primary_node_ha.py | 7 +- plenum/test/signing/test_signing.py | 8 +- plenum/test/stasher.py | 2 +- plenum/test/storage/helper.py | 6 +- .../test/storage/test_orientdb_hash_store.py | 1 + plenum/test/test_action_queue.py | 35 + 
plenum/test/test_bootstrapping.py | 37 +- plenum/test/test_client.py | 32 +- .../test_connections_with_converted_key.py | 28 +- plenum/test/test_crypto.py | 5 +- plenum/test/test_delay.py | 61 +- plenum/test/test_log_rotation.py | 5 +- plenum/test/test_memory_consumpion.py | 17 +- plenum/test/test_node.py | 182 ++-- plenum/test/test_node_basic.py | 26 +- plenum/test/test_node_connection.py | 156 +-- plenum/test/test_node_request.py | 82 +- plenum/test/test_port_conflicts.py | 20 +- .../test_round_trip_with_one_faulty_node.py | 2 +- plenum/test/test_stack.py | 95 +- plenum/test/test_testable.py | 2 +- plenum/test/test_util.py | 3 +- plenum/test/test_verif_merkle_proof.py | 7 +- plenum/test/testable.py | 10 +- plenum/test/testing_utils.py | 4 +- ...st_discard_inst_chng_msg_from_past_view.py | 12 +- .../test_elections_after_view_change.py | 14 +- .../test_instance_change_msg_checking.py | 6 +- .../test_queueing_req_from_future_view.py | 38 +- plenum/test/view_change/test_view_change.py | 13 +- ..._changes_if_backup_primary_disconnected.py | 32 + ..._changes_if_master_primary_disconnected.py | 32 + .../test/view_change/test_view_not_changed.py | 4 +- plenum/test/waits.py | 129 ++- plenum/test/wallet/test_wallet.py | 3 +- .../logging => test/zstack_tests}/__init__.py | 0 .../zstack_tests/test_zstack_reconnection.py | 62 ++ runner.py | 183 ++-- scripts/gen_node | 25 +- scripts/gen_steward_key | 5 +- scripts/generate_plenum_pool_transactions | 4 +- scripts/get_keys | 4 +- ...init_plenum_raet_keep => init_plenum_keys} | 10 +- scripts/plenum | 6 +- scripts/start_plenum_node | 6 +- setup.py | 8 +- tutorial/tutorial.py | 23 +- 223 files changed, 3930 insertions(+), 4780 deletions(-) create mode 120000 .venv/lib64 delete mode 100644 examples/stack_message_loss.py create mode 100644 plenum/common/batched.py delete mode 100644 plenum/common/crypto.py delete mode 100644 plenum/common/error_codes.py delete mode 100644 plenum/common/eventually.py create mode 100644 
plenum/common/jsonpickle_util.py create mode 100644 plenum/common/keygen_utils.py delete mode 100644 plenum/common/log.py delete mode 100644 plenum/common/logging/TimeAndSizeRotatingFileHandler.py delete mode 100644 plenum/common/looper.py create mode 100644 plenum/common/message_processor.py delete mode 100644 plenum/common/port_dispenser.py delete mode 100644 plenum/common/raet.py delete mode 100644 plenum/common/ratchet.py delete mode 100644 plenum/common/signer.py delete mode 100644 plenum/common/stacked.py create mode 100644 plenum/common/stacks.py create mode 100644 plenum/test/checkpoints/test_message_outside_watermark.py create mode 100644 plenum/test/checkpoints/test_message_outside_watermark1.py create mode 100644 plenum/test/checkpoints/test_stable_checkpoint1.py create mode 100644 plenum/test/node_catchup/test_catchup_while_new_request_incoming.py create mode 100644 plenum/test/pool_transactions/test_multiple_clients.py delete mode 100644 plenum/test/raet/__init__.py delete mode 100644 plenum/test/raet/helper.py delete mode 100644 plenum/test/raet/test_communication.py delete mode 100644 plenum/test/raet/test_raet_comm_with_one_key.py delete mode 100644 plenum/test/script/test_add_unregistered_remote.py create mode 100644 plenum/test/test_action_queue.py create mode 100644 plenum/test/view_change/test_view_changes_if_backup_primary_disconnected.py create mode 100644 plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py rename plenum/{common/logging => test/zstack_tests}/__init__.py (100%) create mode 100644 plenum/test/zstack_tests/test_zstack_reconnection.py rename scripts/{init_plenum_raet_keep => init_plenum_keys} (86%) mode change 100644 => 100755 scripts/start_plenum_node diff --git a/.gitignore b/.gitignore index a60e626e78..8f7c08978f 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,6 @@ include/ # generated doc files docs/source/api_docs/ + +# hidden files +.* diff --git a/.venv/lib64 b/.venv/lib64 new file mode 120000 
index 0000000000..7951405f85 --- /dev/null +++ b/.venv/lib64 @@ -0,0 +1 @@ +lib \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index e5ace55ada..7bac441e32 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -19,7 +19,13 @@ def testUbuntu = { testHelpers.installDeps() echo 'Ubuntu Test: Test' - sh 'python runner.py --pytest \"python -m pytest\" --output "test-result.txt"' + def resFile = "test-result.${NODE_NAME}.txt" + try { + sh "python runner.py --pytest \"python -m pytest\" --output \"$resFile\"" + } + finally { + archiveArtifacts allowEmptyArchive: true, artifacts: "$resFile" + } } } finally { @@ -68,7 +74,13 @@ def testWindowsNoDocker = { testHelpers.installDepsBat(python, pip) echo 'Windows No Docker Test: Test' - bat "${python} runner.py --pytest \"${python} -m pytest\" --output \"test-result.txt\"" + def resFile = "test-result.${NODE_NAME}.txt" + try { + bat "${python} runner.py --pytest \"${python} -m pytest\" --output \"$resFile\"" + } + finally { + archiveArtifacts allowEmptyArchive: true, artifacts: "$resFile" + } }) } finally { @@ -79,4 +91,5 @@ def testWindowsNoDocker = { -testAndPublish(name, [ubuntu: testUbuntu, windows: testWindowsNoDocker, windowsNoDocker: testWindowsNoDocker]) +//testAndPublish(name, [ubuntu: testUbuntu, windows: testWindowsNoDocker, windowsNoDocker: testWindowsNoDocker]) +testAndPublish(name, [ubuntu: testUbuntu]) diff --git a/README.md b/README.md index a9413717a5..36023244fa 100644 --- a/README.md +++ b/README.md @@ -106,19 +106,19 @@ source /bin/activate ### Initializing Keep ``` -init_plenum_raet_keep --name Alpha --seeds 000000000000000000000000000Alpha Alpha000000000000000000000000000 --force +init_plenum_keys --name Alpha --seeds 000000000000000000000000000Alpha Alpha000000000000000000000000000 --force ``` ``` -init_plenum_raet_keep --name Beta --seeds 0000000000000000000000000000Beta Beta0000000000000000000000000000 --force +init_plenum_keys --name Beta --seeds 0000000000000000000000000000Beta 
Beta0000000000000000000000000000 --force ``` ``` -init_plenum_raet_keep --name Gamma --seeds 000000000000000000000000000Gamma Gamma000000000000000000000000000 --force +init_plenum_keys --name Gamma --seeds 000000000000000000000000000Gamma Gamma000000000000000000000000000 --force ``` ``` -init_plenum_raet_keep --name Delta --seeds 000000000000000000000000000Delta Delta000000000000000000000000000 --force +init_plenum_keys --name Delta --seeds 000000000000000000000000000Delta Delta000000000000000000000000000 --force ``` Note: Seed can be any randomly chosen 32 byte value. It does not have to be in the format `00..`. diff --git a/examples/new_client.py b/examples/new_client.py index 143f9820f8..f5dbc1fe69 100644 --- a/examples/new_client.py +++ b/examples/new_client.py @@ -11,7 +11,7 @@ #TODO Remove this file, just for now from plenum.client.client import Client -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper from plenum.common.signer_simple import SimpleSigner from plenum.common.config_util import getConfig diff --git a/examples/simple_client.py b/examples/simple_client.py index deb38f76e3..46ab4dac12 100755 --- a/examples/simple_client.py +++ b/examples/simple_client.py @@ -10,7 +10,7 @@ from collections import OrderedDict from plenum.client.client import Client -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper from plenum.common.signer_simple import SimpleSigner from plenum.common.temp_file_util import SafeTemporaryDirectory diff --git a/examples/simple_node.py b/examples/simple_node.py index 84b59360bc..339241b7c3 100755 --- a/examples/simple_node.py +++ b/examples/simple_node.py @@ -12,7 +12,7 @@ import sys from collections import OrderedDict -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper from plenum.common.temp_file_util import SafeTemporaryDirectory from plenum.server.node import Node diff --git a/examples/stack_message_loss.py b/examples/stack_message_loss.py 
deleted file mode 100644 index c7097792d5..0000000000 --- a/examples/stack_message_loss.py +++ /dev/null @@ -1,83 +0,0 @@ -import raet -import time -from raet.raeting import AutoMode - - -def example2(): - alpha = raet.road.stacking.RoadStack(name='alpha', - ha=('0.0.0.0', 7531), - auto=AutoMode.always) - - beta = raet.road.stacking.RoadStack(name='beta', - ha=('0.0.0.0', 7532), - main=True, - auto=AutoMode.always) - - remote = raet.road.estating.RemoteEstate(stack=alpha, - ha=beta.ha) - - alpha.addRemote(remote) - - alpha.join(uid=remote.uid, cascade=True) - - stacks = [alpha, beta] - while True: - for stack in stacks: - stack.serviceAll() - stack.store.advanceStamp(0.1) - if all([not stack.transactions for stack in stacks]): - break - time.sleep(0.1) - - print("Finished Handshake\n") - - msg = {'subject': 'Example message alpha to beta', - 'content': 'The dict keys in this dict are not special any dict will do.',} - - alpha.transmit(msg, remote.uid) - while True: - for stack in stacks: - stack.serviceAll() - stack.store.advanceStamp(0.1) - if all([not stack.transactions for stack in stacks]): - break - time.sleep(0.1) - - rx = beta.rxMsgs.popleft() - print("{0}\n".format(rx)) - print("Finished Message alpha to beta\n") - - msg = {'subject': 'Example message beta to alpha', - 'content': 'Messages are the core of raet.',} - - beta.transmit(msg, remote.uid) - while True: - for stack in stacks: - stack.serviceAll() - stack.store.advanceStamp(0.1) - if all([not stack.transactions for stack in stacks]): - break - time.sleep(0.1) - - rx = alpha.rxMsgs.popleft() - print("{0}\n".format(rx)) - print("Finished Message beta to alpha\n") - - beta.server.close() - for i in range(10): - alpha.transmit(msg, remote.uid) - time.sleep(3) - for i in range(500): - alpha.serviceAll() - alpha.store.advanceStamp(0.1) - if all([not stack.transactions for stack in stacks]): - break - time.sleep(0.1) - - for stack in stacks: - stack.server.close() # close the UDP socket - 
stack.keep.clearAllDir() # clear persisted data - - print("Finished\n") - -example2() diff --git a/plenum/__init__.py b/plenum/__init__.py index 9b81930b04..3d915343af 100644 --- a/plenum/__init__.py +++ b/plenum/__init__.py @@ -6,9 +6,12 @@ import sys import plenum +from plenum.common.jsonpickle_util import setUpJsonpickle if sys.version_info < (3, 5, 0): raise ImportError("Python 3.5.0 or later required.") import importlib from .__metadata__ import * + +setUpJsonpickle() diff --git a/plenum/cli/__main__.py b/plenum/cli/__main__.py index 2a2890ae5a..45b15c6019 100644 --- a/plenum/cli/__main__.py +++ b/plenum/cli/__main__.py @@ -1,7 +1,7 @@ import sys from plenum.cli.cli import Cli -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper from plenum.common.config_util import getConfig diff --git a/plenum/cli/cli.py b/plenum/cli/cli.py index 9cff2df937..41a5569bc9 100644 --- a/plenum/cli/cli.py +++ b/plenum/cli/cli.py @@ -1,20 +1,20 @@ from __future__ import unicode_literals -# noinspection PyUnresolvedReferences import glob +import shutil +from hashlib import sha256 +from os.path import basename, dirname from typing import Dict, Iterable import pyorient -import shutil -from hashlib import sha256 -from jsonpickle import json, encode, decode +from jsonpickle import json + from ledger.compact_merkle_tree import CompactMerkleTree from ledger.ledger import Ledger from ledger.stores.file_hash_store import FileHashStore -from os.path import basename, dirname - +from plenum import config from plenum.cli.command import helpCmd, statusNodeCmd, statusClientCmd, \ - keyShareCmd, loadPluginsCmd, clientSendCmd, clientShowCmd, newKeyCmd, \ + loadPluginsCmd, clientSendCmd, clientShowCmd, newKeyCmd, \ newKeyringCmd, renameKeyringCmd, useKeyringCmd, saveKeyringCmd, \ listKeyringCmd, listIdsCmd, useIdCmd, addGenesisTxnCmd, \ createGenesisTxnFileCmd, changePromptCmd, exitCmd, quitCmd, Command @@ -29,17 +29,19 @@ from plenum.cli.phrase_word_completer import 
PhraseWordCompleter from plenum.client.wallet import Wallet from plenum.common.exceptions import NameAlreadyExists, GraphStorageNotAvailable, \ - RaetKeysNotFoundException + KeysNotFoundException +from plenum.common.keygen_utils import learnKeysFromOthers, tellKeysToOthers, areKeysSetup from plenum.common.plugin_helper import loadPlugins -from plenum.common.port_dispenser import genHa -from plenum.common.raet import getLocalEstateData -from plenum.common.raet import isLocalKeepSetup +from stp_core.crypto.util import cleanSeed, seedFromHex +from stp_raet.util import getLocalEstateData from plenum.common.signer_simple import SimpleSigner from plenum.common.stack_manager import TxnStackManager from plenum.common.constants import TXN_TYPE, TARGET_NYM, TXN_ID, DATA, IDENTIFIER, \ NODE, ALIAS, NODE_IP, NODE_PORT, CLIENT_PORT, CLIENT_IP, VERKEY, BY, CLIENT_STACK_SUFFIX from plenum.common.transactions import PlenumTransactions from prompt_toolkit.utils import is_windows, is_conemu_ansi +from stp_core.network.port_dispenser import genHa +from stp_core.types import HA if is_windows(): from prompt_toolkit.terminal.win32_output import Win32Output @@ -55,9 +57,7 @@ import ast from functools import reduce, partial -import logging import sys -from collections import defaultdict from prompt_toolkit.history import FileHistory from ioflo.aid.consoling import Console @@ -74,12 +74,14 @@ from pygments.token import Token from plenum.client.client import Client from plenum.common.util import getMaxFailures, \ - firstValue, randomString, cleanSeed, bootstrapClientKeys, \ - createDirIfNotExists, getFriendlyIdentifier -from plenum.common.log import CliHandler, getlogger, Logger, \ - getRAETLogLevelFromConfig, getRAETLogFilePath, TRACE_LOG_LEVEL + firstValue, randomString, bootstrapClientKeys, \ + getFriendlyIdentifier, saveGivenWallet, \ + normalizedWalletFileName, getWalletFilePath, getWalletByPath, \ + getLastSavedWalletFileName +from stp_core.common.log import \ + getlogger, Logger, 
getRAETLogFilePath, getRAETLogLevelFromConfig from plenum.server.node import Node -from plenum.common.types import NodeDetail, HA +from plenum.common.types import NodeDetail from plenum.server.plugin_loader import PluginLoader from plenum.server.replica import Replica from plenum.common.config_util import getConfig @@ -132,12 +134,14 @@ def __init__(self, looper, basedirpath, nodeReg=None, cliNodeReg=None, and len(cliNodeReg)): self.nodeRegLoadedFromFile = True dataDir = self.basedirpath - ledger = Ledger(CompactMerkleTree(hashStore=FileHashStore( - dataDir=dataDir)), + fileHashStore = FileHashStore(dataDir=dataDir) + ledger = Ledger(CompactMerkleTree(hashStore=fileHashStore), dataDir=dataDir, fileName=self.config.poolTransactionsFile) nodeReg, cliNodeReg, _ = TxnStackManager.parseLedgerForHaAndKeys( ledger) + ledger.stop() + fileHashStore.close() self.withNode = withNode self.nodeReg = nodeReg @@ -276,6 +280,15 @@ def __init__(self, looper, basedirpath, nodeReg=None, cliNodeReg=None, self.checkIfCmdHandlerAndCmdMappingExists() + def close(self): + """ + Stops all the created clients and nodes. + """ + for key in self.clients: + self.clients[key].stop() + for key in self.nodes: + self.nodes[key].stop() + def _getCmdMappingError(self, cmdHandlerFuncName, mappingFuncName): msg="Command mapping not provided for '{}' command handler. 
" \ "\nPlease add proper mapping for that command handler " \ @@ -311,14 +324,14 @@ def actions(self): self._actions = [self._simpleAction, self._helpAction, self._newNodeAction, self._newClientAction, self._statusNodeAction, self._statusClientAction, - self._keyShareAction, self._loadPluginDirAction, + self._loadPluginDirAction, self._clientCommand, self._addKeyAction, self._newKeyAction, self._listIdsAction, self._useIdentifierAction, self._addGenesisAction, self._createGenTxnFileAction, self._changePrompt, self._newKeyring, self._renameKeyring, self._useKeyringAction, self._saveKeyringAction, - self._listKeyringsAction ] + self._listKeyringsAction] return self._actions @property @@ -407,11 +420,11 @@ def lexers(self): def _renameWalletFile(self, oldWalletName, newWalletName): keyringsDir = self.getContextBasedKeyringsBaseDir() - oldWalletFilePath = Cli.getWalletFilePath( - keyringsDir, Cli._normalizedWalletFileName(oldWalletName)) + oldWalletFilePath = getWalletFilePath( + keyringsDir, normalizedWalletFileName(oldWalletName)) if os.path.exists(oldWalletFilePath): - newWalletFilePath = Cli.getWalletFilePath( - keyringsDir, Cli._normalizedWalletFileName(newWalletName)) + newWalletFilePath = getWalletFilePath( + keyringsDir, normalizedWalletFileName(newWalletName)) if os.path.exists(newWalletFilePath): self.print("A persistent wallet file already exists for " "new wallet name. 
Please choose new wallet name.") @@ -468,6 +481,7 @@ def _createGenTxnFileAction(self, matchedVars): ledger.add(item) self.print('Genesis transaction file created at {} ' .format(ledger._transactionLog.dbPath)) + ledger.stop() return True def _addGenesisAction(self, matchedVars): @@ -576,7 +590,7 @@ def activeClient(self): @activeClient.setter def activeClient(self, client): self._activeClient = client - self.print("Active client set to " + client.name) + self.print("Active client set to " + client.alias) @staticmethod def relist(seq): @@ -657,7 +671,6 @@ def cmdHandlerToCmdMappings(self): mappings['newClientAction'] = newClientCmd mappings['statusNodeAction'] = statusNodeCmd mappings['statusClientAction'] = statusClientCmd - mappings['keyShareAction'] = keyShareCmd mappings['clientSendMsgCommand'] = clientSendCmd mappings['clientShowMsgCommand'] = clientShowCmd @@ -825,21 +838,21 @@ def getStatus(self): self.print("Instances: " "Not enough nodes to create protocol instances") - def keyshare(self, nodeName): - node = self.nodes.get(nodeName, None) - if node is not None: - node = self.nodes[nodeName] - node.startKeySharing() - elif nodeName not in self.nodeReg: - tokens = [(Token.Error, "Invalid node name '{}'.".format(nodeName))] - self.printTokens(tokens) - self.showValidNodes() - return - else: - tokens = [(Token.Error, "Node '{}' not started.".format(nodeName))] - self.printTokens(tokens) - self.showStartedNodes() - return + # def keyshare(self, nodeName): + # node = self.nodes.get(nodeName, None) + # if node is not None: + # node = self.nodes[nodeName] + # node.startKeySharing() + # elif nodeName not in self.nodeReg: + # tokens = [(Token.Error, "Invalid node name '{}'.".format(nodeName))] + # self.printTokens(tokens) + # self.showValidNodes() + # return + # else: + # tokens = [(Token.Error, "Node '{}' not started.".format(nodeName))] + # self.printTokens(tokens) + # self.showStartedNodes() + # return def showStartedNodes(self): self.printTokens([(Token, 
"Started nodes are: ")]) @@ -888,19 +901,23 @@ def newNode(self, nodeName: str): nodes = [] for name in names: try: + nodeRegistry = None if self.nodeRegLoadedFromFile \ + else self.nodeRegistry + + learnKeysFromOthers(self.basedirpath, name, self.nodes.values()) node = self.NodeClass(name, - nodeRegistry=None if self.nodeRegLoadedFromFile - else self.nodeRegistry, - basedirpath=self.basedirpath, - pluginPaths=self.pluginPaths, - config=self.config) - except (GraphStorageNotAvailable, RaetKeysNotFoundException) as e: + nodeRegistry=nodeRegistry, + basedirpath=self.basedirpath, + pluginPaths=self.pluginPaths, + config=self.config) + except (GraphStorageNotAvailable, KeysNotFoundException) as e: self.print(str(e), Token.BoldOrange) return self.nodes[name] = node self.looper.add(node) if not self.nodeRegLoadedFromFile: - node.startKeySharing() + # node.startKeySharing() + tellKeysToOthers(node, self.nodes.values()) if len(self.clients) > 0: self.bootstrapKey(self.activeWallet, node) @@ -1006,7 +1023,7 @@ def newClient(self, clientName, config=None): try: self.ensureValidClientId(clientName) - if not isLocalKeepSetup(clientName, self.basedirpath): + if not areKeysSetup(clientName, self.basedirpath): client_addr = genHa(ip='0.0.0.0') else: client_addr = tuple(getLocalEstateData(clientName, @@ -1177,11 +1194,11 @@ def _statusClientAction(self, matchedVars): self.statusClient(client) return True - def _keyShareAction(self, matchedVars): - if matchedVars.get('node_command') == 'keyshare': - name = matchedVars.get('node_name') - self.keyshare(name) - return True + # def _keyShareAction(self, matchedVars): + # if matchedVars.get('node_command') == 'keyshare': + # name = matchedVars.get('node_name') + # self.keyshare(name) + # return True def _clientCommand(self, matchedVars): if matchedVars.get('client') == 'client': @@ -1292,9 +1309,10 @@ def bootstrapClientKeys(idr, verkey, nodes): def isValidSeedForNewKey(self, seed): if seed: seed = seed.strip() - if len(seed) != 32: - 
self.print('Seed needs to be 32 characters long but is {} ' - 'characters long'.format(len(seed)), Token.Error) + if len(seed) != 32 and not seedFromHex(seed): + self.print('Seed needs to be 32 or 64 characters (if hex) long ' + 'but is {} characters long'.format(len(seed)), + Token.Error) return False return True @@ -1348,14 +1366,17 @@ def _newWallet(self, walletName=None): def _listKeyringsAction(self, matchedVars): if matchedVars.get('list_krs') == 'list keyrings': - envs = self.getAllEnvDirNamesForKeyrings() + keyringBaseDir = self.getKeyringsBaseDir() contextDirPath = self.getContextBasedKeyringsBaseDir() - envPaths = [os.path.join(self.getKeyringsBaseDir(), e) for e in envs] + dirs_to_scan = self.getAllSubDirNamesForKeyrings() + if contextDirPath not in dirs_to_scan: + dirs_to_scan.insert(0, contextDirPath) + dirs_to_scan = [os.path.join(keyringBaseDir, e) for e in dirs_to_scan] anyWalletFound = False - for e in envPaths: - fe = e.rstrip(os.sep) - envName = basename(fe) - files = glob.glob("{}/*.{}".format(fe, WALLET_FILE_EXTENSION)) + for dir in dirs_to_scan: + cleaned_dir_name = dir.rstrip(os.sep) # removed os path separator at the end + dir_name = basename(cleaned_dir_name) + files = glob.glob("{}/*.{}".format(cleaned_dir_name, WALLET_FILE_EXTENSION)) persistedWalletNames = [] unpersistedWalletNames = [] @@ -1364,7 +1385,7 @@ def _listKeyringsAction(self, matchedVars): walletName = Cli.getWalletKeyName(basename(f)) persistedWalletNames.append(walletName) - if contextDirPath == fe: + if contextDirPath == cleaned_dir_name: unpersistedWalletNames = [ n for n in self.wallets.keys() if n.lower() not in persistedWalletNames] @@ -1372,14 +1393,15 @@ def _listKeyringsAction(self, matchedVars): if len(persistedWalletNames) > 0 or \ len(unpersistedWalletNames) > 0: anyWalletFound = True - self.print("\nEnvironment: {}".format(envName)) + self.print("\nContext Name: {}".format(dir_name), newline=False) + self.print(" (path:{})".format(dir), Token.Gray) if 
len(persistedWalletNames) > 0: self.print(" Persisted wallets:") for pwn in persistedWalletNames: - f = os.path.join(fe, Cli._normalizedWalletFileName(pwn)) + f = os.path.join(cleaned_dir_name, normalizedWalletFileName(pwn)) lastModifiedTime = time.ctime(os.path.getmtime(f)) - isThisActiveWallet = True if contextDirPath == fe and \ + isThisActiveWallet = True if contextDirPath == cleaned_dir_name and \ self._activeWallet is not None and \ self._activeWallet.name.lower() == pwn.lower() \ else False @@ -1431,9 +1453,9 @@ def _listIdsAction(self, matchedVars): return True def checkIfPersistentWalletExists(self, name, inContextDir=None): - toBeWalletFileName = Cli._normalizedWalletFileName(name) + toBeWalletFileName = normalizedWalletFileName(name) contextDir = inContextDir or self.getContextBasedKeyringsBaseDir() - toBeWalletFilePath = Cli.getWalletFilePath( + toBeWalletFilePath = getWalletFilePath( contextDir, toBeWalletFileName) if os.path.exists(toBeWalletFilePath): return toBeWalletFilePath @@ -1490,7 +1512,7 @@ def _checkIfWalletExists(origName, checkInWallets=True, def _loadWalletIfExistsAndNotLoaded(self, name, copyAs=None, override=False): wallet = self._getWalletByName(name) if not wallet: - walletFileName = Cli._normalizedWalletFileName(name) + walletFileName = normalizedWalletFileName(name) self.restoreWalletByName(walletFileName, copyAs=copyAs, override=override) @@ -1528,7 +1550,7 @@ def _isWalletFilePathBelongsToCurrentContext(self, filePath): return True - def getAllEnvDirNamesForKeyrings(self): + def getAllSubDirNamesForKeyrings(self): return [NO_ENV] def checkIfWalletPathBelongsToCurrentContext(self, filePath): @@ -1542,7 +1564,7 @@ def checkIfWalletPathBelongsToCurrentContext(self, filePath): "according to the environment it belongs to." "\nPossible sub directory names are: {}". 
format(keyringsBaseDir, filePath, - self.getAllEnvDirNamesForKeyrings())) + self.getAllSubDirNamesForKeyrings())) return False curContextDirName = self.getContextBasedKeyringsBaseDir() @@ -1704,53 +1726,44 @@ def performValidationCheck(self, wallet, walletFilePath, override=False): def restoreWalletByPath(self, walletFilePath, copyAs=None, override=False): try: + wallet = getWalletByPath(walletFilePath) - with open(walletFilePath) as walletFile: - try: - # if wallet already exists, deserialize it - # and set as active wallet - wallet = decode(walletFile.read()) - if copyAs: - wallet.name=copyAs - - if not self.performValidationCheck(wallet, walletFilePath, - override): - return False - - # As the persisted wallet restored and validated successfully, - # before we restore it, lets save active wallet (if exists) - if self._activeWallet: - self._saveActiveWallet() - - self._wallets[wallet.name] = wallet - self.print('\nSaved keyring "{}" restored'. - format(wallet.name), newline=False) - self.print(" ({})".format(walletFilePath) - , Token.Gray) - self.activeWallet = wallet - self.activeIdentifier = wallet.defaultId - - self.printWarningIfIncompatibleWalletIsRestored(walletFilePath) - - except (ValueError, AttributeError) as e: - self.logger.info( - "error occurred while restoring wallet {}: {}". - format(walletFilePath, e), Token.BoldOrange) - except IOError: + if copyAs: + wallet.name=copyAs + + if not self.performValidationCheck(wallet, walletFilePath, + override): + return False + + # As the persisted wallet restored and validated successfully, + # before we restore it, lets save active wallet (if exists) + if self._activeWallet: + self._saveActiveWallet() + + self._wallets[wallet.name] = wallet + self.print('\nSaved keyring "{}" restored'. 
+ format(wallet.name), newline=False) + self.print(" ({})".format(walletFilePath) + , Token.Gray) + self.activeWallet = wallet + self.activeIdentifier = wallet.defaultId + + self.printWarningIfIncompatibleWalletIsRestored(walletFilePath) + + return True + except (ValueError, AttributeError) as e: + self.logger.info( + "error occurred while restoring wallet {}: {}". + format(walletFilePath, e), Token.BoldOrange) + except IOError as e: self.logger.debug("No such keyring file exists ({})". format(walletFilePath)) def restoreLastActiveWallet(self): - filePattern = "*.{}".format(WALLET_FILE_EXTENSION) baseFileName=None try: - def getLastModifiedTime(file): - return os.stat(file).st_mtime_ns - keyringPath = self.getContextBasedKeyringsBaseDir() - newest = max(glob.iglob('{}/{}'.format(keyringPath, filePattern)), - key=getLastModifiedTime) - baseFileName = basename(newest) + baseFileName = getLastSavedWalletFileName(keyringPath) self._searchAndSetWallet(os.path.join(keyringPath, baseFileName)) except ValueError as e: if not str(e) == "max() arg is an empty sequence": @@ -1765,7 +1778,7 @@ def errorDuringRestoringLastActiveWallet(self, baseFileName, e): raise e def restoreWalletByName(self, walletFileName, copyAs=None, override=False): - walletFilePath = self.getWalletFilePath( + walletFilePath = getWalletFilePath( self.getContextBasedKeyringsBaseDir(), walletFileName) self.restoreWalletByPath(walletFilePath, copyAs=copyAs, override=override) @@ -1774,10 +1787,6 @@ def getWalletKeyName(walletFileName): return walletFileName.replace( ".{}".format(WALLET_FILE_EXTENSION), "") - @staticmethod - def _normalizedWalletFileName(walletName): - return "{}.{}".format(walletName.lower(), WALLET_FILE_EXTENSION) - @staticmethod def getPromptAndEnv(cliName, currPromptText): if PROMPT_ENV_SEPARATOR not in currPromptText: @@ -1788,7 +1797,7 @@ def getPromptAndEnv(cliName, currPromptText): def getActiveWalletPersitentFileName(self): fileName = self._activeWallet.name if self._activeWallet \ 
else self.name - return Cli._normalizedWalletFileName(fileName) + return normalizedWalletFileName(fileName) @property @@ -1831,10 +1840,6 @@ def isAnyWalletFileExistsForCurrentContext(self): pattern = "{}/*.{}".format(keyringPath, WALLET_FILE_EXTENSION) return self.isAnyWalletFileExistsForGivenContext(pattern) - @staticmethod - def getWalletFilePath(basedir, walletFileName): - return os.path.join(basedir, walletFileName) - @property def getActiveEnv(self): return None @@ -1857,27 +1862,15 @@ def performCompatibilityCheckBeforeSave(self): def _saveActiveWalletInDir(self, contextDir, printMsgs=True): try: - createDirIfNotExists(contextDir) - walletFilePath = Cli.getWalletFilePath( - contextDir, self.walletFileName) - with open(walletFilePath, "w+") as walletFile: - try: - encodedWallet = encode(self._activeWallet) - walletFile.write(encodedWallet) - if printMsgs: - self.print('Active keyring "{}" saved'.format( - self._activeWallet.name), newline=False) - self.print(' ({})'.format(walletFilePath), Token.Gray) - except ValueError as ex: - self.logger.info("ValueError: " + - "Could not save wallet while exiting\n {}" - .format(ex)) - except IOError: - self.logger.info( - "IOError while writing data to wallet file" - ) + walletFilePath = saveGivenWallet(self._activeWallet, + self.walletFileName, contextDir) + if printMsgs: + self.print('Active keyring "{}" saved'.format( + self._activeWallet.name), newline=False) + self.print(' ({})'.format(walletFilePath), Token.Gray) + except IOError as ex: - self.logger.info("Error occurred while creating wallet. " + + self.logger.info("Error occurred while saving wallet. 
" + "error no.{}, error.{}" .format(ex.errno, ex.strerror)) diff --git a/plenum/cli/command.py b/plenum/cli/command.py index 0fb03ba56a..d3acbb55f1 100644 --- a/plenum/cli/command.py +++ b/plenum/cli/command.py @@ -79,11 +79,12 @@ def __str__(self): usage="status client ", examples="status client Alice") -keyShareCmd = Command( - id="keyshare", - title="Manually starts key sharing of a node", - usage="keyshare node ", - examples="keyshare node Alpha") +# TODO: Obsolete, Needs to be removed +# keyShareCmd = Command( +# id="keyshare", +# title="Manually starts key sharing of a node", +# usage="keyshare node ", +# examples="keyshare node Alpha") loadPluginsCmd = Command( id="load plugins", @@ -122,7 +123,7 @@ def __str__(self): useIdCmd = Command( id="use identifier", - title="Marks given idetifier active/default", + title="Marks given identifier active/default", usage="use identifier ", note="Note: To see all identifiers in active keyring, use 'list ids' command", examples="use identifier 5pJcAEAQqW7B8aGSxDArGaeXvb1G1MQwwqLMLmG2fAy9") diff --git a/plenum/cli/constants.py b/plenum/cli/constants.py index 3c7613d9e5..4fe401a2e5 100644 --- a/plenum/cli/constants.py +++ b/plenum/cli/constants.py @@ -105,7 +105,7 @@ def getPipedRegEx(cmd): # commands SIMPLE_CMDS = {'status', 'exit', 'quit', 'license'} CLI_CMDS = {'status', 'new'} -NODE_CMDS = CLI_CMDS | {'keyshare'} +NODE_CMDS = CLI_CMDS # command formatted reg exs diff --git a/plenum/cli/helper.py b/plenum/cli/helper.py index cec993f0eb..03ffb16d6b 100644 --- a/plenum/cli/helper.py +++ b/plenum/cli/helper.py @@ -19,7 +19,7 @@ CLIENT_GRAMS_USE_KEYRING_FORMATTED_REG_EX, \ CLIENT_GRAMS_SAVE_KEYRING_FORMATTED_REG_EX, \ CLIENT_GRAMS_LIST_KEYRINGS_FORMATTED_REG_EX -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() diff --git a/plenum/client/client.py b/plenum/client/client.py index 01e0d16544..c936b74c6a 100644 --- a/plenum/client/client.py +++ b/plenum/client/client.py @@ 
-12,35 +12,34 @@ from typing import List, Union, Dict, Optional, Tuple, Set, Any, \ Iterable -from raet.raeting import AutoMode +from plenum.common.stacks import nodeStackClass +from stp_core.crypto.nacl_wrappers import Signer +from stp_core.network.auth_mode import AuthMode +from stp_core.network.network_interface import NetworkInterface +from stp_core.types import HA from ledger.merkle_verifier import MerkleVerifier from ledger.serializers.compact_serializer import CompactSerializer from ledger.util import F, STH from plenum.client.pool_manager import HasPoolManager -from plenum.common.exceptions import MissingNodeOp, RemoteNotFound +from plenum.common.config_util import getConfig +from plenum.common.exceptions import MissingNodeOp +from stp_core.network.exceptions import RemoteNotFound from plenum.common.has_file_storage import HasFileStorage from plenum.common.ledger_manager import LedgerManager +from stp_core.common.log import getlogger from plenum.common.motor import Motor from plenum.common.plugin_helper import loadPlugins -from plenum.common.raet import getHaFromLocalEstate -from plenum.common.signer import Signer -from plenum.common.stacked import NodeStack +from plenum.common.request import Request from plenum.common.startable import Status, LedgerState, Mode from plenum.common.constants import REPLY, POOL_LEDGER_TXNS, \ LEDGER_STATUS, CONSISTENCY_PROOF, CATCHUP_REP, REQACK, REQNACK, OP_FIELD_NAME -from plenum.common.types import Reply, f, HA, \ - LedgerStatus, TaggedTuples -from plenum.common.request import Request -from plenum.common.util import getMaxFailures, MessageProcessor, \ - checkIfMoreThanFSameItems, rawToFriendly +from plenum.common.txn_util import getTxnOrderedFields +from plenum.common.types import Reply, f, LedgerStatus, TaggedTuples +from plenum.common.util import getMaxFailures, checkIfMoreThanFSameItems, rawToFriendly +from plenum.common.message_processor import MessageProcessor from plenum.persistence.client_req_rep_store_file import 
ClientReqRepStoreFile from plenum.persistence.client_txn_log import ClientTxnLog -from raet.nacling import Signer - -from plenum.common.log import getlogger -from plenum.common.txn_util import getTxnOrderedFields -from plenum.common.config_util import getConfig from plenum.server.has_action_queue import HasActionQueue logger = getlogger() @@ -70,16 +69,19 @@ def __init__(self, self.basedirpath = basedirpath signer = Signer(sighex) - sighex = signer.keyhex + sighex = signer.keyraw verkey = rawToFriendly(signer.verraw) - self.name = name self.stackName = verkey + # TODO: Have a way for a client to have a user friendly name. Does it + # matter now, it used to matter in some CLI exampples in the past. + # self.name = name + self.name = self.stackName cha = None # If client information already exists is RAET then use that if self.exists(self.stackName, basedirpath): - cha = getHaFromLocalEstate(self.stackName, basedirpath) + cha = self.nodeStackClass.getHaFromLocal(self.stackName, basedirpath) if cha: cha = HA(*cha) logger.debug("Client {} ignoring given ha {} and using {}". 
@@ -94,6 +96,9 @@ def __init__(self, HasFileStorage.__init__(self, self.name, baseDir=self.basedirpath, dataDir=self.dataDir) + # TODO: Find a proper name + self.alias = name + self._ledger = None if not nodeReg: @@ -117,7 +122,7 @@ def __init__(self, stackargs = dict(name=self.stackName, ha=cha, main=False, # stops incoming vacuous joins - auto=AutoMode.always) + auth_mode=AuthMode.ALLOW_ANY.value) stackargs['basedirpath'] = basedirpath self.created = time.perf_counter() @@ -130,7 +135,7 @@ def __init__(self, if self.nodeReg: logger.info("Client {} initialized with the following node registry:" - .format(self.name)) + .format(self.alias)) lengths = [max(x) for x in zip(*[ (len(name), len(host), len(str(port))) for name, (host, port) in self.nodeReg.items()])] @@ -140,7 +145,7 @@ def __init__(self, logger.info(fmt.format(name, host, port)) else: logger.info( - "Client {} found an empty node registry:".format(self.name)) + "Client {} found an empty node registry:".format(self.alias)) Motor.__init__(self) @@ -203,14 +208,14 @@ def exists(name, basedirpath): os.path.exists(os.path.join(basedirpath, name)) @property - def nodeStackClass(self) -> NodeStack: - return NodeStack + def nodeStackClass(self) -> NetworkInterface: + return nodeStackClass def start(self, loop): oldstatus = self.status if oldstatus in Status.going(): logger.info("{} is already {}, so start has no effect". - format(self, self.status.name)) + format(self.alias, self.status.name)) else: super().start(loop) self.nodestack.start() @@ -229,7 +234,7 @@ async def prod(self, limit) -> int: s = 0 if self.isGoing(): s = await self.nodestack.service(limit) - await self.nodestack.serviceLifecycle() + self.nodestack.serviceLifecycle() self.nodestack.flushOutBoxes() s += self._serviceActions() # TODO: This if condition has to be removed. 
`_ledger` if once set wont @@ -242,6 +247,7 @@ def submitReqs(self, *reqs: Request) -> List[Request]: requests = [] for request in reqs: if self.mode == Mode.discovered and self.hasSufficientConnections: + logger.debug('Client {} sending request {}'.format(self, request)) self.nodestack.send(request) self.expectingFor(request) else: @@ -314,7 +320,16 @@ def _statusChanged(self, old, new): # do nothing for now pass + def stop(self, *args, **kwargs): + super().stop(*args, **kwargs) + self.txnLog.close() + if self._ledger is not None: + self._ledger.stop() + if hasattr(self, 'hashStore') and self.hashStore is not None: + self.hashStore.close() + def onStopping(self, *args, **kwargs): + logger.debug('Stopping client {}'.format(self)) self.nodestack.nextCheck = 0 self.nodestack.stop() if self._ledger: @@ -535,11 +550,11 @@ def retryForExpected(self): try: remote = self.nodestack.getRemote(nm) except RemoteNotFound: - logger.warn('{} could not find remote {}'.format(self, nm)) + logger.warning('{} could not find remote {}'.format(self, nm)) continue logger.debug('Remote {} of {} being joined since REQACK for not ' 'received for request'.format(remote, self)) - self.nodestack.join(remote.uid, cascade=True) + self.nodestack.connect(name=remote.name) if keys: # Need a delay in case connection has to be established with some @@ -601,7 +616,7 @@ def verifyMerkleProof(*replies: Tuple[Reply]) -> bool: r[f.RESULT.nm][F.rootHash.name].encode()) auditPath = [base64.b64decode( a.encode()) for a in r[f.RESULT.nm][F.auditPath.name]] - filtered = ((k, v) for (k, v) in r[f.RESULT.nm].iteritems() + filtered = ((k, v) for (k, v) in r[f.RESULT.nm].items() if k not in [F.auditPath.name, F.seqNo.name, F.rootHash.name]) result = serializer.serialize(dict(filtered)) diff --git a/plenum/client/pool_manager.py b/plenum/client/pool_manager.py index 4c040e9446..7139760f86 100644 --- a/plenum/client/pool_manager.py +++ b/plenum/client/pool_manager.py @@ -2,14 +2,16 @@ import json from ledger.util 
import F -from plenum.common.exceptions import RemoteNotFound +from stp_core.network.exceptions import RemoteNotFound +from stp_core.types import HA + from plenum.common.stack_manager import TxnStackManager from plenum.common.constants import TXN_TYPE, NODE, ALIAS, DATA, TARGET_NYM, NODE_IP,\ NODE_PORT, CLIENT_IP, CLIENT_PORT, VERKEY, SERVICES, VALIDATOR, CLIENT_STACK_SUFFIX from plenum.common.types import PoolLedgerTxns, f, HA from plenum.common.util import getMaxFailures from plenum.common.txn_util import updateGenesisPoolTxnFile -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() t = f.TXN.nm @@ -107,8 +109,8 @@ def nodeServicesChanged(self, txn): nodeNym = txn[TARGET_NYM] _, nodeInfo = self.getNodeInfoFromLedger(nodeNym) remoteName = nodeInfo[DATA][ALIAS] + CLIENT_STACK_SUFFIX - oldServices = set(nodeInfo[DATA][SERVICES]) - newServices = set(txn[DATA][SERVICES]) + oldServices = set(nodeInfo[DATA].get(SERVICES, [])) + newServices = set(txn[DATA].get(SERVICES, [])) if oldServices == newServices: logger.debug( "Client {} not changing {} since it is same as existing" diff --git a/plenum/client/wallet.py b/plenum/client/wallet.py index bdd92d5019..9528d53689 100644 --- a/plenum/client/wallet.py +++ b/plenum/client/wallet.py @@ -6,9 +6,9 @@ from plenum.common.did_method import DidMethods, DefaultDidMethods from plenum.common.exceptions import EmptyIdentifier -from plenum.common.log import getlogger -from plenum.common.signer import Signer -from plenum.common.types import Identifier +from stp_core.common.log import getlogger +from stp_core.crypto.signer import Signer +from stp_core.types import Identifier from plenum.common.request import Request from plenum.common.util import getTimeBasedId @@ -27,7 +27,7 @@ def decrypt(self, key) -> 'Wallet': Alias = str -IdData = HA = NamedTuple("IdData", [ +IdData = NamedTuple("IdData", [ ("signer", Signer), ("lastReqId", int)]) @@ -73,9 +73,6 @@ def encrypt(self, key: bytes, 
raw = crypto_secretbox(byts, nonce, key) return EncryptedWallet(raw, nonce) - # def addIdentifier(self, didMethodName=None): - # return self.addSigner(didMethodName).identifier - # def addIdentifier(self, identifier=None, seed=None, @@ -89,6 +86,7 @@ def addIdentifier(self, :param identifier: signer identifier or None to use random one :param seed: signer key seed or None to use random one :param signer: signer to add + :param alias: a friendly readable name for the signer :param didMethodName: name of DID Method if not the default :return: """ @@ -190,8 +188,9 @@ def signRequest(self, idData = self._getIdData(idr) req.identifier = idr req.reqId = getTimeBasedId() + req.digest = req.getDigest() self.ids[idr] = IdData(idData.signer, req.reqId) - req.signature = self.signMsg(msg=req.getSigningState(), + req.signature = self.signMsg(msg=req.signingState, identifier=idr, otherIdentifier=req.identifier) @@ -211,9 +210,6 @@ def signOp(self, request = Request(operation=op) return self.signRequest(request, identifier) - # Removed: - # _getIdData - removed in favor of passing RequestIdStore - def _signerById(self, idr: Identifier): signer = self.idsToSigners.get(idr) if not signer: diff --git a/plenum/common/batched.py b/plenum/common/batched.py new file mode 100644 index 0000000000..ce83273ac3 --- /dev/null +++ b/plenum/common/batched.py @@ -0,0 +1,117 @@ +from collections import deque +from typing import Any, Iterable +from typing import Dict + +from plenum.common.constants import BATCH +from stp_core.crypto.signer import Signer +from stp_core.common.log import getlogger +from plenum.common.types import Batch, OP_FIELD_NAME, f +from plenum.common.message_processor import MessageProcessor + +logger = getlogger() + + +class Batched(MessageProcessor): + """ + A mixin to allow batching of requests to be send to remotes. 
+ """ + + def __init__(self): + """ + :param self: 'NodeStacked' + """ + self.outBoxes = {} # type: Dict[int, deque] + + def _enqueue(self, msg: Any, rid: int, signer: Signer) -> None: + """ + Enqueue the message into the remote's queue. + + :param msg: the message to enqueue + :param rid: the id of the remote node + """ + payload = self.prepForSending(msg, signer) + if rid not in self.outBoxes: + self.outBoxes[rid] = deque() + self.outBoxes[rid].append(payload) + + def _enqueueIntoAllRemotes(self, msg: Any, signer: Signer) -> None: + """ + Enqueue the specified message into all the remotes in the nodestack. + + :param msg: the message to enqueue + """ + for rid in self.remotes.keys(): + self._enqueue(msg, rid, signer) + + def send(self, msg: Any, *rids: Iterable[int], signer: Signer = None) -> None: + """ + Enqueue the given message into the outBoxes of the specified remotes + or into the outBoxes of all the remotes if rids is None + + :param msg: the message to enqueue + :param rids: ids of the remotes to whose outBoxes + this message must be enqueued + """ + if rids: + for r in rids: + self._enqueue(msg, r, signer) + else: + self._enqueueIntoAllRemotes(msg, signer) + + def flushOutBoxes(self) -> None: + """ + Clear the outBoxes and transmit batched messages to remotes. + """ + removedRemotes = [] + for rid, msgs in self.outBoxes.items(): + try: + dest = self.remotes[rid].name + except KeyError: + removedRemotes.append(rid) + continue + if msgs: + if len(msgs) == 1: + msg = msgs.popleft() + # Setting timeout to never expire + self.transmit(msg, rid, timeout=self.messageTimeout) + logger.trace( + "{} sending msg {} to {}".format(self, msg, dest)) + else: + logger.debug( + "{} batching {} msgs to {} into one transmission". 
+ format(self, len(msgs), dest)) + logger.trace(" messages: {}".format(msgs)) + batch = Batch([], None) + while msgs: + batch.messages.append(msgs.popleft()) + # don't need to sign the batch, when the composed msgs are + # signed + payload = self.prepForSending(batch) + logger.trace("{} sending payload to {}: {}".format(self, + dest, + payload)) + # Setting timeout to never expire + self.transmit(payload, rid, timeout=self.messageTimeout) + for rid in removedRemotes: + logger.warning("{} rid {} has been removed".format(self, rid), + extra={"cli": False}) + msgs = self.outBoxes[rid] + if msgs: + self.discard(msgs, "rid {} no longer available".format(rid), + logMethod=logger.debug) + del self.outBoxes[rid] + + def doProcessReceived(self, msg, frm, ident): + if OP_FIELD_NAME in msg and msg[OP_FIELD_NAME] == BATCH: + if f.MSGS.nm in msg and isinstance(msg[f.MSGS.nm], list): + # Removing ping and pong messages from Batch + relevantMsgs = [] + for m in msg[f.MSGS.nm]: + r = self.handlePingPong(m, frm, ident) + if not r: + relevantMsgs.append(m) + + if not relevantMsgs: + return None + msg[f.MSGS.nm] = relevantMsgs + return msg diff --git a/plenum/common/config_util.py b/plenum/common/config_util.py index b8e51bc5e1..18245a2b99 100644 --- a/plenum/common/config_util.py +++ b/plenum/common/config_util.py @@ -2,7 +2,6 @@ from importlib import import_module from importlib.util import module_from_spec, spec_from_file_location -import plenum.common CONFIG = None diff --git a/plenum/common/crypto.py b/plenum/common/crypto.py deleted file mode 100644 index 2149f4fdd1..0000000000 --- a/plenum/common/crypto.py +++ /dev/null @@ -1,36 +0,0 @@ -import ctypes -from binascii import unhexlify, hexlify - -from libnacl import crypto_box_SECRETKEYBYTES, nacl, crypto_box_PUBLICKEYBYTES -from plenum.common.util import cleanSeed, isHex -from raet.nacling import Signer - - -def ed25519SkToCurve25519(sk, toHex=False): - if isHex(sk): - sk = unhexlify(sk) - secretKey = 
ctypes.create_string_buffer(crypto_box_SECRETKEYBYTES) - ret = nacl.crypto_sign_ed25519_sk_to_curve25519(secretKey, sk) - if ret: - raise Exception("error in converting ed22519 key to curve25519") - return hexlify(secretKey.raw) if toHex else secretKey.raw - - -def ed25519PkToCurve25519(pk, toHex=False): - if isHex(pk): - pk = unhexlify(pk) - publicKey = ctypes.create_string_buffer(crypto_box_PUBLICKEYBYTES) - ret = nacl.crypto_sign_ed25519_pk_to_curve25519(publicKey, pk) - if ret: - raise Exception("error in converting ed22519 key to curve25519") - return hexlify(publicKey.raw) if toHex else publicKey.raw - - -def getEd25519AndCurve25519Keys(seed=None): - if seed: - seed = cleanSeed(seed) - signer = Signer(seed) - sigkey, verkey = signer.keyhex, signer.verhex - prikey, pubkey = hexlify(ed25519SkToCurve25519(signer.keyraw)), \ - hexlify(ed25519PkToCurve25519(signer.verraw)) - return (sigkey, verkey), (prikey, pubkey) diff --git a/plenum/common/did_method.py b/plenum/common/did_method.py index f177eb95f8..f301ab79a9 100644 --- a/plenum/common/did_method.py +++ b/plenum/common/did_method.py @@ -2,7 +2,7 @@ from plenum.common.exceptions import DidMethodNotFound from plenum.common.signer_did import DidSigner -from plenum.common.types import Identifier +from stp_core.types import Identifier Seed = str SignerConstructor = Callable[[Identifier], Seed] diff --git a/plenum/common/error.py b/plenum/common/error.py index 0ce8f19d46..c440fad50e 100644 --- a/plenum/common/error.py +++ b/plenum/common/error.py @@ -1,5 +1,5 @@ def fault(ex: Exception, msg: str): - from plenum.common.log import getlogger + from stp_core.common.log import getlogger getlogger().error(msg, exc_info=ex) diff --git a/plenum/common/error_codes.py b/plenum/common/error_codes.py deleted file mode 100644 index fd8264808b..0000000000 --- a/plenum/common/error_codes.py +++ /dev/null @@ -1 +0,0 @@ -SOCKET_BIND_ERROR_ALREADY_IN_USE = 98 diff --git a/plenum/common/eventually.py b/plenum/common/eventually.py 
deleted file mode 100644 index 42e36be668..0000000000 --- a/plenum/common/eventually.py +++ /dev/null @@ -1,155 +0,0 @@ -import asyncio -import os -import time -from asyncio.coroutines import CoroWrapper -from inspect import isawaitable -from typing import Callable, TypeVar, Optional, Iterable - -from plenum.common.log import getlogger - -from plenum.common.ratchet import Ratchet - -T = TypeVar('T') - -logger = getlogger() - -FlexFunc = TypeVar('flexFunc', CoroWrapper, Callable[[], T]) - - -# increase this number to allow eventually to change timeouts proportionatly -def getSlowFactor(): - numOfCpus = os.cpu_count() - if numOfCpus == 8 or numOfCpus is None: - return 1 - elif numOfCpus == 4: - return 1.5 - elif numOfCpus < 4: - return 2 - -slowFactor = getSlowFactor() - - -async def eventuallySoon(coroFunc: FlexFunc, *args): - return await eventually(coroFunc, *args, - retryWait=0.1 * slowFactor, - timeout=3 * slowFactor, - ratchetSteps=10) - - -async def eventuallyAll(*coroFuncs: FlexFunc, # (use functools.partials if needed) - totalTimeout: float, - retryWait: float=0.1, - acceptableExceptions=None, - acceptableFails: int=0): - """ - - :param coroFuncs: iterable of no-arg functions - :param totalTimeout: - :param retryWait: - :param acceptableExceptions: - :param acceptableFails: how many of the passed in coroutines can - ultimately fail and still be ok - :return: - """ - start = time.perf_counter() - - totalTimeout *= slowFactor - - def remaining(): - return totalTimeout + start - time.perf_counter() - funcNames = [] - others = 0 - fails = 0 - for cf in coroFuncs: - if len(funcNames) < 2: - funcNames.append(getFuncName(cf)) - else: - others += 1 - # noinspection PyBroadException - try: - await eventually(cf, - retryWait=retryWait * slowFactor, - timeout=remaining(), - acceptableExceptions=acceptableExceptions, - verbose=False) - except Exception: - fails += 1 - logger.debug("a coro {} timed out without succeeding; fail count: " - "{}, acceptable: {}". 
- format(getFuncName(cf), fails, acceptableFails)) - if fails > acceptableFails: - raise - if others: - funcNames.append("and {} others".format(others)) - desc = ", ".join(funcNames) - logger.debug("{} succeeded with {:.2f} seconds to spare". - format(desc, remaining())) - - -def getFuncName(f): - if hasattr(f, "__name__"): - return f.__name__ - elif hasattr(f, "func"): - return "partial({})".format(getFuncName(f.func)) - else: - return "" - - -def recordFail(fname, timeout): - pass - - -def recordSuccess(fname, timeout, param, remain): - pass - - -async def eventually(coroFunc: FlexFunc, - *args, - retryWait: float=0.1, - timeout: float=5, - ratchetSteps: Optional[int]=None, - acceptableExceptions=None, - verbose=True) -> T: - - if acceptableExceptions and not isinstance(acceptableExceptions, Iterable): - acceptableExceptions = [acceptableExceptions] - start = time.perf_counter() - - ratchet = Ratchet.fromGoalDuration(retryWait*slowFactor, - ratchetSteps, - timeout*slowFactor).gen() \ - if ratchetSteps else None - - fname = getFuncName(coroFunc) - while True: - remain = 0 - try: - remain = start + timeout*slowFactor - time.perf_counter() - if remain < 0: - # this provides a convenient breakpoint for a debugger - logger.warning("{} last try...".format(fname), - extra={"cli": False}) - # noinspection PyCallingNonCallable - res = coroFunc(*args) - if isawaitable(res): - res = await res - if verbose: - recordSuccess(fname, timeout, timeout*slowFactor, remain) - - logger.debug("{} succeeded with {:.2f} seconds to spare". 
- format(fname, remain)) - return res - except Exception as ex: - if acceptableExceptions and type(ex) not in acceptableExceptions: - raise - if remain >= 0: - if verbose: - logger.trace("{} not succeeded yet, {:.2f} seconds " - "remaining...".format(fname, remain)) - await asyncio.sleep(next(ratchet) if ratchet else retryWait) - else: - recordFail(fname, timeout) - logger.error("{} failed; not trying any more because {} " - "seconds have passed; args were {}". - format(fname, timeout, args)) - raise ex diff --git a/plenum/common/exceptions.py b/plenum/common/exceptions.py index 6a5da05030..74b67cec5d 100644 --- a/plenum/common/exceptions.py +++ b/plenum/common/exceptions.py @@ -1,5 +1,6 @@ +from re import compile + from plenum.server.suspicion_codes import Suspicion -from re import compile, match class ReqInfo: @@ -8,23 +9,6 @@ def __init__(self, identifier=None, reqId=None): self.reqId = reqId -class NodeError(Exception): - pass - - -class PortNotAvailableForNodeWebServer(NodeError): - pass - - -class RemoteError(NodeError): - def __init__(self, remote): - self.remote = remote - - -class RemoteNotFound(RemoteError): - pass - - class BaseExc(Exception): # def __init__(self, code: int=None, reason: str=None): # self.code = code @@ -105,7 +89,7 @@ class UnregisteredIdentifier(SigningException): reason = 'provided owner identifier not registered with agent' -class RaetKeysNotFoundException(Exception): +class KeysNotFoundException(Exception): code = 141 reason = 'Keys not found in the keep for {}. 
' \ 'To generate them run script ' @@ -228,31 +212,6 @@ class OrientDBNotRunning(GraphStorageNotAvailable): pass -class EndpointException(Exception): - pass - - -class MissingEndpoint(EndpointException): - def __init__(self): - super().__init__('missing endpoint') - - -class InvalidEndpointIpAddress(EndpointException): - def __init__(self, endpoint): - super().__init__("invalid endpoint address: '{}'".format(endpoint)) - - -class InvalidEndpointPort(EndpointException): - def __init__(self, endpoint): - super().__init__("invalid endpoint port: '{}'".format(endpoint)) - - -class PortNotAvailable(OSError): - def __init__(self, port): - self.port = port - super().__init__("port not available: {}".format(port)) - - class OperationError(Exception): def __init__(self, error): super().__init__("error occurred during operation: {}".format(error)) diff --git a/plenum/common/jsonpickle_util.py b/plenum/common/jsonpickle_util.py new file mode 100644 index 0000000000..505652a50b --- /dev/null +++ b/plenum/common/jsonpickle_util.py @@ -0,0 +1,29 @@ +from enum import Enum + +import jsonpickle +from jsonpickle import tags +from jsonpickle.unpickler import loadclass + + +ENUMVALUE = 'py/enumvalue' + + +class EnumHandler(jsonpickle.handlers.BaseHandler): + """ + Jsonpickle handler for enumerations. + Used to avoid the issue https://github.com/jsonpickle/jsonpickle/issues/135 + in jsonpickle 0.9.2 which is the canonical version for Ubuntu 16.04. + Provides a custom format for serialization of enumerations. 
+ """ + + def flatten(self, obj, data): + data[ENUMVALUE] = obj.value + return data + + def restore(self, obj): + enum_class = loadclass(obj[tags.OBJECT]) + return enum_class(obj[ENUMVALUE]) + + +def setUpJsonpickle(): + jsonpickle.handlers.register(Enum, EnumHandler, base=True) diff --git a/plenum/common/keygen_utils.py b/plenum/common/keygen_utils.py new file mode 100644 index 0000000000..85a1dc8e34 --- /dev/null +++ b/plenum/common/keygen_utils.py @@ -0,0 +1,51 @@ +import os + +from plenum.common.stacks import nodeStackClass +from stp_zmq.util import createCertsFromKeys + +from plenum.common.constants import CLIENT_STACK_SUFFIX + + +def initLocalKeys(name, baseDir, sigseed, override=False, config=None): + pubkey, verkey = nodeStackClass.initLocalKeys(name, baseDir, sigseed, + override=override) + print("Public key is", pubkey) + print("Verification key is", verkey) + return pubkey, verkey + + +def initRemoteKeys(name, baseDir, sigseed, verkey, override=False, config=None): + nodeStackClass.initRemoteKeys(name, baseDir, sigseed, verkey, + override=override) + + + +def initNodeKeysForBothStacks(name, baseDir, sigseed, override=False, config=None): + nodeStackClass.initLocalKeys(name, baseDir, sigseed, override=override) + nodeStackClass.initLocalKeys(name + CLIENT_STACK_SUFFIX, baseDir, sigseed, + override=override) + + + +def areKeysSetup(name, baseDir, config=None): + return nodeStackClass.areKeysSetup(name, baseDir) + + +def learnKeysFromOthers(baseDir, nodeName, otherNodes): + otherNodeStacks = [] + for otherNode in otherNodes: + if otherNode.name != nodeName: + otherNodeStacks.append(otherNode.nodestack) + otherNodeStacks.append(otherNode.clientstack) + nodeStackClass.learnKeysFromOthers(baseDir, nodeName, otherNodeStacks) + + +def tellKeysToOthers(node, otherNodes): + otherNodeStacks = [] + for otherNode in otherNodes: + if otherNode != node: + otherNodeStacks.append(otherNode.nodestack) + otherNodeStacks.append(otherNode.clientstack) + + 
node.nodestack.tellKeysToOthers(otherNodeStacks) + node.clientstack.tellKeysToOthers(otherNodeStacks) diff --git a/plenum/common/ledger_manager.py b/plenum/common/ledger_manager.py index 79879b05a0..abe37fa76c 100644 --- a/plenum/common/ledger_manager.py +++ b/plenum/common/ledger_manager.py @@ -15,13 +15,12 @@ from ledger.merkle_verifier import MerkleVerifier from ledger.util import F -from plenum.common.exceptions import RemoteNotFound from plenum.common.startable import LedgerState from plenum.common.types import LedgerStatus, CatchupRep, ConsistencyProof, f, \ CatchupReq, ConsProofRequest from plenum.common.util import getMaxFailures from plenum.common.config_util import getConfig -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.server.has_action_queue import HasActionQueue logger = getlogger() @@ -66,6 +65,7 @@ def __init__(self, owner, ownedByNode: bool=True): # transactions that need to be applied to the domain transaction ledger self.receivedCatchUpReplies = {} # type: Dict[int, List] + # Keep track of received replies from different senders self.recvdCatchupRepliesFrm = {} # type: Dict[int, Dict[str, List[CatchupRep]]] @@ -121,7 +121,8 @@ def addLedger(self, typ: int, ledger: Ledger, def checkIfCPsNeeded(self, ledgerType): if self.consistencyProofsTimers[ledgerType] is not None: - logger.debug("{} requesting consistency proofs after timeout".format(self)) + logger.debug("{} requesting consistency proofs of {} after timeout". 
+ format(self, ledgerType)) adjustedF = getMaxFailures(self.owner.totalNodes - 1) recvdConsProof = self.recvdConsistencyProofs[ledgerType] grpdPrf, nullProofs = self._groupConsistencyProofs(recvdConsProof) @@ -138,6 +139,7 @@ def checkIfCPsNeeded(self, ledgerType): self.recvdConsistencyProofs[ledgerType] = {} self.consistencyProofsTimers[ledgerType] = None + self.recvdCatchupRepliesFrm[ledgerType] = {} def checkIfTxnsNeeded(self, ledgerType): if self.catchupReplyTimers[ledgerType] is not None: @@ -151,7 +153,7 @@ def checkIfTxnsNeeded(self, ledgerType): if totalMissing: logger.debug( "{} requesting {} missing transactions after timeout". - format(self, totalMissing)) + format(self, totalMissing)) eligibleNodes = list(self.nodestack.conns - self.blacklistedNodes) # Shuffling order of nodes so that catchup requests dont go to @@ -180,14 +182,11 @@ def addReqsForMissing(frm, to): missing = to - frm + 1 numBatches = math.ceil(missing / batchSize) for i in range(numBatches): - req = CatchupReq(ledgerType, - frm + (i * batchSize), - min(to, frm + - ((i + 1) * batchSize) - 1)) - logger.debug("{} creating catchup request {} to {}". - format(self, frm+(i*batchSize), - min(to, frm+((i+1)*batchSize)-1) - )) + s = frm + (i * batchSize) + e = min(to, frm + ((i + 1) * batchSize) - 1) + req = CatchupReq(ledgerType, s, e, end) + logger.debug("{} creating catchup request {} to {} till {}". + format(self, s, e, end)) cReqs.append(req) return missing @@ -256,9 +255,9 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): # registries (old approach) ledgerStatus = LedgerStatus(*status) if status else None if ledgerStatus.txnSeqNo < 0: - self.discard(status, reason="Received negative sequence " - "number from {}".format(frm), - logMethod=logger.warn) + self.discard(status, reason="Received negative sequence number " + "from {}".format(frm), + logMethod=logger.warn) if not status: logger.debug("{} found ledger status to be null from {}". 
format(self, frm)) @@ -374,12 +373,12 @@ def processCatchupReq(self, req: CatchupReq, frm: str): txns = ledger.getAllTxn(start, end) logger.debug("node {} requested catchup for {} from {} to {}" - .format(frm, end - start, start, end)) + .format(frm, end - start+1, start, end)) logger.debug("{} generating consistency proof: {} from {}". - format(self, end, ledger.size)) + format(self, end, req.catchupTill)) consProof = [b64encode(p).decode() for p in - ledger.tree.consistency_proof(end, ledger.size)] + ledger.tree.consistency_proof(end, req.catchupTill)] self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_TYPE.nm), txns, consProof), to=frm) @@ -426,11 +425,13 @@ def _processCatchupReplies(self, ledgerType, ledger: Ledger, catchUpReplies: List): # Removing transactions for sequence numbers are already # present in the ledger + # TODO: Inefficient, should check list in reverse and stop at first + # match since list is already sorted numProcessed = sum(1 for s, _ in catchUpReplies if s <= ledger.size) - catchUpReplies = catchUpReplies[numProcessed:] if numProcessed: logger.debug("{} found {} already processed transactions in the " "catchup replies".format(self, numProcessed)) + catchUpReplies = catchUpReplies[numProcessed:] if catchUpReplies: seqNo = catchUpReplies[0][0] if seqNo - ledger.seqNo == 1: @@ -445,7 +446,7 @@ def _processCatchupReplies(self, ledgerType, ledger: Ledger, self._removePrcdCatchupReply(ledgerType, nodeName, seqNo) return numProcessed + toBeProcessed + \ self._processCatchupReplies(ledgerType, ledger, - catchUpReplies[toBeProcessed:]) + catchUpReplies[toBeProcessed:]) else: if self.ownedByNode: self.owner.blacklistNode(nodeName, @@ -711,7 +712,7 @@ def getCatchupReqs(self, consProof: ConsistencyProof): e = min(s + batchLength - 1, end) for i in range(nodeCount): reqs.append(CatchupReq(getattr(consProof, f.LEDGER_TYPE.nm), - s, e)) + s, e, end)) s = e + 1 e = min(s + batchLength - 1, end) if s > end: @@ -811,15 +812,14 @@ def 
processStashedLedgerStatuses(self, ledgerType: int): def getStack(self, remoteName: str): if self.ownedByNode: - try: - self.clientstack.getRemote(remoteName) + if self.clientstack.hasRemote(remoteName): return self.clientstack - except RemoteNotFound: + else: pass - try: - self.nodestack.getRemote(remoteName) + + if self.nodestack.hasRemote(remoteName): return self.nodestack - except RemoteNotFound: + else: logger.error("{} cannot find remote with name {}". format(self, remoteName)) diff --git a/plenum/common/log.py b/plenum/common/log.py deleted file mode 100644 index 739c96c122..0000000000 --- a/plenum/common/log.py +++ /dev/null @@ -1,238 +0,0 @@ -import inspect -import logging -import os -import sys - -from ioflo.base.consoling import getConsole, Console - -from plenum.common.logging.TimeAndSizeRotatingFileHandler \ - import TimeAndSizeRotatingFileHandler -from plenum.common.util import Singleton, adict - -TRACE_LOG_LEVEL = 5 -DISPLAY_LOG_LEVEL = 25 - - -class CustomAdapter(logging.LoggerAdapter): - def trace(self, msg, *args, **kwargs): - self.log(TRACE_LOG_LEVEL, msg, *args, **kwargs) - - def display(self, msg, *args, **kwargs): - self.log(DISPLAY_LOG_LEVEL, msg, *args, **kwargs) - - -class CallbackHandler(logging.Handler): - def __init__(self, typestr, default_tags, callback, override_tags): - """ - Initialize the handler. 
- """ - super().__init__() - self.callback = callback - self.tags = default_tags - self.update_tags(override_tags or {}) - self.typestr = typestr - - def update_tags(self, override_tags): - self.tags.update(override_tags) - - def emit(self, record): - """ - Passes the log record back to the CLI for rendering - """ - should_cb = None - attr_val = None - if hasattr(record, self.typestr): - attr_val = getattr(record, self.typestr) - should_cb = bool(attr_val) - if should_cb is None and record.levelno >= logging.INFO: - should_cb = True - if hasattr(record, 'tags'): - for t in record.tags: - if t in self.tags: - if self.tags[t]: - should_cb = True - continue - else: - should_cb = False - break - if should_cb: - self.callback(record, attr_val) - - -class CliHandler(CallbackHandler): - def __init__(self, callback, override_tags=None): - default_tags = { - "add_replica": True - } - super().__init__(typestr="cli", - default_tags=default_tags, - callback=callback, - override_tags=override_tags) - - -class DemoHandler(CallbackHandler): - def __init__(self, callback, override_tags=None): - default_tags = { - "add_replica": True - } - super().__init__(typestr="demo", - default_tags=default_tags, - callback=callback, - override_tags=override_tags) - - -def getlogger(name=None): - return Logger().getlogger(name) - - -class TestingHandler(logging.Handler): - def __init__(self, tester): - """ - Initialize the handler. - """ - super().__init__() - self.tester = tester - - def emit(self, record): - """ - Captures a record. 
- """ - self.tester(record) - - -class Logger(metaclass=Singleton): - def __init__(self, config=None): - from plenum.common.config_util import getConfig - # TODO: This should take directory - self._config = config or getConfig() - self._addTraceToLogging() - self._addDisplayToLogging() - - self._handlers = {} - self._format = logging.Formatter(fmt=self._config.logFormat, - style=self._config.logFormatStyle) - - self._default_raet_verbosity = \ - getRAETLogLevelFromConfig("RAETLogLevel", - Console.Wordage.terse, - self._config) - - self._default_raet_log_file = \ - getRAETLogFilePath("RAETLogFilePath", self._config) - - if self._config.enableStdOutLogging: - self.enableStdLogging() - - logLevel = logging.INFO - if hasattr(self._config, "logLevel"): - logLevel = self._config.logLevel - self.setLogLevel(logLevel) - - @staticmethod - def getlogger(name=None): - if not name: - curframe = inspect.currentframe() - calframe = inspect.getouterframes(curframe, 2) - name = inspect.getmodule(calframe[1][0]).__name__ - logger = logging.getLogger(name) - return logger - - @staticmethod - def setLogLevel(log_level): - logging.root.setLevel(log_level) - - def setupRaet(self, raet_log_level=None, raet_log_file=None): - console = getConsole() - - verbosity = raet_log_level \ - if raet_log_level is not None \ - else self._default_raet_verbosity - file = raet_log_file or self._default_raet_log_file - - logging.info("Setting RAET log level {}".format(verbosity), - extra={"cli": False}) - - console.reinit(verbosity=verbosity, path=file, flushy=True) - - def enableStdLogging(self): - # only enable if CLI is not - if 'cli' in self._handlers: - raise RuntimeError('cannot configure STD logging ' - 'when CLI logging is enabled') - new = logging.StreamHandler(sys.stdout) - self._setHandler('std', new) - - def enableCliLogging(self, callback, override_tags=None): - h = CliHandler(callback, override_tags) - self._setHandler('cli', h) - # assumption is there's never a need to have std logging 
when in CLI - self._clearHandler('std') - - def enableFileLogging(self, filename): - d = os.path.dirname(filename) - if not os.path.exists(d): - os.makedirs(d) - new = TimeAndSizeRotatingFileHandler( - filename, - when=self._config.logRotationWhen, - interval=self._config.logRotationInterval, - backupCount=self._config.logRotationBackupCount, - utc=True, - maxBytes=self._config.logRotationMaxBytes) - self._setHandler('file', new) - - def _setHandler(self, typ: str, new_handler): - if new_handler.formatter is None: - new_handler.setFormatter(self._format) - - # assuming indempotence and removing old one first - self._clearHandler(typ) - - self._handlers[typ] = new_handler - logging.root.addHandler(new_handler) - - def _clearHandler(self, typ: str): - old = self._handlers.get(typ) - if old: - logging.root.removeHandler(old) - - @staticmethod - def _addTraceToLogging(): - logging.addLevelName(TRACE_LOG_LEVEL, "TRACE") - - def trace(self, message, *args, **kwargs): - if self.isEnabledFor(TRACE_LOG_LEVEL): - self._log(TRACE_LOG_LEVEL, message, args, **kwargs) - - logging.Logger.trace = trace - - @staticmethod - def _addDisplayToLogging(): - logging.addLevelName(DISPLAY_LOG_LEVEL, "DISPLAY") - - def display(self, message, *args, **kwargs): - if self.isEnabledFor(DISPLAY_LOG_LEVEL): - self._log(DISPLAY_LOG_LEVEL, message, args, **kwargs) - - logging.Logger.display = display - - -def getRAETLogLevelFromConfig(paramName, defaultValue, config): - try: - defaultVerbosity = config.__getattribute__(paramName) - defaultVerbosity = Console.Wordage.__getattribute__(defaultVerbosity) - except AttributeError: - defaultVerbosity = defaultValue - logging.debug("Ignoring RAET log level {} from config and using {} " - "instead".format(paramName, defaultValue)) - return defaultVerbosity - - -def getRAETLogFilePath(paramName, config): - try: - filePath = config.__getattribute__(paramName) - except AttributeError: - filePath = None - return filePath - - diff --git 
a/plenum/common/logging/TimeAndSizeRotatingFileHandler.py b/plenum/common/logging/TimeAndSizeRotatingFileHandler.py deleted file mode 100644 index 117777081d..0000000000 --- a/plenum/common/logging/TimeAndSizeRotatingFileHandler.py +++ /dev/null @@ -1,41 +0,0 @@ -import os -from logging.handlers import TimedRotatingFileHandler -from logging.handlers import RotatingFileHandler - - -class TimeAndSizeRotatingFileHandler(TimedRotatingFileHandler, RotatingFileHandler): - - def __init__(self, filename, when = 'h', interval = 1, backupCount = 0, - encoding = None, delay = False, utc = False, atTime = None, - mode = 'a', maxBytes=0): - - TimedRotatingFileHandler.__init__(self, filename, when, interval, - backupCount, encoding, delay, - utc, atTime) - RotatingFileHandler.__init__(self, filename, mode, maxBytes, - backupCount, encoding, delay) - - def shouldRollover(self, record): - return bool(TimedRotatingFileHandler.shouldRollover(self, record)) or \ - bool(RotatingFileHandler.shouldRollover(self, record)) - - def rotation_filename(self, default_name: str): - - if not os.path.exists(default_name): - return default_name - - dir = os.path.dirname(default_name) - defaultFileName = os.path.basename(default_name) - fileNames = os.listdir(dir) - - maxIndex = -1 - for fileName in fileNames: - if fileName.startswith(defaultFileName): - split = fileName.split(".") - try: - index = int(split[-1] if len(split) > 0 else 0) - except ValueError: - index = 0 - if index > maxIndex: - maxIndex = index - return "{}.{}".format(default_name, maxIndex + 1) \ No newline at end of file diff --git a/plenum/common/looper.py b/plenum/common/looper.py deleted file mode 100644 index afedeb1bb8..0000000000 --- a/plenum/common/looper.py +++ /dev/null @@ -1,302 +0,0 @@ -import asyncio -import inspect -import signal -import sys -import time -from asyncio import Task -from asyncio.coroutines import CoroWrapper -from typing import List - -# import uvloop -from plenum.common.exceptions import 
ProdableAlreadyAdded -from plenum.common.startable import Status -from plenum.common.log import getlogger -from plenum.common.util import lxor - -logger = getlogger() - - -class Prodable: - """ - An interface for declaring classes that can be started and prodded. When an - object is prodded, it just means that the event loop is giving it a chance - to do something. - """ - - def name(self): - raise NotImplementedError("subclass {} should implement this method" - .format(self)) - - async def prod(self, limit) -> int: - """ - Action to be performed each time the Prodable object gets processor - resources. - - :param limit: the number of messages to be processed - """ - raise NotImplementedError("subclass {} should implement this method" - .format(self)) - - def start(self, loop): - """ - Actions to be performed when the Prodable is starting up. - """ - raise NotImplementedError("subclass {} should implement this method" - .format(self)) - - def stop(self): - """ - Actions to be performed when the Prodable is starting up. - """ - raise NotImplementedError("subclass {} should implement this method" - .format(self)) - - def get_status(self) -> Status: - """ - Get the current status of this Prodable - """ - raise NotImplementedError("subclass {} should implement this method" - .format(self)) - - -class Looper: - """ - A helper class for asyncio's event_loop - """ - - def __init__(self, - prodables: List[Prodable]=None, - loop=None, - debug=False, - autoStart=True): - """ - Initialize looper with an event loop. - - :param prodables: a list of prodables that this event loop will execute - :param loop: the event loop to use - :param debug: set_debug on event loop will be set to this value - :param autoStart: start immediately? 
- """ - self.prodables = list(prodables) if prodables is not None \ - else [] # type: List[Prodable] - - # if sys.platform == 'linux': - # asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) - - if loop: - self.loop = loop - else: - try: - #if sys.platform == 'win32': - # loop = asyncio.ProactorEventLoop() - # asyncio.set_event_loop(loop) - l = asyncio.get_event_loop() - if l.is_closed(): - raise RuntimeError("event loop was closed") - except Exception as ex: - logger.warning("Looper could not get default event loop; " - "creating a new one: {}".format(ex)) - # Trying out uvloop for linux - l = asyncio.new_event_loop() - asyncio.set_event_loop(l) - self.loop = l - - self.runFut = self.loop.create_task(self.runForever()) # type: Task - self.running = True # type: bool - self.loop.set_debug(debug) - - # TODO: uncomment this when python bug fixed (not just closed, but solved!) - # https://bugs.python.org/issue23548 - # - # signals = [item for item in dir(signal) - # if item.startswith("SIG") and item[3] != "_"] - - signals = ["SIGINT"] - - setSignal = \ - signal.signal if sys.platform == 'win32' \ - else self.loop.add_signal_handler - - for sigName in signals: - try: - logger.debug("Setting handler for {}".format(sigName)) - sigNum = getattr(signal, sigName) - setSignal(sigNum, self.handleSignal) - except RuntimeError as e: - logger.debug("Cannot set handler for {} because {}".format(sigName, e)) - - self.autoStart = autoStart # type: bool - if self.autoStart: - self.startall() - - # @staticmethod - # def new_event_loop(): - # eventLib = asyncio if sys.platform == 'win32' else uvloop - # return eventLib.new_event_loop() - - async def prodAllOnce(self): - """ - Call `prod` once for each Prodable in this Looper - - :return: the sum of the number of events executed successfully - """ - limit = None - s = 0 - for n in self.prodables: - s += await n.prod(limit) - return s - - def add(self, prodable: Prodable) -> None: - """ - Add one Prodable object to this Looper's 
list of Prodables - - :param prodable: the Prodable object to add - """ - if prodable.name in [p.name for p in self.prodables]: - raise ProdableAlreadyAdded("Prodable {} already added.". - format(prodable.name)) - self.prodables.append(prodable) - if self.autoStart: - prodable.start(self.loop) - - def removeProdable(self, prodable: Prodable=None, name: str=None) -> None: - """ - Remove the specified Prodable object from this Looper's list of Prodables - - :param prodable: the Prodable to remove - """ - if prodable: - self.prodables.remove(prodable) - elif name: - for p in self.prodables: - if hasattr(p, "name") and getattr(p, "name") == name: - prodable = p - break - if prodable: - self.prodables.remove(prodable) - else: - logger.warn("Trying to remove a prodable {} which is not present" - .format(prodable)) - else: - logger.error("Provide a prodable object or a prodable name") - - def hasProdable(self, prodable: Prodable=None, name: str=None): - assert lxor(prodable, name), \ - "One and only one of prodable or name must be provided" - - for p in self.prodables: - if (prodable and p == prodable) or (name and name == p.name): - return True - - return False - - async def runOnceNicely(self): - """ - Execute `runOnce` with a small tolerance of 0.01 seconds so that the Prodables - can complete their other asynchronous tasks not running on the event-loop. - """ - start = time.perf_counter() - msgsProcessed = await self.prodAllOnce() - if msgsProcessed == 0: - await asyncio.sleep(0.01, loop=self.loop) # if no let other stuff run - dur = time.perf_counter() - start - if dur >= 0.5: - logger.info("it took {:.3f} seconds to run once nicely". - format(dur), extra={"cli": False}) - - def runFor(self, timeout): - self.run(asyncio.sleep(timeout)) - - async def runForever(self): - """ - Keep calling `runOnceNicely` in an infinite loop. 
- """ - while self.running: - await self.runOnceNicely() - - def run(self, *coros: CoroWrapper): - """ - Runs an arbitrary list of coroutines in order and then quits the loop, - if not running as a context manager. - """ - if not self.running: - raise RuntimeError("not running!") - - async def wrapper(): - results = [] - for coro in coros: - try: - if inspect.isawaitable(coro): - results.append(await coro) - elif inspect.isfunction(coro): - res = coro() - if inspect.isawaitable(res): - results.append(await res) - else: - results.append(res) - else: - raise RuntimeError("don't know how to run {}".format(coro)) - except Exception as ex: - logger.error("Error while running coroutine {}: {}" - .format(coro.__name__, ex.__repr__())) - raise ex - if len(results) == 1: - return results[0] - return results - if coros: - what = wrapper() - else: - # if no coros supplied, then assume we run forever - what = self.runFut - return self.loop.run_until_complete(what) - - def handleSignal(self, sig=None): - # Allowing sig to be optional since asyncio not passing the signal or - # KeyboardInterrupt (Ctrl+C) - logger.info("Signal {} received, stopping looper...".format(sig)) - self.running = False - - async def shutdown(self): - """ - Shut down this Looper. - """ - logger.info("Looper shutting down now...", - extra={"cli": False}) - self.running = False - start = time.perf_counter() - await self.runFut - self.stopall() - logger.info("Looper shut down in {:.3f} seconds.". 
- format(time.perf_counter() - start), - extra={"cli": False}) - - def __enter__(self): - return self - - def shutdownSync(self): - self.loop.run_until_complete(self.shutdown()) - - # noinspection PyUnusedLocal - def __exit__(self, exc_type, exc_val, exc_tb): - self.shutdownSync() - - async def __aenter__(self): - return self - - # noinspection PyUnusedLocal - async def __aexit__(self, exc_type, exc, tb): - await self.shutdown() - - def startall(self): - """ - Start all the Prodables in this Looper's `prodables` - """ - for n in self.prodables: - n.start(self.loop) - - def stopall(self): - """ - Stop all the Prodables in this Looper's `prodables` - """ - for n in self.prodables: - n.stop() diff --git a/plenum/common/message_processor.py b/plenum/common/message_processor.py new file mode 100644 index 0000000000..5c476ff24d --- /dev/null +++ b/plenum/common/message_processor.py @@ -0,0 +1,61 @@ +import logging +from typing import Dict + +from plenum.common.request import Request +from plenum.common.types import TaggedTupleBase +from stp_core.crypto.signer import Signer + + +class MessageProcessor: + """ + Helper functions for messages. + """ + + def __init__(self, allowDictOnly = False): + self.allowDictOnly = allowDictOnly # if True, message must be converted to Dict before sending. + + def discard(self, msg, reason, logMethod=logging.error, cliOutput=False): + """ + Discard a message and log a reason using the specified `logMethod`. 
+ + :param msg: the message to discard + :param reason: the reason why this message is being discarded + :param logMethod: the logging function to be used + :param cliOutput: if truthy, informs a CLI that the logged msg should + be printed + """ + reason = "" if not reason else " because {}".format(reason) + logMethod("{} discarding message {}{}".format(self, msg, reason), + extra={"cli": cliOutput}) + + + def toDict(self, msg: Dict) -> Dict: + """ + Return a dictionary form of the message + + :param msg: the message to be sent + :raises: ValueError if msg cannot be converted to an appropriate format + for transmission + """ + + if isinstance(msg, TaggedTupleBase): + tmsg = msg.melted() + elif isinstance(msg, Request): + tmsg = msg.as_dict + elif hasattr(msg, "_asdict"): + tmsg = dict(msg._asdict()) + elif hasattr(msg, "__dict__"): + tmsg = dict(msg.__dict__) + elif self.allowDictOnly: + raise ValueError("Message cannot be converted to an appropriate " + "format for transmission") + else: + tmsg = msg + return tmsg + + + def prepForSending(self, msg: Dict, signer: Signer = None) -> Dict: + msg = self.toDict(msg) + if signer: + return signer.sign(msg) + return msg \ No newline at end of file diff --git a/plenum/common/motor.py b/plenum/common/motor.py index 6cbaef3e72..db14a60155 100644 --- a/plenum/common/motor.py +++ b/plenum/common/motor.py @@ -1,6 +1,6 @@ -from plenum.common.looper import Prodable +from stp_core.loop.looper import Prodable from plenum.common.startable import Status -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() diff --git a/plenum/common/pkg_util.py b/plenum/common/pkg_util.py index 6497ed3c9a..a972e1d394 100644 --- a/plenum/common/pkg_util.py +++ b/plenum/common/pkg_util.py @@ -41,4 +41,4 @@ def check_deps(dependencies, parent=""): dependencies.__name__ meta = getPackageMeta(pkg) deps = meta.__dependencies__ - check_deps(deps) \ No newline at end of file + check_deps(deps) diff --git 
a/plenum/common/plugin_helper.py b/plenum/common/plugin_helper.py index 9d03a60527..c020300f9f 100644 --- a/plenum/common/plugin_helper.py +++ b/plenum/common/plugin_helper.py @@ -2,7 +2,7 @@ import os from plenum.common.config_util import getConfig -from plenum.common.log import getlogger +from stp_core.common.log import getlogger pluginsLoaded = {} # Dict(baseDir, List[plugin names]) pluginsNotFound = {} # Dict(baseDir, List[plugin names]) @@ -50,7 +50,7 @@ def loadPlugins(baseDir): i += 1 else: if not pluginsNotFound.get(pluginPath): - logger.warn("Note: Plugin file does not exists: {}. " + logger.warning("Note: Plugin file does not exists: {}. " "Create plugin file if you want to load it" .format(pluginPath), extra={"cli": False}) pluginsNotFound[pluginPath] = "Notified" @@ -59,7 +59,7 @@ def loadPlugins(baseDir): # TODO: Is this strategy ok to catch any exception and # just print the error and continue, # or it should fail if there is error in plugin loading - logger.warn( + logger.warning( "** Error occurred during loading plugin {}: {}" .format(pluginPath, str(ex))) diff --git a/plenum/common/port_dispenser.py b/plenum/common/port_dispenser.py deleted file mode 100644 index d9b4a264fe..0000000000 --- a/plenum/common/port_dispenser.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -import tempfile - -import portalocker - -from plenum.common import log as log -from plenum.common.types import HA -from plenum.common.util import checkPortAvailable - - -class PortDispenser: - """ - This class provides a system-wide mechanism to provide a available socket - ports for testing. Tests should call getNext to get the next available port. - There is no guarantee of sequential port numbers, as other tests running - concurrently might grab a port before one process is done getting all the - ports it needs. This should pose no problem, as tests shouldn't depend on - port numbers. It leverages the filesystem lock mechanism to ensure there - are no overlaps. 
- """ - - maxportretries = 3 - logger = log.getlogger() - - def __init__(self, ip: str, filename: str=None, minPort=6000, maxPort=9999): - self.ip = ip - self.FILE = filename or os.path.join(tempfile.gettempdir(), - 'plenum-portmutex.{}.txt'.format(ip)) - self.minPort = minPort - self.maxPort = maxPort - self.initFile() - - def initFile(self): - if not os.path.exists(self.FILE): - with open(self.FILE, "w") as file: - file.write(str(self.minPort)) - - def get(self, count: int=1, readOnly: bool=False, recurlvl=0): - with open(self.FILE, "r+") as file: - portalocker.lock(file, portalocker.LOCK_EX) - ports = [] - while len(ports) < count: - file.seek(0) - port = int(file.readline()) - if readOnly: - return port - port += 1 - if port > self.maxPort: - port = self.minPort - file.seek(0) - file.write(str(port)) - try: - checkPortAvailable(("",port)) - ports.append(port) - self.logger.debug("new port dispensed: {}".format(port)) - except: - if recurlvl < self.maxportretries: - self.logger.debug("port {} unavailable, trying again...". - format(port)) - else: - self.logger.debug("port {} unavailable, max retries {} " - "reached". 
- format(port, self.maxportretries)) - raise - return ports - - def getNext(self, count: int=1, ip=None): - ip = ip or self.ip - has = [HA(ip, port) for port in self.get(count)] - if len(has) == 1: - return has[0] - else: - return has - - -genHa = PortDispenser("127.0.0.1").getNext \ No newline at end of file diff --git a/plenum/common/raet.py b/plenum/common/raet.py deleted file mode 100644 index 3f45dc742a..0000000000 --- a/plenum/common/raet.py +++ /dev/null @@ -1,159 +0,0 @@ -import json - -import os -from collections import OrderedDict -from raet.nacling import Signer, Privateer -from raet.road.keeping import RoadKeep - -from plenum.common.crypto import ed25519SkToCurve25519, ed25519PkToCurve25519 -from plenum.common.util import hasKeys, hexToFriendly - - -def initLocalKeep(name, baseDir, sigseed, override=False): - """ - Initialize RAET local keep. Write local role data to file. - - :param name: name of the node - :param baseDir: base directory - :param pkseed: seed to generate public and private key pair - :param sigseed: seed to generate signing and verification key pair - :param override: overwrite the local role.json file if already exists - :return: tuple(public key, verification key) - """ - rolePath = os.path.join(baseDir, name, "role", "local", "role.json") - if os.path.isfile(rolePath): - if not override: - raise FileExistsError("Keys exists for local role {}".format(name)) - - if sigseed and not isinstance(sigseed, bytes): - sigseed = sigseed.encode() - - signer = Signer(sigseed) - keep = RoadKeep(stackname=name, baseroledirpath=baseDir) - sigkey, verkey = signer.keyhex, signer.verhex - prikey, pubkey = ed25519SkToCurve25519(sigkey, toHex=True), \ - ed25519PkToCurve25519(verkey, toHex=True) - data = OrderedDict([ - ("role", name), - ("prihex", prikey), - ("sighex", sigkey) - ]) - keep.dumpLocalRoleData(data) - return pubkey.decode(), verkey.decode() - - -def initRemoteKeep(name, remoteName, baseDir, verkey, override=False): - """ - Initialize RAET 
remote keep - - :param name: name of the node - :param remoteName: name of the remote to store keys for - :param baseDir: base directory - :param pubkey: public key of the remote - :param verkey: private key of the remote - :param override: overwrite the role.remoteName.json file if it already - exists. - """ - rolePath = os.path.join(baseDir, name, "role", "remote", "role.{}.json". - format(remoteName)) - if os.path.isfile(rolePath): - if not override: - raise FileExistsError("Keys exists for remote role {}". - format(remoteName)) - - keep = RoadKeep(stackname=name, baseroledirpath=baseDir) - data = OrderedDict([ - ('role', remoteName), - ('acceptance', 1), - ('pubhex', ed25519PkToCurve25519(verkey, toHex=True)), - ('verhex', verkey) - ]) - keep.dumpRemoteRoleData(data, role=remoteName) - - -def isLocalKeepSetup(name, baseDir=None) -> bool: - """ - Check that the local RAET keep has the values of role, sighex and prihex - populated for the given node - - :param name: the name of the node to check the keys for - :param baseDir: base directory of Plenum - :return: whether the keys are setup - """ - localRoleData = getLocalKeep(name=name, baseDir=baseDir) - return hasKeys(localRoleData, ['role', 'sighex', 'prihex']) - - -def getLocalKeep(name, baseDir=None): - keep = RoadKeep(stackname=name, baseroledirpath=baseDir) - localRoleData = keep.loadLocalRoleData() - return localRoleData - - -def getLocalRoleKeyByName(roleName, baseDir, keyName): - localRoleData = getLocalKeep(roleName, baseDir) - keyhex = localRoleData.get(keyName) - keyhex = str(keyhex) if keyhex is not None else None - if keyhex is None: - raise BaseException("Seems {} keypair is not created yet" - .format(roleName)) - return keyhex - - -def getLocalVerKey(roleName, baseDir=None): - sighex = getLocalRoleKeyByName(roleName, baseDir, 'sighex') - signer = Signer(sighex) - return signer.verhex.decode() - - -def getLocalPubKey(roleName, baseDir=None): - prihex = getLocalRoleKeyByName(roleName, baseDir, 
'prihex') - privateer = Privateer(prihex) - return privateer.pubhex.decode() - - -def getEncodedLocalVerKey(name, baseDir=None): - verKey = getLocalVerKey(name, baseDir) - return hexToFriendly(verKey) - - -def getLocalEstateData(name, baseDir): - estatePath = os.path.expanduser(os.path.join(baseDir, name, "local", - "estate.json")) - if os.path.isfile(estatePath): - return json.loads(open(estatePath).read()) - - -def getHaFromLocalEstate(name, basedirpath): - localEstate = getLocalEstateData(name, basedirpath) - if localEstate: - return localEstate.get("ha") - - -def isRaetKeepDir(directory): - if os.path.isdir(os.path.join(directory, 'local')) and \ - os.path.isdir(os.path.join(directory, 'remote')) and \ - os.path.isdir(os.path.join(directory, 'role')): - return True - return False - - -def isPortUsed(keepDir, port): - """ - Checks if the any local remote present in `keepDir` is bound to the given - port - :param keepDir: - :param port: - :return: - """ - for item in os.listdir(keepDir): - itemDir = os.path.join(keepDir, item) - if os.path.isdir(itemDir) and isRaetKeepDir(itemDir): - try: - localRemoteData = json.load(open(os.path.join(itemDir, 'local', - 'estate.json'))) - if localRemoteData['ha'][1] == port: - return True - except: - continue - return False diff --git a/plenum/common/ratchet.py b/plenum/common/ratchet.py deleted file mode 100644 index 95c76c13b0..0000000000 --- a/plenum/common/ratchet.py +++ /dev/null @@ -1,91 +0,0 @@ -import functools -from math import exp, log - - -class Ratchet: - def __init__(self, a: float, b: float, c: float=0, - base: float=None, peak: float=None): - """ - Models an exponential curve; useful for providing the number of seconds - to wait between retries - - :param a: multiplier - :param b: exponent multiplier - :param c: offset - :param base: minimum number returned - :param peak: maximum number returned - """ - self.a = a - self.b = b - self.c = c - self.base = base - self.peak = peak - - @classmethod - def 
fromGoals(cls, start: float, end: float, steps: int): - b = log(end/start)/(steps-1) - return cls(a=start, b=b) - - @staticmethod - def _sumSeries(a: float, b: float, steps: int) -> float: - """ - Return value of the the following polynomial. - .. math:: - (a * e^(b*steps) - 1) / (e^b - 1) - - :param a: multiplier - :param b: exponent multiplier - :param steps: the number of steps - """ - return a * (exp(b*steps)-1) / (exp(b)-1) - - @classmethod - def fromGoalDuration(cls, start, steps, total): - return cls(a=start, b=Ratchet.goalDuration(start, steps, total)) - - @staticmethod - @functools.lru_cache() - def goalDuration(start: float, steps: int, total: float) -> float: - """ - Finds a b-value (common ratio) that satisfies a total duration within - 1 millisecond. Not terribly efficient, so using lru_cache. Don't know - a way to compute the common ratio when the sum of a finite geometric - series is known. Found myself needing to factor polynomials with an - arbitrarily - high degree. - - :param start: a-value - :param steps: how many steps - :param total: total duration of the series of n-steps - :return: b value - """ - a = start - up = None - dn = None - b = 1.0 - while True: - s = Ratchet._sumSeries(a, b, steps) - total - if abs(s) < .001: - break - elif s < 0: - dn = b - b = (up+b)/2 if up else b + 1 # halfway between b and upper if upper defined - else: - up = b - b = (dn+b)/2 if dn else b/2 - return b - - def get(self, iteration: int): - v = (self.a * exp(self.b * iteration)) + self.c - v = max(self.base, v) if self.base else v - v = min(self.peak, v) if self.peak else v - return v - - def gen(self): - i = 0 - while True: - newI = yield self.get(i) - if newI is not None: - i = newI - else: - i += 1 diff --git a/plenum/common/request.py b/plenum/common/request.py index 995c453897..b20d764180 100644 --- a/plenum/common/request.py +++ b/plenum/common/request.py @@ -1,9 +1,11 @@ from hashlib import sha256 from typing import Mapping, NamedTuple +from 
stp_core.types import Identifier + from plenum.common.signing import serializeMsg from plenum.common.constants import REQDIGEST -from plenum.common.types import Identifier, f +from plenum.common.types import f, OPERATION class Request: @@ -15,27 +17,30 @@ def __init__(self, self.identifier = identifier self.reqId = reqId self.operation = operation + self.digest = self.getDigest() self.signature = signature + @property + def as_dict(self): + return { + f.IDENTIFIER.nm: self.identifier, + f.REQ_ID.nm: self.reqId, + OPERATION: self.operation, + f.SIG.nm: self.signature + } + def __eq__(self, other): - return self.__dict__ == other.__dict__ + return self.as_dict == other.as_dict def __repr__(self): - return "{}: {}".format(self.__class__.__name__, self.__dict__) + return "{}: {}".format(self.__class__.__name__, self.as_dict) @property def key(self): return self.identifier, self.reqId - @property - def digest(self): - # The digest needs to be of the whole request. If only client id and - # request id are used to construct digest, then a malicious client might - # send different operations to different nodes and the nodes will not - # realize an have different ledgers. 
- return sha256(serializeMsg(self.__dict__)).hexdigest() - # DEPR - # return sha256("{}{}".format(*self.key).encode('utf-8')).hexdigest() + def getDigest(self): + return sha256(serializeMsg(self.signingState)).hexdigest() @property def reqDigest(self): @@ -44,8 +49,13 @@ def reqDigest(self): def __getstate__(self): return self.__dict__ - def getSigningState(self): - return self.__dict__ + @property + def signingState(self): + return { + f.IDENTIFIER.nm: self.identifier, + f.REQ_ID.nm: self.reqId, + OPERATION: self.operation + } def __setstate__(self, state): self.__dict__.update(state) diff --git a/plenum/common/script_helper.py b/plenum/common/script_helper.py index f4261ace34..242c8c3810 100644 --- a/plenum/common/script_helper.py +++ b/plenum/common/script_helper.py @@ -2,17 +2,21 @@ from jsonpickle import json from ledger.stores.text_file_store import TextFileStore +from stp_core.loop.eventually import eventually +from stp_core.network.port_dispenser import genHa +from stp_core.types import HA +from stp_raet.util import getLocalVerKey, getLocalPubKey + from plenum.client.client import Client from plenum.client.wallet import Wallet -from plenum.common.constants import TXN_TYPE, TARGET_NYM, DATA, NODE_IP, \ - NODE_PORT, CLIENT_IP, CLIENT_PORT, ALIAS, NODE, CLIENT_STACK_SUFFIX -from plenum.common.eventually import eventually -from plenum.common.port_dispenser import genHa -from plenum.common.raet import initLocalKeep, getLocalVerKey, getLocalPubKey +from plenum.common import util +from plenum.common.transactions import PlenumTransactions from plenum.common.roles import Roles from plenum.common.signer_simple import SimpleSigner -from plenum.common.transactions import PlenumTransactions -from plenum.common.types import HA +from plenum.common.constants import TXN_TYPE, TARGET_NYM, DATA, NODE_IP, \ + NODE_PORT, CLIENT_IP, CLIENT_PORT, ALIAS, NODE, CLIENT_STACK_SUFFIX +from plenum.test import waits +from plenum.test.test_node import getAllReplicas NodeInfoFile = 
"node-info" GenTxnFile = "genesis_txn" @@ -49,12 +53,14 @@ def storeToFile(baseDir, dbName, value, key, storeHash=True, isLineNoKey=False): ledger.put(value) else: ledger.put(value, key) + ledger.close() def getNodeInfo(baseDir, nodeName): ledger = getLedger(baseDir, NodeInfoFile, storeHash=False, isLineNoKey=False) rec = ledger.get(nodeName) + ledger.close() return json.loads(rec) @@ -89,6 +95,7 @@ def storeNodeInfo(baseDir, nodeName, steward, nodeip, nodeport, clientip, for key, value in newRec: storeToFile(baseDir, NodeInfoFile, value, key, storeHash=False, isLineNoKey=False) + ledger.close() def storeExportedTxns(baseDir, txn): @@ -101,13 +108,6 @@ def storeGenTxns(baseDir, txn): isLineNoKey=True) -def initKeep(baseDir, name, sigseed, override=False): - pubkey, verkey = initLocalKeep(name, baseDir, sigseed, override) - print("Public key is", pubkey) - print("Verification key is", verkey) - return pubkey, verkey - - def getStewardKeyFromName(baseDir, name): return getLocalVerKey(name, baseDir) @@ -255,8 +255,9 @@ def changeHA(looper, config, nodeName, nodeSeed, newNodeHA, client = Client(stewardName, ha=('0.0.0.0', randomClientPort), config=config) looper.add(client) + timeout = waits.expectedClientConnectionTimeout(3) looper.run(eventually(__checkClientConnected, client, - retryWait=1, timeout=5)) + retryWait=1, timeout=timeout)) nodeVerKey = SimpleSigner(seed=nodeSeed).verkey diff --git a/plenum/common/signer.py b/plenum/common/signer.py deleted file mode 100644 index b73b0536e3..0000000000 --- a/plenum/common/signer.py +++ /dev/null @@ -1,23 +0,0 @@ -from abc import abstractproperty, abstractmethod -from typing import Dict - -from plenum.common.types import Identifier - - -class Signer: - """ - Interface that defines a sign method. 
- """ - @abstractproperty - def identifier(self) -> Identifier: - raise NotImplementedError - - @abstractmethod - def sign(self, msg: Dict) -> Dict: - raise NotImplementedError - - @abstractproperty - def alias(self) -> str: - raise NotImplementedError - - diff --git a/plenum/common/signer_did.py b/plenum/common/signer_did.py index c7f2f35718..7c27925375 100644 --- a/plenum/common/signer_did.py +++ b/plenum/common/signer_did.py @@ -1,17 +1,14 @@ -from abc import abstractproperty - import base58 from binascii import hexlify from typing import Dict from libnacl import randombytes -from raet.nacling import SigningKey -from raet.nacling import Signer as NaclSigner +from stp_core.crypto.nacl_wrappers import SigningKey, Signer as NaclSigner -from plenum.common.signer import Signer +from stp_core.crypto.signer import Signer from plenum.common.signing import serializeMsg -from plenum.common.types import Identifier -from plenum.common.util import hexToFriendly, rawToFriendly, friendlyToRaw +from stp_core.types import Identifier +from plenum.common.util import rawToFriendly, friendlyToRaw class DidIdentity: diff --git a/plenum/common/signer_simple.py b/plenum/common/signer_simple.py index 54b1d96199..6c3a15fe90 100644 --- a/plenum/common/signer_simple.py +++ b/plenum/common/signer_simple.py @@ -3,10 +3,9 @@ from typing import Dict from libnacl import randombytes -from raet.nacling import SigningKey -from raet.nacling import Signer as NaclSigner +from stp_core.crypto.nacl_wrappers import SigningKey, Signer as NaclSigner -from plenum.common.signer import Signer +from stp_core.crypto.signer import Signer from plenum.common.signing import serializeMsg from plenum.common.util import hexToFriendly diff --git a/plenum/common/signing.py b/plenum/common/signing.py index d166b797c5..bd10cf14b1 100644 --- a/plenum/common/signing.py +++ b/plenum/common/signing.py @@ -23,7 +23,7 @@ from collections import Iterable from typing import Mapping -from plenum.common.log import getlogger +from 
stp_core.common.log import getlogger from plenum.common.types import f from plenum.common.error import error @@ -84,4 +84,4 @@ def serializeMsg(msg: Mapping): """ ser = serialize(msg) logger.trace("serialized msg {} into {}".format(msg, ser)) - return ser.encode('utf-8') \ No newline at end of file + return ser.encode('utf-8') diff --git a/plenum/common/stack_manager.py b/plenum/common/stack_manager.py index 1e41524fd7..a54de57d2b 100644 --- a/plenum/common/stack_manager.py +++ b/plenum/common/stack_manager.py @@ -3,17 +3,17 @@ from abc import abstractproperty from collections import OrderedDict +from plenum.common.keygen_utils import initRemoteKeys +from stp_core.types import HA +from stp_core.network.exceptions import RemoteNotFound + from ledger.compact_merkle_tree import CompactMerkleTree from ledger.ledger import Ledger from ledger.stores.file_hash_store import FileHashStore -from plenum.common.exceptions import RemoteNotFound -from plenum.common.raet import initRemoteKeep from plenum.common.constants import DATA, ALIAS, TARGET_NYM, NODE_IP, CLIENT_IP, \ CLIENT_PORT, NODE_PORT, VERKEY, TXN_TYPE, NODE, SERVICES, VALIDATOR, CLIENT_STACK_SUFFIX -from plenum.common.types import HA from plenum.common.util import cryptonymToHex, updateNestedDict -from plenum.common.log import getlogger - +from stp_core.common.log import getlogger logger = getlogger() @@ -22,6 +22,7 @@ def __init__(self, name, basedirpath, isNode=True): self.name = name self.basedirpath = basedirpath self.isNode = isNode + self.hashStore = None @abstractproperty def hasLedger(self) -> bool: @@ -49,8 +50,8 @@ def ledger(self): shutil.copy(defaultTxnFile, self.ledgerLocation) dataDir = self.ledgerLocation - self._ledger = Ledger(CompactMerkleTree(hashStore=FileHashStore( - dataDir=dataDir)), + self.hashStore = FileHashStore(dataDir=dataDir) + self._ledger = Ledger(CompactMerkleTree(hashStore=self.hashStore), dataDir=dataDir, fileName=self.ledgerFile, ensureDurability=self.config.EnsureLedgerDurability) 
@@ -116,8 +117,8 @@ def connectNewRemote(self, txn, remoteName, nodeOrClientObj, addRemote=True): # Override any keys found, reason being the scenario where # before this node comes to know about the other node, the other # node tries to connect to it. - initRemoteKeep(self.name, remoteName, self.basedirpath, verkey, - override=True) + initRemoteKeys(self.name, remoteName, self.basedirpath, + verkey, override=True) except Exception as ex: logger.error("Exception while initializing keep for remote {}". format(ex)) @@ -133,6 +134,7 @@ def connectNewRemote(self, txn, remoteName, nodeOrClientObj, addRemote=True): logger.debug("{} adding new node {} with HA {}".format(self.name, remoteName, cliHa)) + nodeOrClientObj.nodestack.maintainConnections(force=True) def stackHaChanged(self, txn, remoteName, nodeOrClientObj): nodeHa = (txn[DATA][NODE_IP], txn[DATA][NODE_PORT]) @@ -143,12 +145,14 @@ def stackHaChanged(self, txn, remoteName, nodeOrClientObj): nodeOrClientObj.cliNodeReg[remoteName + CLIENT_STACK_SUFFIX] = HA(*cliHa) else: nodeOrClientObj.nodeReg[remoteName] = HA(*cliHa) + + nodeOrClientObj.nodestack.maintainConnections(force=True) + return rid def stackKeysChanged(self, txn, remoteName, nodeOrClientObj): logger.debug("{} clearing remote role data in keep of {}". format(nodeOrClientObj.nodestack.name, remoteName)) - nodeOrClientObj.nodestack.keep.clearRemoteRoleData(remoteName) logger.debug( "{} removing remote {}".format(nodeOrClientObj, remoteName)) # Removing remote so that the nodestack will attempt to connect @@ -157,16 +161,19 @@ def stackKeysChanged(self, txn, remoteName, nodeOrClientObj): verkey = txn[VERKEY] try: # Override any keys found - initRemoteKeep(self.name, remoteName, self.basedirpath, verkey, - override=True) + initRemoteKeys(self.name, remoteName, self.basedirpath, + verkey, override=True) except Exception as ex: logger.error("Exception while initializing keep for remote {}". 
format(ex)) + + nodeOrClientObj.nodestack.maintainConnections(force=True) return rid @staticmethod def removeRemote(stack, remoteName): try: + stack.disconnectByName(remoteName) rid = stack.removeRemoteByName(remoteName) logger.debug( "{} removed remote {}".format(stack, remoteName)) @@ -187,8 +194,8 @@ def addRemoteKeysFromLedger(self, keys): # before this node comes to know about the other node, the other # node tries to connect to it. # Do it only for Nodes, not for Clients! - if self.isNode: - initRemoteKeep(self.name, remoteName, self.basedirpath, key, + #if self.isNode: + initRemoteKeys(self.name, remoteName, self.basedirpath, key, override=True) except Exception as ex: logger.error("Exception while initializing keep for remote {}". diff --git a/plenum/common/stacked.py b/plenum/common/stacked.py deleted file mode 100644 index 398e5e3f24..0000000000 --- a/plenum/common/stacked.py +++ /dev/null @@ -1,898 +0,0 @@ -import sys -import time -from collections import Callable -from collections import deque -from typing import Any, Set, Optional, List, Iterable -from typing import Dict -from typing import Tuple - -from raet.raeting import AutoMode, TrnsKind -from raet.road.estating import RemoteEstate -from raet.road.keeping import RoadKeep -from raet.road.stacking import RoadStack -from raet.road.transacting import Joiner, Allower, Messenger - -from plenum.common.crypto import getEd25519AndCurve25519Keys, \ - ed25519SkToCurve25519 -from plenum.common.exceptions import RemoteNotFound -from plenum.common.log import getlogger -from plenum.common.ratchet import Ratchet -from plenum.common.signer import Signer -from plenum.common.types import Batch, TaggedTupleBase, HA -from plenum.common.request import Request -from plenum.common.util import distributedConnectionMap, \ - MessageProcessor, checkPortAvailable -from plenum.common.config_util import getConfig -from plenum.common.error import error - -logger = getlogger() - -# this overrides the defaults 
-Joiner.RedoTimeoutMin = 1.0 -Joiner.RedoTimeoutMax = 10.0 - -Allower.RedoTimeoutMin = 1.0 -Allower.RedoTimeoutMax = 10.0 - -Messenger.RedoTimeoutMin = 1.0 -Messenger.RedoTimeoutMax = 10.0 - - -class Stack(RoadStack): - def __init__(self, *args, **kwargs): - checkPortAvailable(kwargs['ha']) - basedirpath = kwargs.get('basedirpath') - keep = RoadKeep(basedirpath=basedirpath, - stackname=kwargs['name'], - auto=kwargs.get('auto'), - baseroledirpath=basedirpath) # type: RoadKeep - kwargs['keep'] = keep - localRoleData = keep.loadLocalRoleData() - - sighex = kwargs.pop('sighex', None) or localRoleData['sighex'] - if not sighex: - (sighex, _), (prihex, _) = getEd25519AndCurve25519Keys() - else: - prihex = ed25519SkToCurve25519(sighex, toHex=True) - kwargs['sigkey'] = sighex - kwargs['prikey'] = prihex - self.msgHandler = kwargs.pop('msgHandler', None) # type: Callable - super().__init__(*args, **kwargs) - if self.ha[1] != kwargs['ha'].port: - error("the stack port number has changed, likely due to " - "information in the keep. {} passed {}, actual {}". - format(kwargs['name'], kwargs['ha'].port, self.ha[1])) - self.created = time.perf_counter() - self.coro = None - config = getConfig() - try: - self.messageTimeout = config.RAETMessageTimeout - except AttributeError: - # if no timeout is set then message will never timeout - self.messageTimeout = 0 - - def __repr__(self): - return self.name - - def start(self): - if not self.opened: - self.open() - logger.info("stack {} starting at {} in {} mode" - .format(self, self.ha, self.keep.auto.name), - extra={"cli": False}) - self.coro = self._raetcoro() - - def stop(self): - if self.opened: - self.close() - self.coro = None - logger.info("stack {} stopped".format(self.name), extra={"cli": False}) - - async def service(self, limit=None) -> int: - """ - Service `limit` number of received messages in this stack. - - :param limit: the maximum number of messages to be processed. If None, - processes all of the messages in rxMsgs. 
- :return: the number of messages processed. - """ - pracLimit = limit if limit else sys.maxsize - if self.coro: - x = next(self.coro) - if x > 0: - for x in range(pracLimit): - try: - self.msgHandler(self.rxMsgs.popleft()) - except IndexError: - break - return x - else: - logger.debug("{} is stopped".format(self)) - return 0 - - @property - def age(self): - """ - Returns the time elapsed since this stack was created - """ - return time.perf_counter() - self.created - - def _raetcoro(self): - """ - Generator to service all messages. - Yields the length of rxMsgs queue of this stack. - """ - while True: - try: - self._serviceStack(self.age) - l = len(self.rxMsgs) - except Exception as ex: - if isinstance(ex, OSError) and \ - len(ex.args) > 0 and \ - ex.args[0] == 22: - logger.error("Error servicing stack {}: {}. This could be " - "due to binding to an internal network " - "and trying to route to an external one.". - format(self.name, ex), extra={'cli': 'WARNING'}) - else: - logger.error("Error servicing stack {}: {} {}". - format(self.name, ex, ex.args), - extra={'cli': 'WARNING'}) - l = 0 - yield l - - def _serviceStack(self, age): - """ - Update stacks clock and service all tx and rx messages. - - :param age: update timestamp of this RoadStack to this value - """ - self.updateStamp(age) - self.serviceAll() - - def updateStamp(self, age=None): - """ - Change the timestamp of this stack's raet store. - - :param age: the timestamp will be set to this value - """ - self.store.changeStamp(age if age else self.age) - - @property - def opened(self): - return self.server.opened - - def open(self): - """ - Open the UDP socket of this stack's server. - """ - self.server.open() # close the UDP socket - - def close(self): - """ - Close the UDP socket of this stack's server. 
- """ - self.server.close() # close the UDP socket - - # TODO: Does this serve the same purpose as `conns`, if yes then remove - @property - def connecteds(self) -> Set[str]: - """ - Return the names of the nodes this node is connected to. - """ - return {r.name for r in self.remotes.values() - if self.isRemoteConnected(r)} - - @staticmethod - def isRemoteConnected(r: RemoteEstate) -> bool: - """ - A node is considered to be connected if it is joined, allowed and alived. - - :param r: the remote to check - """ - return r.joined and r.allowed and r.alived - - def isConnectedTo(self, name: str=None, ha: Tuple=None): - assert (name, ha).count(None) == 1, "One and only one of name or ha " \ - "should be passed. Passed " \ - "name: {}, ha: {}".format(name, ha) - try: - remote = self.getRemote(name, ha) - except RemoteNotFound: - return False - return self.isRemoteConnected(remote) - - def getRemote(self, name: str=None, ha: Tuple=None) -> RemoteEstate: - """ - Find the remote by name or ha. - - :param name: the name of the remote to find - :param ha: host address pair the remote to find - :raises: RemoteNotFound - """ - assert (name, ha).count(None) == 1, "One and only one of name or ha " \ - "should be passed. Passed " \ - "name: {}, ha: {}".format(name, ha) - remote = self.findInRemotesByName(name) if name else \ - self.findInRemotesByHA(ha) - if not remote: - raise RemoteNotFound(name or ha) - return remote - - def findInRemotesByHA(self, remoteHa): - remotes = [r for r in self.remotes.values() - if r.ha == remoteHa] - assert len(remotes) <= 1, "Found remotes {}: {}".\ - format(len(remotes), [(r.name, r.ha) for r in remotes]) - if remotes: - return remotes[0] - return None - - def findInRemotesByName(self, name: str) -> RemoteEstate: - """ - Find the remote by name. 
- - :param name: the name of the remote to find - :raises: RemoteNotFound - """ - try: - return next(r for r in self.remotes.values() - if r.name == name) - except StopIteration: - return None - - def removeRemoteByName(self, name: str) -> int: - """ - Remove the remote by name. - - :param name: the name of the remote to remove - :raises: RemoteNotFound - """ - remote = self.getRemote(name) - rid = remote.uid - self.removeRemote(remote) - return rid - - def send(self, msg: Any, remoteName: str): - """ - Transmit the specified message to the remote specified by `remoteName`. - - :param msg: a message - :param remoteName: the name of the remote - """ - rid = self.getRemote(remoteName).uid - # Setting timeout to never expire - self.transmit(msg, rid, timeout=self.messageTimeout) - - -class SimpleStack(Stack): - localips = ['127.0.0.1', '0.0.0.0'] - - def __init__(self, stackParams: Dict, msgHandler: Callable, sighex: str=None): - self.stackParams = stackParams - self.msgHandler = msgHandler - self._conns = set() # type: Set[str] - super().__init__(**stackParams, msgHandler=self.msgHandler, sighex=sighex) - - @property - def isKeySharing(self): - return self.keep.auto != AutoMode.never - - @property - def conns(self) -> Set[str]: - """ - Get the connections of this node. - - :return: set of names of the connected nodes - """ - return self._conns - - @conns.setter - def conns(self, value: Set[str]) -> None: - """ - Updates the connection count of this node if not already done. - """ - if not self._conns == value: - old = self._conns - self._conns = value - ins = value - old - outs = old - value - logger.debug("{}'s connections changed from {} to {}".format(self, - old, - value)) - self._connsChanged(ins, outs) - - def checkConns(self): - """ - Evaluate the connected nodes - """ - self.conns = self.connecteds - - def _connsChanged(self, ins: Set[str], outs: Set[str]) -> None: - """ - A series of operations to perform once a connection count has changed. 
- - - Set f to max number of failures this system can handle. - - Set status to one of started, started_hungry or starting depending on - the number of protocol instances. - - Check protocol instances. See `checkProtocolInstaces()` - - :param ins: new nodes connected - :param outs: nodes no longer connected - """ - for o in outs: - logger.info("{} disconnected from {}".format(self, o), - extra={"cli": "IMPORTANT", - "tags": ["connected"]}) - for i in ins: - logger.info("{} now connected to {}".format(self, i), - extra={"cli": "IMPORTANT", - "tags": ["connected"]}) - - # remove remotes for same ha when a connection is made - remote = self.getRemote(i) - others = [r for r in self.remotes.values() - if r.ha == remote.ha and r.name != i] - for o in others: - logger.debug("{} removing other remote".format(self)) - self.removeRemote(o) - - self.onConnsChanged(ins, outs) - - def onConnsChanged(self, ins: Set[str], outs: Set[str]): - """ - Subclasses can override - """ - pass - - def start(self): - super().start() - # super().__init__(**self.stackParams, msgHandler=self.msgHandler) - - def sign(self, msg: Dict, signer: Signer) -> Dict: - """ - No signing is implemented. Returns the msg as it is. 
- An overriding class can define the signing implementation - - :param msg: the message to sign - """ - return msg # don't sign by default - - def prepForSending(self, msg: Dict, signer: Signer = None) -> Dict: - """ - Return a dictionary form of the message - - :param msg: the message to be sent - :raises: ValueError if msg cannot be converted to an appropriate format - for transmission - """ - if isinstance(msg, TaggedTupleBase): - tmsg = msg.melted() - elif isinstance(msg, Request): - tmsg = msg.__getstate__() - elif hasattr(msg, "_asdict"): - tmsg = dict(msg._asdict()) - elif hasattr(msg, "__dict__"): - tmsg = dict(msg.__dict__) - else: - raise ValueError("Message cannot be converted to an appropriate " - "format for transmission") - if signer: - return self.sign(tmsg, signer) - return tmsg - - def sameAddr(self, ha, ha2) -> bool: - """ - Check whether the two arguments correspond to the same address - """ - if ha == ha2: - return True - elif ha[1] != ha2[1]: - return False - else: - return ha[0] in self.localips and ha2[0] in self.localips - - -class KITStack(SimpleStack): - # Keep In Touch Stack. 
Stack which maintains connections mentioned in - # its registry - def __init__(self, stackParams: dict, msgHandler: Callable, - registry: Dict[str, HA], sighex: str=None): - self.registry = registry - - super().__init__(stackParams, msgHandler, sighex) - # self.bootstrapped = False - - self.lastcheck = {} # type: Dict[int, Tuple[int, float]] - self.ratchet = Ratchet(a=8, b=0.198, c=-4, base=8, peak=3600) - - # holds the last time we checked remotes - self.nextCheck = 0 - - # courteous bi-directional joins - self.connectNicelyUntil = None - - self.reconnectToMissingIn = 6 - self.reconnectToDisconnectedIn = 6 - - def start(self): - super().start() - if self.name in self.registry: - # remove this node's registration from the Registry - # (no need to connect to itself) - del self.registry[self.name] - - async def serviceLifecycle(self) -> None: - """ - Async function that does the following activities if the node is going: - (See `Status.going`) - - - check connections (See `checkConns`) - - maintain connections (See `maintainConnections`) - """ - self.checkConns() - self.maintainConnections() - - def addRemote(self, remote, dump=False): - if not self.findInNodeRegByHA(remote.ha): - logger.debug('Remote {} with HA {} not added -> not found in registry'.format(remote.name, remote.ha)) - return - return super(KITStack, self).addRemote(remote, dump) - - def createRemote(self, ha): - if ha and not self.findInNodeRegByHA(ha): - logger.debug('Remote with HA {} not added -> not found in registry'.format(ha)) - return - return super(KITStack, self).createRemote(ha) - - def processRx(self, packet): - # Override to add check that in case of join new remote is in registry. 
This is done to avoid creation - # of unnecessary JSON files for remotes - tk = packet.data['tk'] - - if tk in [TrnsKind.join]: # join transaction - sha = (packet.data['sh'], packet.data['sp']) - if not self.findInNodeRegByHA(sha): - return self.handleJoinFromUnregisteredRemote(sha) - - return super(KITStack, self).processRx(packet) - - def handleJoinFromUnregisteredRemote(self, sha): - logger.debug('Remote with HA {} not added -> not found in registry'.format(sha)) - return None - - def connect(self, name, rid: Optional[int]=None) -> Optional[int]: - """ - Connect to the node specified by name. - - :param name: name of the node to connect to - :type name: str or (HA, tuple) - :return: the uid of the remote estate, or None if a connect is not - attempted - """ - # if not self.isKeySharing: - # logger.debug("{} skipping join with {} because not key sharing". - # format(self, name)) - # return None - if rid: - remote = self.remotes[rid] - else: - if isinstance(name, (HA, tuple)): - node_ha = name - elif isinstance(name, str): - node_ha = self.registry[name] - else: - raise AttributeError() - - remote = RemoteEstate(stack=self, - ha=node_ha) - self.addRemote(remote) - # updates the store time so the join timer is accurate - self.updateStamp() - self.join(uid=remote.uid, cascade=True, timeout=30) - logger.info("{} looking for {} at {}:{}". - format(self, name or remote.name, *remote.ha), - extra={"cli": "PLAIN", "tags": ["node-looking"]}) - return remote.uid - - @property - def notConnectedNodes(self) -> Set[str]: - """ - Returns the names of nodes in the registry this node is NOT connected - to. - """ - return set(self.registry.keys()) - self.conns - - def maintainConnections(self, force=False): - """ - Ensure appropriate connections. 
- - """ - cur = time.perf_counter() - if cur > self.nextCheck or force: - - self.nextCheck = cur + (6 if self.isKeySharing else 15) - # check again in 15 seconds, - # unless sooner because of retries below - - conns, disconns = self.remotesByConnected() - - for disconn in disconns: - self.handleDisconnectedRemote(cur, disconn) - - # remove items that have been connected - for connected in conns: - self.lastcheck.pop(connected.uid, None) - - self.connectToMissing(cur) - - logger.debug("{} next check for retries in {:.2f} seconds". - format(self, self.nextCheck - cur)) - return True - return False - - def connectToMissing(self, currentTime): - """ - Try to connect to the missing node within the time specified by - `reconnectToMissingIn` - - :param currentTime: the current time - """ - missing = self.reconcileNodeReg() - if missing: - logger.debug("{} found the following missing connections: {}". - format(self, ", ".join(missing))) - if self.connectNicelyUntil is None: - self.connectNicelyUntil = \ - currentTime + self.reconnectToMissingIn - if currentTime <= self.connectNicelyUntil: - names = list(self.registry.keys()) - names.append(self.name) - nices = set(distributedConnectionMap(names)[self.name]) - for name in nices: - logger.debug("{} being nice and waiting for {} to join". - format(self, name)) - missing = missing.difference(nices) - - for name in missing: - self.connect(name) - - def handleDisconnectedRemote(self, cur, disconn): - """ - - :param disconn: disconnected remote - """ - - # if disconn.main: - # logger.trace("{} remote {} is main, so skipping". - # format(self, disconn.uid)) - # return - - logger.trace("{} handling disconnected remote {}".format(self, disconn)) - - if disconn.joinInProcess(): - logger.trace("{} join already in process, so " - "waiting to check for reconnects". 
- format(self)) - self.nextCheck = min(self.nextCheck, - cur + self.reconnectToDisconnectedIn) - return - - if disconn.allowInProcess(): - logger.trace("{} allow already in process, so " - "waiting to check for reconnects". - format(self)) - self.nextCheck = min(self.nextCheck, - cur + self.reconnectToDisconnectedIn) - return - - if disconn.name not in self.registry: - # TODO this is almost identical to line 615; make sure we refactor - regName = self.findInNodeRegByHA(disconn.ha) - if regName: - logger.debug("{} forgiving name mismatch for {} with same " - "ha {} using another name {}". - format(self, regName, disconn.ha, disconn.name)) - else: - logger.debug("{} skipping reconnect on {} because " - "it's not found in the registry". - format(self, disconn.name)) - return - count, last = self.lastcheck.get(disconn.uid, (0, 0)) - dname = self.getRemoteName(disconn) - # TODO come back to ratcheting retries - # secsSinceLastCheck = cur - last - # secsToWait = self.ratchet.get(count) - # secsToWaitNext = self.ratchet.get(count + 1) - # if secsSinceLastCheck > secsToWait: - # extra = "" if not last else "; needed to wait at least {} and " \ - # "waited {} (next try will be {} " \ - # "seconds)".format(round(secsToWait, 2), - # round(secsSinceLastCheck, 2), - # round(secsToWaitNext, 2))) - - logger.debug("{} retrying to connect with {}". 
- format(self, dname)) - self.lastcheck[disconn.uid] = count + 1, cur - # self.nextCheck = min(self.nextCheck, - # cur + secsToWaitNext) - if disconn.joinInProcess(): - logger.debug("waiting, because join is already in " - "progress") - elif disconn.joined: - self.updateStamp() - self.allow(uid=disconn.uid, cascade=True, timeout=20) - logger.debug("{} disconnected node {} is joined".format( - self, disconn.name), extra={"cli": "STATUS"}) - else: - self.connect(dname, disconn.uid) - - def findInNodeRegByHA(self, remoteHa): - """ - Returns the name of the remote by HA if found in the node registry, else - returns None - """ - regName = [nm for nm, ha in self.registry.items() - if self.sameAddr(ha, remoteHa)] - if len(regName) > 1: - raise RuntimeError("more than one node registry entry with the " - "same ha {}: {}".format(remoteHa, regName)) - if regName: - return regName[0] - return None - - def reconcileNodeReg(self): - """ - Handle remotes missing from the node registry and clean up old remotes - no longer in this node's registry. - - 1. nice bootstrap - 2. force bootstrap - 3. retry connections - - 1. not in remotes - 2. in remotes, not joined, not allowed, not join in process - 3. in remotes, not joined, not allowed, join in process - 4. in remotes, joined, not allowed, not allow in process - 5. in remotes, joined, not allowed, allow in process - 6. in remotes, joined, allowed, - - :return: the missing remotes - """ - matches = set() # good matches found in nodestack remotes - legacy = set() # old remotes that are no longer in registry - conflicts = set() # matches found, but the ha conflicts - logger.debug("{} nodereg is {}". - format(self, self.registry.items())) - logger.debug("{} remotes are {}". - format(self, [r.name for r in self.remotes.values()])) - - for r in self.remotes.values(): - if r.name in self.registry: - if self.sameAddr(r.ha, self.registry[r.name]): - matches.add(r.name) - logger.debug("{} matched remote is {} {}". 
- format(self, r.uid, r.ha)) - else: - conflicts.add((r.name, r.ha)) - # error("{} ha for {} doesn't match. ha of remote is {} but " - # "should be {}". - # format(self, r.name, r.ha, self.registry[r.name])) - logger.error("{} ha for {} doesn't match. ha of remote is {} but " - "should be {}". - format(self, r.name, r.ha, self.registry[r.name])) - else: - regName = self.findInNodeRegByHA(r.ha) - - # This change fixes test - # `testNodeConnectionAfterKeysharingRestarted` in - # `test_node_connection` - # regName = [nm for nm, ha in self.nodeReg.items() if ha == - # r.ha and (r.joined or r.joinInProcess())] - logger.debug("{} unmatched remote is {} {}". - format(self, r.uid, r.ha)) - if regName: - logger.debug("{} forgiving name mismatch for {} with same " - "ha {} using another name {}". - format(self, regName, r.ha, r.name)) - matches.add(regName) - else: - logger.debug("{} found a legacy remote {} " - "without a matching ha {}". - format(self, r.name, r.ha)) - logger.info(str(self.registry)) - legacy.add(r) - - # missing from remotes... need to connect - missing = set(self.registry.keys()) - matches - - if len(missing) + len(matches) + len(conflicts) != len(self.registry): - logger.error("Error reconciling nodeReg with remotes") - logger.error("missing: {}".format(missing)) - logger.error("matches: {}".format(matches)) - logger.error("conflicts: {}".format(conflicts)) - logger.error("nodeReg: {}".format(self.registry.keys())) - logger.error("Error reconciling nodeReg with remotes; see logs") - - if conflicts: - logger.error("found conflicting address information {} in registry" - .format(conflicts)) - if legacy: - for l in legacy: - logger.error("{} found legacy entry [{}, {}] in remotes, " - "that were not in registry". 
- format(self, l.name, l.ha)) - self.removeRemote(l) - return missing - - def remotesByConnected(self): - """ - Partitions the remotes into connected and disconnected - - :return: tuple(connected remotes, disconnected remotes) - """ - conns, disconns = [], [] - for r in self.remotes.values(): - array = conns if Stack.isRemoteConnected(r) else disconns - array.append(r) - return conns, disconns - - def getRemoteName(self, remote): - """ - Returns the name of the remote object if found in node registry. - - :param remote: the remote object - """ - if remote.name not in self.registry: - find = [name for name, ha in self.registry.items() - if ha == remote.ha] - assert len(find) == 1 - return find[0] - return remote.name - - -class Batched(MessageProcessor): - """ - A mixin to allow batching of requests to be send to remotes. - """ - - def __init__(self): - """ - :param self: 'NodeStacked' - """ - self.outBoxes = {} # type: Dict[int, deque] - - def _enqueue(self, msg: Any, rid: int, signer: Signer) -> None: - """ - Enqueue the message into the remote's queue. - - :param msg: the message to enqueue - :param rid: the id of the remote node - """ - payload = self.prepForSending(msg, signer) - if rid not in self.outBoxes: - self.outBoxes[rid] = deque() - self.outBoxes[rid].append(payload) - - def _enqueueIntoAllRemotes(self, msg: Any, signer: Signer) -> None: - """ - Enqueue the specified message into all the remotes in the nodestack. 
- - :param msg: the message to enqueue - """ - for rid in self.remotes.keys(): - self._enqueue(msg, rid, signer) - - def send(self, msg: Any, *rids: Iterable[int], signer: Signer=None) -> None: - """ - Enqueue the given message into the outBoxes of the specified remotes - or into the outBoxes of all the remotes if rids is None - - :param msg: the message to enqueue - :param rids: ids of the remotes to whose outBoxes - this message must be enqueued - """ - if rids: - for r in rids: - self._enqueue(msg, r, signer) - else: - self._enqueueIntoAllRemotes(msg, signer) - - def flushOutBoxes(self) -> None: - """ - Clear the outBoxes and transmit batched messages to remotes. - """ - removedRemotes = [] - for rid, msgs in self.outBoxes.items(): - try: - dest = self.remotes[rid].name - except KeyError: - removedRemotes.append(rid) - continue - if msgs: - if len(msgs) == 1: - msg = msgs.popleft() - # Setting timeout to never expire - self.transmit(msg, rid, timeout=self.messageTimeout) - logger.trace("{} sending msg {} to {}".format(self, msg, dest)) - else: - logger.debug("{} batching {} msgs to {} into one transmission". 
- format(self, len(msgs), dest)) - logger.trace(" messages: {}".format(msgs)) - batch = Batch([], None) - while msgs: - batch.messages.append(msgs.popleft()) - # don't need to sign the batch, when the composed msgs are - # signed - payload = self.prepForSending(batch) - logger.trace("{} sending payload to {}: {}".format(self, - dest, - payload)) - # Setting timeout to never expire - self.transmit(payload, rid, timeout=self.messageTimeout) - for rid in removedRemotes: - logger.warning("{} rid {} has been removed".format(self, rid), - extra={"cli": False}) - msgs = self.outBoxes[rid] - if msgs: - self.discard(msgs, "rid {} no longer available".format(rid), - logMethod=logger.debug) - del self.outBoxes[rid] - - -class ClientStack(SimpleStack): - def __init__(self, stackParams: dict, msgHandler: Callable): - # The client stack needs to be mutable unless we explicitly decide - # not to - stackParams["mutable"] = stackParams.get("mutable", True) - SimpleStack.__init__(self, stackParams, msgHandler) - self.connectedClients = set() - - def serviceClientStack(self): - newClients = self.connecteds - self.connectedClients - self.connectedClients = self.connecteds - return newClients - - def newClientsConnected(self, newClients): - raise NotImplementedError("{} must implement this method".format(self)) - - def transmitToClient(self, msg: Any, remoteName: str): - """ - Transmit the specified message to the remote client specified by `remoteName`. - - :param msg: a message - :param remoteName: the name of the remote - """ - # At this time, nodes are not signing messages to clients, beyond what - # happens inherently with RAET - payload = self.prepForSending(msg) - try: - self.send(payload, remoteName) - except Exception as ex: - # TODO: This should not be an error since the client might not have - # sent the request to all nodes but only some nodes and other - # nodes might have got this request through PROPAGATE and thus - # might not have connection with the client. 
- logger.error("{} unable to send message {} to client {}; Exception: {}" - .format(self, msg, remoteName, ex.__repr__())) - - def transmitToClients(self, msg: Any, remoteNames: List[str]): - for nm in remoteNames: - self.transmitToClient(msg, nm) - - -class NodeStack(Batched, KITStack): - def __init__(self, stackParams: dict, msgHandler: Callable, - registry: Dict[str, HA], sighex: str=None): - Batched.__init__(self) - # TODO: Just to get around the restriction of port numbers changed on - # Azure. Remove this soon to relax port numbers only but not IP. - stackParams["mutable"] = stackParams.get("mutable", True) - KITStack.__init__(self, stackParams, msgHandler, registry, sighex) - - def start(self): - KITStack.start(self) - logger.info("{} listening for other nodes at {}:{}". - format(self, *self.ha), - extra={"tags": ["node-listening"]}) - diff --git a/plenum/common/stacks.py b/plenum/common/stacks.py new file mode 100644 index 0000000000..b6d0ac7e07 --- /dev/null +++ b/plenum/common/stacks.py @@ -0,0 +1,138 @@ +from typing import Callable, Any, List, Dict + +from plenum import config +from plenum.common.batched import Batched, logger +from plenum.common.message_processor import MessageProcessor +from stp_raet.rstack import SimpleRStack, KITRStack +from stp_core.types import HA +from stp_zmq.zstack import SimpleZStack, KITZStack + + +class ClientZStack(SimpleZStack, MessageProcessor): + def __init__(self, stackParams: dict, msgHandler: Callable, seed=None): + SimpleZStack.__init__(self, stackParams, msgHandler, seed=seed, + onlyListener=True, + listenerQuota=config.LISTENER_MESSAGE_QUOTA, + remoteQuota=config.REMOTES_MESSAGE_QUOTA) + MessageProcessor.__init__(self, allowDictOnly=False) + self.connectedClients = set() + + def serviceClientStack(self): + newClients = self.connecteds - self.connectedClients + self.connectedClients = self.connecteds + return newClients + + def newClientsConnected(self, newClients): + raise NotImplementedError("{} must implement this 
method".format(self)) + + def transmitToClient(self, msg: Any, remoteName: str): + """ + Transmit the specified message to the remote client specified by `remoteName`. + + :param msg: a message + :param remoteName: the name of the remote + """ + # At this time, nodes are not signing messages to clients, beyond what + # happens inherently with RAET + payload = self.prepForSending(msg) + try: + if isinstance(remoteName, str): + remoteName = remoteName.encode() + self.send(payload, remoteName) + except Exception as ex: + # TODO: This should not be an error since the client might not have + # sent the request to all nodes but only some nodes and other + # nodes might have got this request through PROPAGATE and thus + # might not have connection with the client. + logger.error("{} unable to send message {} to client {}; Exception: {}" + .format(self, msg, remoteName, ex.__repr__())) + + def transmitToClients(self, msg: Any, remoteNames: List[str]): + #TODO: Handle `remoteNames` + for nm in self.peersWithoutRemotes: + self.transmitToClient(msg, nm) + + +class NodeZStack(Batched, KITZStack): + def __init__(self, stackParams: dict, msgHandler: Callable, + registry: Dict[str, HA], seed=None, sighex: str=None): + Batched.__init__(self) + KITZStack.__init__(self, stackParams, msgHandler, registry=registry, + seed=seed, sighex=sighex, + listenerQuota=config.LISTENER_MESSAGE_QUOTA, + remoteQuota=config.REMOTES_MESSAGE_QUOTA) + MessageProcessor.__init__(self, allowDictOnly=False) + + # TODO: Reconsider defaulting `reSetupAuth` to True. + def start(self, restricted=None, reSetupAuth=True): + KITZStack.start(self, restricted=restricted, reSetupAuth=reSetupAuth) + logger.info("{} listening for other nodes at {}:{}". 
+ format(self, *self.ha), + extra={"tags": ["node-listening"]}) + + +class ClientRStack(SimpleRStack, MessageProcessor): + def __init__(self, stackParams: dict, msgHandler: Callable, seed=None): + # The client stack needs to be mutable unless we explicitly decide + # not to + stackParams["mutable"] = stackParams.get("mutable", True) + stackParams["messageTimeout"] = config.RAETMessageTimeout + SimpleRStack.__init__(self, stackParams, msgHandler) + MessageProcessor.__init__(self, allowDictOnly=True) + self.connectedClients = set() + + def serviceClientStack(self): + newClients = self.connecteds - self.connectedClients + self.connectedClients = self.connecteds + return newClients + + def newClientsConnected(self, newClients): + raise NotImplementedError("{} must implement this method".format(self)) + + def transmitToClient(self, msg: Any, remoteName: str): + """ + Transmit the specified message to the remote client specified by `remoteName`. + + :param msg: a message + :param remoteName: the name of the remote + """ + # At this time, nodes are not signing messages to clients, beyond what + # happens inherently with RAET + payload = self.prepForSending(msg) + try: + self.send(payload, remoteName) + except Exception as ex: + # TODO: This should not be an error since the client might not have + # sent the request to all nodes but only some nodes and other + # nodes might have got this request through PROPAGATE and thus + # might not have connection with the client. 
+ logger.error("{} unable to send message {} to client {}; Exception: {}" + .format(self, msg, remoteName, ex.__repr__())) + + def transmitToClients(self, msg: Any, remoteNames: List[str]): + for nm in remoteNames: + self.transmitToClient(msg, nm) + + +class NodeRStack(Batched, KITRStack): + def __init__(self, stackParams: dict, msgHandler: Callable, + registry: Dict[str, HA], seed=None, sighex: str=None): + Batched.__init__(self) + # TODO: Just to get around the restriction of port numbers changed on + # Azure. Remove this soon to relax port numbers only but not IP. + stackParams["mutable"] = stackParams.get("mutable", True) + stackParams["messageTimeout"] = config.RAETMessageTimeout + KITRStack.__init__(self, stackParams, msgHandler, registry, sighex) + MessageProcessor.__init__(self, allowDictOnly=True) + + def start(self): + KITRStack.start(self) + logger.info("{} listening for other nodes at {}:{}". + format(self, *self.ha), + extra={"tags": ["node-listening"]}) + + + + +nodeStackClass = NodeZStack if config.UseZStack else NodeRStack +clientStackClass = ClientZStack if config.UseZStack else ClientRStack diff --git a/plenum/common/test_network_setup.py b/plenum/common/test_network_setup.py index 6697da8847..dee8d86ad9 100644 --- a/plenum/common/test_network_setup.py +++ b/plenum/common/test_network_setup.py @@ -3,16 +3,15 @@ from hashlib import sha256 from ledger.serializers.compact_serializer import CompactSerializer -from raet.nacling import Signer +from stp_core.crypto.nacl_wrappers import Signer from ledger.compact_merkle_tree import CompactMerkleTree from ledger.ledger import Ledger -from plenum.common.raet import initLocalKeep +from plenum.common.keygen_utils import initLocalKeys from plenum.common.constants import TARGET_NYM, TXN_TYPE, DATA, ALIAS, \ - TXN_ID, NODE, CLIENT_IP, CLIENT_PORT, NODE_IP, NODE_PORT, NYM, \ - STEWARD, \ - ROLE, SERVICES, VALIDATOR, TRUSTEE + TXN_ID, NODE, CLIENT_IP, CLIENT_PORT, NODE_IP, NODE_PORT, CLIENT_STACK_SUFFIX, NYM, \ + 
STEWARD, ROLE, SERVICES, VALIDATOR, TRUSTEE from plenum.common.types import f from plenum.common.util import hexToFriendly @@ -37,17 +36,28 @@ def getSigningSeed(name: str) -> bytes: def getNymFromVerkey(verkey: bytes): return hexToFriendly(verkey) + @staticmethod + def writeNodeParamsFile(filePath, name, nPort, cPort): + contents = [ + 'NODE_NAME={}'.format(name), + 'NODE_PORT={}'.format(nPort), + 'NODE_CLIENT_PORT={}'.format(cPort) + ] + with open(filePath, 'w') as f: + f.writelines(os.linesep.join(contents)) + @staticmethod def bootstrapTestNodesCore(config, envName, appendToLedgers, domainTxnFieldOrder, ips, nodeCount, clientCount, - nodeNum, startingPort): - + nodeNum, startingPort, nodeParamsFileName): baseDir = config.baseDir if not os.path.exists(baseDir): os.makedirs(baseDir, exist_ok=True) - if not ips: + localNodes = not ips + + if localNodes: ips = ['127.0.0.1'] * nodeCount else: ips = ips.split(",") @@ -119,11 +129,22 @@ def bootstrapTestNodesCore(config, envName, appendToLedgers, ip = ips[num - 1] sigseed = TestNetworkSetup.getSigningSeed(nodeName) if nodeNum == num: - _, verkey = initLocalKeep(nodeName, baseDir, sigseed, True) + _, verkey = initLocalKeys(nodeName, baseDir, sigseed, True, + config=config) + _, verkey = initLocalKeys(nodeName+CLIENT_STACK_SUFFIX, baseDir, + sigseed, True, config=config) verkey = verkey.encode() print("This node with name {} will use ports {} and {} for " "nodestack and clientstack respectively" .format(nodeName, nodePort, clientPort)) + + if not localNodes: + paramsFilePath = os.path.join(baseDir, nodeParamsFileName) + print('Nodes will not run locally, so writing ' + '{}'.format(paramsFilePath)) + TestNetworkSetup.writeNodeParamsFile( + paramsFilePath, nodeName, nodePort, clientPort) + else: verkey = Signer(sigseed).verhex txn = { @@ -159,14 +180,15 @@ def bootstrapTestNodesCore(config, envName, appendToLedgers, domainLedger.stop() @staticmethod - def bootstrapTestNodes(config, startingPort, domainTxnFieldOrder): + 
def bootstrapTestNodes(config, startingPort, nodeParamsFileName, + domainTxnFieldOrder): parser = argparse.ArgumentParser( description="Generate pool transactions for testing") parser.add_argument('--nodes', required=True, type=int, help='node count, ' - 'should be less than 20') + 'should be less than 100') parser.add_argument('--clients', required=True, type=int, help='client count') parser.add_argument('--nodeNum', type=int, @@ -188,16 +210,16 @@ def bootstrapTestNodes(config, startingPort, domainTxnFieldOrder): parser.add_argument('--appendToLedgers', help="Determine if ledger files needs to be erased " - "before writting new information or not.", + "before writing new information or not.", action='store_true') args = parser.parse_args() - if args.nodes > 20: + if args.nodes > 100: print("Cannot run {} nodes for testing purposes as of now. " "This is not a problem with the protocol but some placeholder" " rules we put in place which will be replaced by our " - "Governance model. Going to run only 20".format(args.nodes)) - nodeCount = 20 + "Governance model. 
Going to run only 100".format(args.nodes)) + nodeCount = 100 else: nodeCount = args.nodes clientCount = args.clients @@ -212,4 +234,5 @@ def bootstrapTestNodes(config, startingPort, domainTxnFieldOrder): TestNetworkSetup.bootstrapTestNodesCore(config, envName, appendToLedgers, domainTxnFieldOrder, ips, nodeCount, clientCount, - nodeNum, startingPort) + nodeNum, startingPort, + nodeParamsFileName) diff --git a/plenum/common/throttler.py b/plenum/common/throttler.py index 20761943e9..af3e114d03 100644 --- a/plenum/common/throttler.py +++ b/plenum/common/throttler.py @@ -1,6 +1,6 @@ import time -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() diff --git a/plenum/common/transaction_store.py b/plenum/common/transaction_store.py index 0406801ea6..19e715998b 100644 --- a/plenum/common/transaction_store.py +++ b/plenum/common/transaction_store.py @@ -5,7 +5,7 @@ from plenum.common.constants import TXN_ID from plenum.common.types import Reply, f -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.persistence.storage import Storage logger = getlogger() diff --git a/plenum/common/txn_util.py b/plenum/common/txn_util.py index c4a329e30d..c83adf97c9 100644 --- a/plenum/common/txn_util.py +++ b/plenum/common/txn_util.py @@ -2,7 +2,6 @@ from collections import OrderedDict import portalocker -import time from ledger.stores.file_hash_store import FileHashStore @@ -14,7 +13,7 @@ from plenum.common.constants import TXN_ID, TXN_TIME, TXN_TYPE, TARGET_NYM, ROLE, \ ALIAS, VERKEY, TYPE, IDENTIFIER, DATA from plenum.common.types import f -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() @@ -57,12 +56,11 @@ def createGenesisTxnFile(genesisTxns, targetDir, fileName, fieldOrdering, ledger.stop() -def updateGenesisPoolTxnFile(genesisTxnDir, genesisTxnFile, txn, - waitTimeIfAlreadyLocked=5): +def updateGenesisPoolTxnFile(genesisTxnDir, 
genesisTxnFile, txn): # The lock is an advisory lock, it might not work on linux filesystems # not mounted with option `-o mand`, another approach can be to use a .lock # file to indicate presence or absence of .lock - genesisFilePath = open(os.path.join(genesisTxnDir, genesisTxnFile), 'a+') + genesisFilePath = os.path.join(genesisTxnDir, genesisTxnFile) try: # Exclusively lock file in a non blocking manner. Locking is neccessary # since there might be multiple clients running on a machine so genesis @@ -70,32 +68,24 @@ def updateGenesisPoolTxnFile(genesisTxnDir, genesisTxnFile, txn, # TODO: There is no automated test in the codebase that confirms it. # It has only been manaully tested in the python terminal. Add a test # for it using multiple processes writing concurrently - portalocker.Lock(genesisFilePath, truncate=None, - flags=portalocker.LOCK_EX | portalocker.LOCK_NB) - seqNo = txn[F.seqNo.name] - ledger = Ledger(CompactMerkleTree(hashStore=FileHashStore( - dataDir=genesisTxnDir)), dataDir=genesisTxnDir, - fileName=genesisTxnFile) - ledgerSize = len(ledger) - if seqNo - ledgerSize == 1: - ledger.add({k:v for k,v in txn.items() if k != F.seqNo.name}) - logger.debug('Adding transaction with sequence number {} in' - ' genesis pool transaction file'.format(seqNo)) - else: - logger.debug('Already {} genesis pool transactions present so ' - 'transaction with sequence number {} ' - 'not applicable'.format(ledgerSize, seqNo)) - portalocker.unlock(genesisFilePath) - except portalocker.AlreadyLocked as ex: - logger.info("file is already locked: {}, will retry in few seconds". - format(genesisFilePath)) - if waitTimeIfAlreadyLocked <=15: - time.sleep(waitTimeIfAlreadyLocked) - updateGenesisPoolTxnFile(genesisTxnDir, genesisTxnFile, txn, - waitTimeIfAlreadyLocked+5) - else: - logger.error("already locked error even after few attempts {}: {}". 
- format(genesisFilePath, str(ex))) + with portalocker.Lock(genesisFilePath, + truncate=None, + flags=portalocker.LOCK_EX | portalocker.LOCK_NB): + seqNo = txn[F.seqNo.name] + fileHashStore = FileHashStore(dataDir=genesisTxnDir) + ledger = Ledger(CompactMerkleTree(hashStore=fileHashStore), + dataDir=genesisTxnDir, fileName=genesisTxnFile) + ledgerSize = len(ledger) + if seqNo - ledgerSize == 1: + ledger.add({k:v for k,v in txn.items() if k != F.seqNo.name}) + logger.debug('Adding transaction with sequence number {} in' + ' genesis pool transaction file'.format(seqNo)) + else: + logger.debug('Already {} genesis pool transactions present so ' + 'transaction with sequence number {} ' + 'not applicable'.format(ledgerSize, seqNo)) + ledger.stop() + fileHashStore.close() except portalocker.LockException as ex: logger.error("error occurred during locking file {}: {}". format(genesisFilePath, str(ex))) diff --git a/plenum/common/types.py b/plenum/common/types.py index f781f71da5..abfe7bb18f 100644 --- a/plenum/common/types.py +++ b/plenum/common/types.py @@ -8,9 +8,7 @@ CATCHUP_REQ, CATCHUP_REP, POOL_LEDGER_TXNS, CONS_PROOF_REQUEST, CHECKPOINT, \ CHECKPOINT_STATE, THREE_PC_STATE, OP_FIELD_NAME -HA = NamedTuple("HA", [ - ("host", str), - ("port", int)]) +from stp_core.types import HA NodeDetail = NamedTuple("NodeDetail", [ ("ha", HA), @@ -56,6 +54,7 @@ class f: # provides a namespace for reusable field constants LEDGER_TYPE = Field("ledgerType", int) SEQ_NO_START = Field("seqNoStart", int) SEQ_NO_END = Field("seqNoEnd", int) + CATCHUP_TILL = Field("catchupTill", int) HASHES = Field("hashes", List[str]) TXNS = Field("txns", List[Any]) TXN = Field("txn", Any) @@ -90,9 +89,9 @@ def melted(self): # noinspection PyProtectedMember def TaggedTuple(typename, fields): cls = NamedTuple(typename, fields) - if any(field == OP_FIELD_NAME for field in cls._fields): - raise RuntimeError("field name '{}' is reserved in TaggedTuple" - .format(OP_FIELD_NAME)) + if OP_FIELD_NAME in 
cls._fields: + raise RuntimeError("field name '{}' is reserved in TaggedTuple" + .format(OP_FIELD_NAME)) cls.__bases__ += (TaggedTupleBase,) cls.typename = typename return cls @@ -131,8 +130,6 @@ def TaggedTuple(typename, fields): OPERATION = 'operation' -Identifier = str - RequestAck = TaggedTuple(REQACK, [ f.IDENTIFIER, f.REQ_ID]) @@ -190,11 +187,12 @@ def TaggedTuple(typename, fields): Checkpoint = TaggedTuple(CHECKPOINT, [ f.INST_ID, f.VIEW_NO, - f.SEQ_NO, + f.SEQ_NO_START, + f.SEQ_NO_END, f.DIGEST]) CheckpointState = NamedTuple(CHECKPOINT_STATE, [ - f.SEQ_NO, + f.SEQ_NO, # Current ppSeqNo in the checkpoint f.DIGESTS, # Digest of all the requests in the checkpoint f.DIGEST, # Final digest of the checkpoint, after all requests in its # range have been ordered @@ -235,6 +233,7 @@ def TaggedTuple(typename, fields): f.LEDGER_TYPE, f.SEQ_NO_START, f.SEQ_NO_END, + f.CATCHUP_TILL ]) CatchupRep = TaggedTuple(CATCHUP_REP, [ diff --git a/plenum/common/util.py b/plenum/common/util.py index f5bf398f76..f4ea48bbe8 100644 --- a/plenum/common/util.py +++ b/plenum/common/util.py @@ -1,36 +1,39 @@ import asyncio -import os - import collections +import glob import inspect +import ipaddress import itertools import json import logging -import math +import os import random -import socket import string import time +import math from binascii import unhexlify, hexlify from collections import Counter from collections import OrderedDict from math import floor +from os.path import basename from typing import TypeVar, Iterable, Mapping, Set, Sequence, Any, Dict, \ - Tuple, Union, List, NamedTuple, Callable + Tuple, Union, NamedTuple, Callable import base58 -import errno import libnacl.secret -from ledger.util import F -from libnacl import crypto_hash_sha256 - -from plenum.common.error import error +import psutil +from jsonpickle import encode, decode from six import iteritems, string_types -import ipaddress -from plenum.common.exceptions import PortNotAvailable -from 
plenum.common.exceptions import EndpointException, MissingEndpoint, \ +from ledger.util import F +from plenum.cli.constants import WALLET_FILE_EXTENSION +from plenum.common.error import error +from stp_core.crypto.util import isHexKey, isHex +from stp_core.network.exceptions import \ + MissingEndpoint, \ InvalidEndpointIpAddress, InvalidEndpointPort +import functools + T = TypeVar('T') Seconds = TypeVar("Seconds", int, float) @@ -115,22 +118,6 @@ def getRandomPortNumber() -> int: return random.randint(8090, 65530) -def isHex(val: str) -> bool: - """ - Return whether the given str represents a hex value or not - - :param val: the string to check - :return: whether the given str represents a hex value - """ - if isinstance(val, bytes): - # only decodes utf-8 string - try: - val = val.decode() - except: - return False - return isinstance(val, str) and all(c in string.hexdigits for c in val) - - async def runall(corogen): """ Run an array of coroutines @@ -206,6 +193,13 @@ def getNoInstances(nodeCount: int) -> int: return getMaxFailures(nodeCount) + 1 +def totalConnections(nodeCount: int) -> int: + """ + :return: number of connections between nodes + """ + return math.ceil((nodeCount * (nodeCount - 1)) / 2) + + def prime_gen() -> int: # credit to David Eppstein, Wolfgang Beneicke, Paul Hofstra """ @@ -225,75 +219,6 @@ def prime_gen() -> int: D[x] = p -def evenCompare(a: str, b: str) -> bool: - """ - A deterministic but more evenly distributed comparator than simple alphabetical. - Useful when comparing consecutive strings and an even distribution is needed. - Provides an even chance of returning true as often as false - """ - ab = a.encode('utf-8') - bb = b.encode('utf-8') - ac = crypto_hash_sha256(ab) - bc = crypto_hash_sha256(bb) - return ac < bc - - -def distributedConnectionMap(names: List[str]) -> OrderedDict: - """ - Create a map where every node is connected every other node. 
- Assume each key in the returned dictionary to be connected to each item in - its value(list). - - :param names: a list of node names - :return: a dictionary of name -> list(name). - """ - names.sort() - combos = list(itertools.combinations(names, 2)) - maxPer = math.ceil(len(list(combos)) / len(names)) - # maxconns = math.ceil(len(names) / 2) - connmap = OrderedDict((n, []) for n in names) - for a, b in combos: - if len(connmap[a]) < maxPer: - connmap[a].append(b) - else: - connmap[b].append(a) - return connmap - - -def checkPortAvailable(ha): - """Checks whether the given port is available""" - sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - try: - sock.bind(ha) - except OSError as exc: - if exc.args[0] in [errno.EADDRINUSE, errno.EADDRNOTAVAIL]: - raise PortNotAvailable(ha) - else: - raise exc - finally: - sock.close() - - -class MessageProcessor: - """ - Helper functions for messages. - """ - - def discard(self, msg, reason, logMethod=logging.error, cliOutput=False): - """ - Discard a message and log a reason using the specified `logMethod`. 
- - :param msg: the message to discard - :param reason: the reason why this message is being discarded - :param logMethod: the logging function to be used - :param cliOutput: if truthy, informs a CLI that the logged msg should - be printed - """ - reason = "" if not reason else " because {}".format(reason) - logMethod("{} discarding message {}{}".format(self, msg, reason), - extra={"cli": cliOutput}) - - class adict(dict): """Dict with attr access to keys.""" marker = object() @@ -339,19 +264,6 @@ async def untilTrue(condition, *args, timeout=5) -> bool: return result -def hasKeys(data, keynames): - """ - Checks whether all keys are present in the given data, and are not None - """ - # if all keys in `keynames` are not present in `data` - if len(set(keynames).difference(set(data.keys()))) != 0: - return False - for key in keynames: - if data[key] is None: - return False - return True - - def firstKey(d: Dict): return next(iter(d.keys())) @@ -360,37 +272,6 @@ def firstValue(d: Dict): return next(iter(d.values())) -def seedFromHex(seed): - if len(seed) == 64: - try: - return unhexlify(seed) - except: - pass - - -def cleanSeed(seed=None): - if seed: - bts = seedFromHex(seed) - if not bts: - if isinstance(seed, str): - seed = seed.encode('utf-8') - bts = bytes(seed) - if len(seed) != 32: - error('seed length must be 32 bytes') - return bts - - -def isHexKey(key): - try: - if len(key) == 64 and isHex(key): - return True - except ValueError as ex: - return False - except Exception as ex: - print(ex) - exit() - - def getCryptonym(identifier): return base58.b58encode(unhexlify(identifier.encode())).decode() \ if isHexKey(identifier) else identifier @@ -411,7 +292,7 @@ def rawToFriendly(raw): return base58.b58encode(raw) -def friendlyToRaw(f ): +def friendlyToRaw(f): return base58.b58decode(f) @@ -540,11 +421,6 @@ def isMaxCheckTimeExpired(startTime, maxCheckForMillis): return startTimeRounded + maxCheckForMillis < curTimeRounded -def randomSeed(size=32): - return 
''.join(random.choice(string.hexdigits) - for _ in range(size)).encode() - - def lxor(a, b): # Logical xor of 2 items, return true when one of them is truthy and # one of them falsy @@ -553,7 +429,10 @@ def lxor(a, b): def getCallableName(callable: Callable): # If it is a function or method then access its `__name__` - if inspect.isfunction(callable) or inspect.ismethod(callable): + if inspect.isfunction(callable) or \ + inspect.ismethod(callable) or \ + isinstance(callable, functools.partial): + if hasattr(callable, "__name__"): return callable.__name__ # If it is a partial then access its `func`'s `__name__` @@ -581,15 +460,6 @@ def createDirIfNotExists(dir): os.makedirs(dir) -class Singleton(type): - _instances = {} - - def __call__(cls, *args, **kwargs): - if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) - return cls._instances[cls] - - def is_valid_port(port): return port.isdigit() and int(port) in range(1, 65536) @@ -609,7 +479,45 @@ def check_endpoint_valid(endpoint, required: bool=True): raise InvalidEndpointPort(endpoint) +def getOpenConnections(): + pr = psutil.Process(os.getpid()) + return pr.connections() + + def getFormattedErrorMsg(msg): msgHalfLength = int(len(msg) / 2) errorLine = "-" * msgHalfLength + "ERROR" + "-" * msgHalfLength return "\n\n" + errorLine + "\n " + msg + "\n" + errorLine + "\n" + +def normalizedWalletFileName(walletName): + return "{}.{}".format(walletName.lower(), WALLET_FILE_EXTENSION) + + +def getWalletFilePath(basedir, walletFileName): + return os.path.join(basedir, walletFileName) + + +def saveGivenWallet(wallet, fileName, contextDir): + createDirIfNotExists(contextDir) + walletFilePath = getWalletFilePath( + contextDir, fileName) + with open(walletFilePath, "w+") as walletFile: + encodedWallet = encode(wallet, keys=True) + walletFile.write(encodedWallet) + return walletFilePath + + +def getWalletByPath(walletFilePath): + with open(walletFilePath) as walletFile: + wallet = 
decode(walletFile.read(), keys=True) + return wallet + + +def getLastSavedWalletFileName(dir): + def getLastModifiedTime(file): + return os.stat(file).st_mtime_ns + + filePattern = "*.{}".format(WALLET_FILE_EXTENSION) + newest = max(glob.iglob('{}/{}'.format(dir, filePattern)), + key=getLastModifiedTime) + return basename(newest) \ No newline at end of file diff --git a/plenum/common/verifier.py b/plenum/common/verifier.py index 5df305bb77..fabed255c3 100644 --- a/plenum/common/verifier.py +++ b/plenum/common/verifier.py @@ -4,7 +4,7 @@ from base58 import b58decode, b58encode from plenum.common.signing import serializeMsg -from raet.nacling import Verifier as NaclVerifier +from stp_core.crypto.nacl_wrappers import Verifier as NaclVerifier class Verifier: diff --git a/plenum/config.py b/plenum/config.py index de5011e592..7e7482c416 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -121,7 +121,8 @@ # OPTIONS RELATED TO TESTS # Expected time for one stack to get connected to another -ExpectedConnectTime = 3.3 if sys.platform == 'win32' else 1.4 +ExpectedConnectTime = 3.3 if sys.platform == 'win32' else 2 + # After ordering every `CHK_FREQ` requests, replica sends a CHECKPOINT CHK_FREQ = 100 @@ -150,3 +151,11 @@ EnsureLedgerDurability = True log_override_tags = dict(cli={}, demo={}) + +# TODO needs to be refactored to use a transport protocol abstraction +UseZStack = True + + +# Number of messages zstack accepts at once +LISTENER_MESSAGE_QUOTA = 100 +REMOTES_MESSAGE_QUOTA = 100 diff --git a/plenum/persistence/client_req_rep_store_file.py b/plenum/persistence/client_req_rep_store_file.py index 2dd4b958b2..34b34ad628 100644 --- a/plenum/persistence/client_req_rep_store_file.py +++ b/plenum/persistence/client_req_rep_store_file.py @@ -1,8 +1,10 @@ import json import os -from collections import OrderedDict +from collections import OrderedDict, namedtuple from typing import Any, Sequence, List, Dict +from plenum.common.constants import REQACK, REQNACK, REPLY + from 
ledger.stores.directory_store import DirectoryStore from ledger.util import F from plenum.common.has_file_storage import HasFileStorage @@ -14,6 +16,8 @@ class ClientReqRepStoreFile(ClientReqRepStore, HasFileStorage): + LinePrefixes = namedtuple('LP', ['Request', REQACK, REQNACK, REPLY]) + def __init__(self, name, baseDir): self.baseDir = baseDir self.dataDir = "data/clients" @@ -24,6 +28,8 @@ def __init__(self, name, baseDir): os.makedirs(self.dataLocation) self.reqStore = DirectoryStore(self.dataLocation, "Requests") self._serializer = None + self.delimiter = '~' + self.linePrefixes = self.LinePrefixes('0', 'A', 'N', 'R') @property def lastReqId(self) -> int: @@ -34,29 +40,38 @@ def addRequest(self, req: Request): idr = req.identifier reqId = req.reqId key = "{}{}".format(idr, reqId) - self.reqStore.appendToValue(key, "0:{}". - format(self.serializeReq(req))) + self.reqStore.appendToValue(key, "{}{}{}". + format(self.linePrefixes.Request, + self.delimiter, + self.serializeReq(req))) def addAck(self, msg: Any, sender: str): idr = msg[f.IDENTIFIER.nm] reqId = msg[f.REQ_ID.nm] key = "{}{}".format(idr, reqId) - self.reqStore.appendToValue(key, "A:{}".format(sender)) + self.reqStore.appendToValue(key, "{}{}{}". + format(self.linePrefixes.REQACK, + self.delimiter, sender)) def addNack(self, msg: Any, sender: str): idr = msg[f.IDENTIFIER.nm] reqId = msg[f.REQ_ID.nm] key = "{}{}".format(idr, reqId) reason = msg[f.REASON.nm] - self.reqStore.appendToValue(key, "N:{}:{}". - format(sender, reason)) + self.reqStore.appendToValue(key, "{}{}{}{}{}". + format(self.linePrefixes.REQNACK, + self.delimiter, sender, + self.delimiter, reason)) def addReply(self, identifier: str, reqId: int, sender: str, - result: Any) -> Sequence[str]: + result: Any) -> int: serializedReply = self.txnSerializer.serialize(result, toBytes=False) key = "{}{}".format(identifier, reqId) self.reqStore.appendToValue(key, - "R:{}:{}".format(sender, serializedReply)) + "{}{}{}{}{}". 
+ format(self.linePrefixes.REPLY, + self.delimiter, sender, + self.delimiter, serializedReply)) return len(self._getSerializedReplies(identifier, reqId)) def hasRequest(self, identifier: str, reqId: int) -> bool: @@ -64,7 +79,8 @@ def hasRequest(self, identifier: str, reqId: int) -> bool: return self.reqStore.exists(key) def getRequest(self, identifier: str, reqId: int) -> Request: - for r in self._getLinesWithPrefix(identifier, reqId, "0:"): + for r in self._getLinesWithPrefix(identifier, reqId, "{}{}". + format(self.linePrefixes.Request, self.delimiter)): return self.deserializeReq(r[2:]) def getReplies(self, identifier: str, reqId: int): @@ -74,14 +90,18 @@ def getReplies(self, identifier: str, reqId: int): return replies def getAcks(self, identifier: str, reqId: int) -> List[str]: - ackLines = self._getLinesWithPrefix(identifier, reqId, "A:") + ackLines = self._getLinesWithPrefix(identifier, reqId, "{}{}". + format(self.linePrefixes.REQACK, + self.delimiter)) return [line[2:] for line in ackLines] def getNacks(self, identifier: str, reqId: int) -> dict: - nackLines = self._getLinesWithPrefix(identifier, reqId, "N:") + nackLines = self._getLinesWithPrefix(identifier, reqId, "{}{}". + format(self.linePrefixes.REQNACK, + self.delimiter)) result = {} for line in nackLines: - sender, reason = line[2:].split(":", 1) + sender, reason = line[2:].split(self.delimiter, 1) result[sender] = reason return result @@ -107,10 +127,12 @@ def _getLinesWithPrefix(self, identifier: str, reqId: int, def _getSerializedReplies(self, identifier: str, reqId: int) -> \ Dict[str, str]: - replyLines = self._getLinesWithPrefix(identifier, reqId, "R:") + replyLines = self._getLinesWithPrefix(identifier, reqId, "{}{}". 
+ format(self.linePrefixes.REPLY, + self.delimiter)) result = {} for line in replyLines: - sender, reply = line[2:].split(":", 1) + sender, reply = line[2:].split(self.delimiter, 1) result[sender] = reply return result diff --git a/plenum/persistence/client_txn_log.py b/plenum/persistence/client_txn_log.py index 0437405a92..74c1f859be 100644 --- a/plenum/persistence/client_txn_log.py +++ b/plenum/persistence/client_txn_log.py @@ -24,6 +24,9 @@ def __init__(self, name, baseDir=None): "transactions") self.serializer = CompactSerializer(fields=self.txnFieldOrdering) + def close(self): + self.transactionLog.close() + @property def txnFieldOrdering(self): fields = getTxnOrderedFields() diff --git a/plenum/persistence/orientdb_graph_store.py b/plenum/persistence/orientdb_graph_store.py index b5e7a48fc9..c8b9650316 100644 --- a/plenum/persistence/orientdb_graph_store.py +++ b/plenum/persistence/orientdb_graph_store.py @@ -2,7 +2,7 @@ import pyorient -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.persistence.graph_store import GraphStore from plenum.persistence.orientdb_store import OrientDbStore diff --git a/plenum/persistence/orientdb_hash_store.py b/plenum/persistence/orientdb_hash_store.py index 2c4a2163b3..49a64da5c0 100644 --- a/plenum/persistence/orientdb_hash_store.py +++ b/plenum/persistence/orientdb_hash_store.py @@ -2,7 +2,7 @@ from ledger.stores.hash_store import HashStore from ledger.util import F -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.persistence.orientdb_store import OrientDbStore @@ -123,6 +123,9 @@ def classesNeeded(self): return [(self.leafHashClass, self.createLeafHashClass), (self.nodeHashClass, self.createNodeHashClass)] + def close(self): + self.store.close() + def reset(self) -> bool: def trunc(clazz): self.store.client.command( diff --git a/plenum/persistence/orientdb_store.py b/plenum/persistence/orientdb_store.py index 21c3eff4ad..b9010d9968 
100644 --- a/plenum/persistence/orientdb_store.py +++ b/plenum/persistence/orientdb_store.py @@ -3,7 +3,7 @@ import pyorient from plenum.common.error import error from plenum.common.exceptions import OrientDBNotRunning -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() @@ -83,6 +83,10 @@ def getPlaceHolderQueryStringFromDict(args: Dict, joiner=", "): items.append(("{} = " + valPlaceHolder).format(key, val)) return joiner.join(items) + def close(self): + if self.client._connection.connected: + self.client.db_close(self.client._connection.db_opened) + def createOrientDbInMemStore(config, name, dbType): """ @@ -93,10 +97,13 @@ def createOrientDbInMemStore(config, name, dbType): try: client = pyorient.OrientDB(host=host, port=port) client.connect(user=config.OrientDB['user'], - password=config.OrientDB['password']) + password=config.OrientDB['password']) + # except ValueError: + # client.connect(user=config.OrientDB['user'], + # password=config.OrientDB['password']) except pyorient.exceptions.PyOrientConnectionException: error("OrientDB connection failed. 
Check if DB is running " - "on port {}".format(port)) + "on port {}".format(port)) try: if client.db_exists(name, pyorient.STORAGE_TYPE_MEMORY): client.db_drop(name, type=pyorient.STORAGE_TYPE_MEMORY) diff --git a/plenum/server/client_authn.py b/plenum/server/client_authn.py index 67020c0897..94319e0467 100644 --- a/plenum/server/client_authn.py +++ b/plenum/server/client_authn.py @@ -5,11 +5,11 @@ from abc import abstractmethod from typing import Dict -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.common.exceptions import InvalidSignature, EmptySignature, \ MissingSignature, EmptyIdentifier, \ - MissingIdentifier, InvalidIdentifier, CouldNotAuthenticate, \ + MissingIdentifier, CouldNotAuthenticate, \ SigningException, InvalidSignatureFormat, UnknownIdentifier from plenum.common.signing import serializeMsg from plenum.common.types import f diff --git a/plenum/server/has_action_queue.py b/plenum/server/has_action_queue.py index 1282b699b2..65e02dae33 100644 --- a/plenum/server/has_action_queue.py +++ b/plenum/server/has_action_queue.py @@ -1,9 +1,8 @@ import time from collections import deque -from functools import partial from typing import Callable -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() diff --git a/plenum/server/monitor.py b/plenum/server/monitor.py index 84b253531d..ff763a3c9e 100644 --- a/plenum/server/monitor.py +++ b/plenum/server/monitor.py @@ -7,21 +7,19 @@ import psutil +from plenum.common.config_util import getConfig +from stp_core.common.log import getlogger from plenum.common.types import EVENT_REQ_ORDERED, EVENT_NODE_STARTED, \ EVENT_PERIODIC_STATS_THROUGHPUT, PLUGIN_TYPE_STATS_CONSUMER, \ EVENT_VIEW_CHANGE, EVENT_PERIODIC_STATS_LATENCIES, \ EVENT_PERIODIC_STATS_NODES, EVENT_PERIODIC_STATS_TOTAL_REQUESTS,\ EVENT_PERIODIC_STATS_NODE_INFO, EVENT_PERIODIC_STATS_SYSTEM_PERFORMANCE_INFO -from plenum.common.stacked import NodeStack from 
plenum.server.blacklister import Blacklister -from plenum.common.config_util import getConfig -from plenum.common.log import getlogger from plenum.server.has_action_queue import HasActionQueue from plenum.server.instances import Instances -from plenum.server.plugin.has_plugin_loader_helper import PluginLoaderHelper from plenum.server.notifier_plugin_manager import notifierPluginTriggerEvents, \ PluginManager - +from plenum.server.plugin.has_plugin_loader_helper import PluginLoaderHelper pluginManager = PluginManager() logger = getlogger() @@ -38,7 +36,7 @@ class Monitor(HasActionQueue, PluginLoaderHelper): """ def __init__(self, name: str, Delta: float, Lambda: float, Omega: float, - instances: Instances, nodestack: NodeStack, + instances: Instances, nodestack, blacklister: Blacklister, nodeInfo: Dict, notifierEventTriggeringConfig: Dict, pluginPaths: Iterable[str]=None): @@ -122,10 +120,11 @@ def __init__(self, name: str, Delta: float, Lambda: float, Omega: float, HasActionQueue.__init__(self) if config.SendMonitorStats: - self._schedule(self.sendPeriodicStats, config.DashboardUpdateFreq) + self.startRepeating(self.sendPeriodicStats, + config.DashboardUpdateFreq) - self._schedule(self.checkPerformance, - config.notifierEventTriggeringConfig['clusterThroughputSpike']['freq']) + self.startRepeating(self.checkPerformance, + config.notifierEventTriggeringConfig['clusterThroughputSpike']['freq']) def __repr__(self): return self.name @@ -273,10 +272,10 @@ def isMasterThroughputTooLow(self): else: tooLow = r < self.Delta if tooLow: - logger.debug("{} master throughput {} is lower than Delta {}.". - format(self, r, self.Delta)) + logger.debug("{} master throughput ratio {} is lower than " + "Delta {}.".format(self, r, self.Delta)) else: - logger.trace("{} master throughput {} is acceptable.". + logger.trace("{} master throughput ratio {} is acceptable.". 
format(self, r)) return tooLow @@ -403,12 +402,9 @@ def sendPeriodicStats(self): self.sendNodeInfo() self.sendSystemPerfomanceInfo() self.sendTotalRequests() - self._schedule(self.sendPeriodicStats, config.DashboardUpdateFreq) def checkPerformance(self): self.sendClusterThroughputSpike() - self._schedule(self.checkPerformance, - config.notifierEventTriggeringConfig['clusterThroughputSpike']['freq']) def sendClusterThroughputSpike(self): if self.instances.masterId is None: @@ -433,7 +429,7 @@ def highResThroughput(self): now = time.perf_counter() while self.orderedRequestsInLast and \ (now - self.orderedRequestsInLast[0]) > \ - config.ThroughputWindowSize: + config.ThroughputWindowSize: self.orderedRequestsInLast = self.orderedRequestsInLast[1:] return len(self.orderedRequestsInLast) / config.ThroughputWindowSize diff --git a/plenum/server/node.py b/plenum/server/node.py index 59e559a767..44f8c27dc9 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -4,16 +4,20 @@ import random import shutil import time +from binascii import unhexlify from collections import deque, defaultdict -from functools import partial +from contextlib import closing from hashlib import sha256 from typing import Dict, Any, Mapping, Iterable, List, Optional, \ Sequence, Set, Tuple -from contextlib import closing import pyorient +from plenum.common.stacks import nodeStackClass, clientStackClass +from stp_core.crypto.signer import Signer +from stp_core.network.network_interface import NetworkInterface +from stp_core.ratchet import Ratchet + from plenum.common.roles import Roles -from raet.raeting import AutoMode from ledger.compact_merkle_tree import CompactMerkleTree from ledger.ledger import Ledger @@ -23,21 +27,20 @@ from ledger.stores.memory_hash_store import MemoryHashStore from ledger.util import F from plenum.client.wallet import Wallet +from plenum.common.config_util import getConfig from plenum.common.exceptions import SuspiciousNode, SuspiciousClient, \ MissingNodeOp, 
InvalidNodeOp, InvalidNodeMsg, InvalidClientMsgType, \ InvalidClientOp, InvalidClientRequest, BaseExc, \ - InvalidClientMessageException, RaetKeysNotFoundException as REx, BlowUp, \ + InvalidClientMessageException, KeysNotFoundException as REx, BlowUp, \ UnauthorizedClientRequest from plenum.common.has_file_storage import HasFileStorage +from plenum.common.keygen_utils import areKeysSetup from plenum.common.ledger_manager import LedgerManager -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.common.motor import Motor from plenum.common.plugin_helper import loadPlugins -from plenum.common.raet import isLocalKeepSetup -from plenum.common.ratchet import Ratchet -from plenum.common.signer import Signer +from plenum.common.request import Request from plenum.common.signer_simple import SimpleSigner -from plenum.common.stacked import NodeStack, ClientStack from plenum.common.startable import Status, Mode, LedgerState from plenum.common.throttler import Throttler from plenum.common.constants import TXN_TYPE, TXN_ID, TXN_TIME, POOL_TXN_TYPES, \ @@ -54,10 +57,8 @@ CatchupReq, CatchupRep, \ PLUGIN_TYPE_VERIFICATION, PLUGIN_TYPE_PROCESSING, PoolLedgerTxns, \ ConsProofRequest, ElectionType, ThreePhaseType, Checkpoint, ThreePCState -from plenum.common.request import Request -from plenum.common.util import MessageProcessor, friendlyEx, getMaxFailures, \ - rawToFriendly -from plenum.common.config_util import getConfig +from plenum.common.util import friendlyEx, getMaxFailures +from plenum.common.message_processor import MessageProcessor from plenum.common.verifier import DidVerifier from plenum.common.constants import DATA, ALIAS, NODE_IP @@ -74,6 +75,8 @@ from plenum.server.instances import Instances from plenum.server.models import InstanceChanges from plenum.server.monitor import Monitor +from plenum.server.notifier_plugin_manager import notifierPluginTriggerEvents, \ + PluginManager from 
plenum.server.plugin.has_plugin_loader_helper import PluginLoaderHelper from plenum.server.pool_manager import HasPoolManager, TxnPoolManager, \ RegistryPoolManager @@ -82,9 +85,6 @@ from plenum.server.propagator import Propagator from plenum.server.router import Router from plenum.server.suspicion_codes import Suspicions -from plenum.server.notifier_plugin_manager import notifierPluginTriggerEvents, \ - PluginManager - pluginManager = PluginManager() logger = getlogger() @@ -98,7 +98,7 @@ class Node(HasActionQueue, Motor, Propagator, MessageProcessor, HasFileStorage, """ suspicions = {s.code: s.reason for s in Suspicions.getList()} - keygenScript = "init_plenum_raet_keep" + keygenScript = "init_plenum_keys" def __init__(self, name: str, @@ -111,7 +111,8 @@ def __init__(self, primaryDecider: PrimaryDecider = None, pluginPaths: Iterable[str]=None, storage: Storage=None, - config=None): + config=None, + seed=None): """ Create a new node. @@ -131,7 +132,7 @@ def __init__(self, HasFileStorage.__init__(self, name, baseDir=self.basedirpath, dataDir=self.dataDir) - self.__class__.ensureKeysAreSetup(name, basedirpath) + self.ensureKeysAreSetup() self.opVerifiers = self.getPluginsByType(pluginPaths, PLUGIN_TYPE_VERIFICATION) self.reqProcessors = self.getPluginsByType(pluginPaths, @@ -153,15 +154,23 @@ def __init__(self, self.nodeReg = self.poolManager.nodeReg + kwargs = dict(stackParams=self.poolManager.nstack, + msgHandler=self.handleOneNodeMsg, registry=self.nodeReg) + cls = self.nodeStackClass + kwargs.update(seed=seed) # noinspection PyCallingNonCallable - self.nodestack = self.nodeStackClass(self.poolManager.nstack, - self.handleOneNodeMsg, - self.nodeReg) + self.nodestack = cls(**kwargs) self.nodestack.onConnsChanged = self.onConnsChanged + kwargs = dict(stackParams=self.poolManager.cstack, + msgHandler=self.handleOneClientMsg) + cls = self.clientStackClass + kwargs.update(seed=seed) + # noinspection PyCallingNonCallable - self.clientstack = 
self.clientStackClass(self.poolManager.cstack, - self.handleOneClientMsg) + self.clientstack = cls(**kwargs) + # self.clientstack = self.clientStackClass(self.poolManager.cstack, + # self.handleOneClientMsg) self.cliNodeReg = self.poolManager.cliNodeReg @@ -334,7 +343,7 @@ def id(self): def wallet(self): if not self._wallet: wallet = Wallet(self.name) - signer = SimpleSigner(seed=self.nodestack.local.signer.keyraw) + signer = SimpleSigner(seed=unhexlify(self.nodestack.keyhex)) wallet.addIdentifier(signer=signer) self._wallet = wallet return self._wallet @@ -380,12 +389,12 @@ def isParticipating(self): return self.mode == Mode.participating @property - def nodeStackClass(self) -> NodeStack: - return NodeStack + def nodeStackClass(self) -> NetworkInterface: + return nodeStackClass @property - def clientStackClass(self) -> ClientStack: - return ClientStack + def clientStackClass(self) -> NetworkInterface: + return clientStackClass def getPrimaryStorage(self): """ @@ -467,6 +476,7 @@ def start(self, loop): self.primaryStorage.start(loop, ensureDurability= self.config.EnsureLedgerDurability) + self.nodestack.start() self.clientstack.start() @@ -474,7 +484,7 @@ def start(self, loop): # if first time running this node if not self.nodestack.remotes: - logger.info("{} first time running; waiting for key sharing..." + logger.info("{} first time running..." "".format(self), extra={"cli": "LOW_STATUS", "tags": ["node-key-sharing"]}) else: @@ -510,6 +520,22 @@ def connectedNodeCount(self) -> int: """ return len(self.nodestack.conns) + 1 + def stop(self, *args, **kwargs): + super().stop(*args, **kwargs) + + if isinstance(self.hashStore, (FileHashStore, OrientDbHashStore)): + try: + self.hashStore.close() + except Exception as ex: + logger.warning('{} got exception while closing hash store: {}'. 
+ format(self, ex)) + + if isinstance(self.poolManager, TxnPoolManager): + if self.poolManager._ledger is not None: + self.poolManager._ledger.stop() + if self.poolManager.hashStore is not None: + self.poolManager.hashStore.close() + def onStopping(self): """ Actions to be performed on stopping the node. @@ -522,7 +548,21 @@ def onStopping(self): self.reset() # Stop the txn store - self.primaryStorage.stop() + try: + self.primaryStorage.stop() + except Exception as ex: + try: + self.primaryStorage.close() + except Exception as ex: + logger.warning( + '{} got exception while stopping/closing ' + 'primary storage: {}'.format(self, ex)) + + try: + self.secondaryStorage.close() + except Exception as ex: + logger.warning('{} got exception while closing ' + 'secondary storage: {}'.format(self, ex)) self.nodestack.stop() self.clientstack.stop() @@ -553,7 +593,7 @@ async def prod(self, limit: int=None) -> int: :return: total number of messages serviced by this node """ if self.isGoing(): - await self.nodestack.serviceLifecycle() + self.nodestack.serviceLifecycle() self.clientstack.serviceClientStack() c = 0 if self.status is not Status.stopped: @@ -630,14 +670,14 @@ def onConnsChanged(self, joined: Set[str], left: Set[str]): if self.isGoing(): if self.connectedNodeCount == self.totalNodes: self.status = Status.started - self.stopKeySharing() + # self.stopKeySharing() elif self.connectedNodeCount >= self.minimumNodes: self.status = Status.started_hungry else: self.status = Status.starting self.elector.nodeCount = self.connectedNodeCount - - if self.isReady(): + viewChangeStarted = self.startViewChangeIfPrimaryWentOffline(left) + if not viewChangeStarted and self.isReady(): self.checkInstances() # TODO: Should we only send election messages when lagged or # otherwise too? @@ -645,25 +685,25 @@ def onConnsChanged(self, joined: Set[str], left: Set[str]): msgs = self.elector.getElectionMsgsForLaggedNodes() logger.debug("{} has msgs {} for new nodes {}". 
format(self, msgs, joined)) - for n in joined: - self.sendElectionMsgsToLaggingNode(n, msgs) + for joinedNode in joined: + self.sendElectionMsgsToLaggingNode(joinedNode, msgs) # Communicate current view number if any view change # happened to the connected node if self.viewNo > 0: logger.debug("{} communicating view number {} to {}" - .format(self, self.viewNo-1, n)) - rid = self.nodestack.getRemote(n).uid + .format(self, self.viewNo-1, joinedNode)) + rid = self.nodestack.getRemote(joinedNode).uid self.send(InstanceChange(self.viewNo), rid) # Send ledger status whether ready (connected to enough nodes) or not - for n in joined: - self.sendPoolLedgerStatus(n) + for joinedNode in joined: + self.sendPoolLedgerStatus(joinedNode) # Send the domain ledger status only when it has discovered enough # peers otherwise very few peers will know that this node is lagging # behind and it will not receive sufficient consistency proofs to # verify the exact state of the ledger. if self.mode in (Mode.discovered, Mode.participating): - self.sendDomainLedgerStatus(n) + self.sendDomainLedgerStatus(joinedNode) def newNodeJoined(self, txn): self.setF() @@ -1025,7 +1065,7 @@ def handleOneNodeMsg(self, wrappedMsg): extra={"tags": ["node-msg-validation"]}) self.unpackNodeMsg(*vmsg) else: - logger.info("{} non validated msg {}".format(self, wrappedMsg), + logger.info("{} invalidated msg {}".format(self, wrappedMsg), extra={"tags": ["node-msg-validation"]}) except SuspiciousNode as ex: self.reportSuspiciousNodeEx(ex) @@ -1171,7 +1211,8 @@ def validateClientMsg(self, wrappedMsg): try: cMsg = cls(**msg) except Exception as ex: - raise InvalidClientRequest from ex + raise InvalidClientRequest(msg.get(f.IDENTIFIER.nm), + msg.get(f.REQ_ID.nm)) from ex if self.isSignatureVerificationNeeded(msg): self.verifySignature(cMsg) @@ -1478,7 +1519,7 @@ def checkPerformance(self): self.sendInstanceChange(self.viewNo+1) return False else: - logger.debug("{}s master has higher performance than backups". 
+ logger.debug("{}'s master has higher performance than backups". format(self)) return True @@ -1552,6 +1593,27 @@ def canViewChange(self, proposedViewNo: int) -> bool: return self.instanceChanges.hasQuorum(proposedViewNo, self.f) and \ self.viewNo < proposedViewNo + # TODO: consider moving this to pool manager + def startViewChangeIfPrimaryWentOffline(self, nodesGoingDown): + """ + Starts view change if there are primaries among the nodes which have + gone down. + + :param nodesGoingDown: the nodes which have gone down + :return: whether view change started + """ + for node in nodesGoingDown: + for instId, replica in enumerate(self.replicas): + leftOne = '{}:{}'.format(node, instId) + if replica.primaryName == leftOne: + logger.debug("Primary {} is offline, " + "{} starting view change" + .format(leftOne, self.name)) + self.startViewChange(self.viewNo + 1) + return True + return False + + # TODO: consider moving this to pool manager def startViewChange(self, proposedViewNo: int): """ Trigger the view change process. @@ -1587,7 +1649,8 @@ def verifySignature(self, msg): req = msg if not isinstance(req, Mapping): - req = msg.__getstate__() + # req = msg.__getstate__() + req = msg.as_dict identifier = self.authNr(req).authenticate(req) logger.display("{} authenticated {} signature on {} request {}". @@ -1754,7 +1817,7 @@ def getReplyFor(self, request): def processStashedOrderedReqs(self): i = 0 while self.stashedOrderedReqs: - msg = self.stashedOrderedReqs.pop() + msg = self.stashedOrderedReqs.popleft() if not self.gotInCatchupReplies(msg): self.processOrdered(msg) i += 1 @@ -1772,62 +1835,14 @@ def sync3PhaseState(self): for replica in self.replicas: self.send(replica.threePhaseState) - def startKeySharing(self, timeout=60): - """ - Start key sharing till the timeout is reached. - Other nodes will be able to join this node till the timeout is reached. 
- - :param timeout: the time till which key sharing is active - """ - if self.nodestack.isKeySharing: - logger.info("{} already key sharing".format(self), - extra={"cli": "LOW_STATUS"}) - else: - logger.info("{} starting key sharing".format(self), - extra={"cli": "STATUS"}) - self.nodestack.keep.auto = AutoMode.always - self._schedule(partial(self.stopKeySharing, timedOut=True), timeout) - - # remove any unjoined remotes - for name, r in self.nodestack.nameRemotes.items(): - if not r.joined: - logger.debug("{} removing unjoined remote {}" - .format(self, r.name)) - # This is a bug in RAET where the `removeRemote` - # of `raet/stacking.py` does not consider the fact that - # renaming of remote might not have happened. Fixed here - # https://github.com/RaetProtocol/raet/pull/9 - self.nodestack.removeRemote(r) - - # if just starting, then bootstrap - force = time.time() - self.created > 5 - self.nodestack.maintainConnections(force=force) - - def stopKeySharing(self, timedOut=False): - """ - Stop key sharing, i.e don't allow any more nodes to join this node. - """ - if self.nodestack.isKeySharing: - if timedOut and self.nodestack.notConnectedNodes: - logger.info("{} key sharing timed out; was not able to " - "connect to {}". - format(self, - ", ".join( - self.nodestack.notConnectedNodes)), - extra={"cli": "WARNING"}) - else: - logger.info("{} completed key sharing".format(self), - extra={"cli": "STATUS"}) - self.nodestack.keep.auto = AutoMode.never - - @classmethod - def ensureKeysAreSetup(cls, name, baseDir): + def ensureKeysAreSetup(self): """ - Check whether the keys are setup in the local RAET keep. - Raises RaetKeysNotFoundException if not found. + Check whether the keys are setup in the local STP keep. + Raises KeysNotFoundException if not found. 
""" - if not isLocalKeepSetup(name, baseDir): - raise REx(REx.reason.format(name) + cls.keygenScript) + name, baseDir = self.name, self.basedirpath + if not areKeysSetup(name, baseDir, self.config): + raise REx(REx.reason.format(name) + self.keygenScript) def reportSuspiciousNodeEx(self, ex: SuspiciousNode): """ @@ -1846,7 +1861,7 @@ def reportSuspiciousNode(self, :param nodeName: name of the node to report suspicion on :param reason: the reason for suspicion """ - logger.warning("{} suspicion raised on node {} for {}; suspicion code " + logger.warning("{} raised suspicion on node {} for {}; suspicion code " "is {}".format(self, nodeName, reason, code)) # TODO need a more general solution here @@ -1859,9 +1874,14 @@ def reportSuspiciousNode(self, # code=InvalidSignature.code) if code in self.suspicions: - self.blacklistNode(nodeName, - reason=self.suspicions[code], - code=code) + # TODO: Reconsider tolerating some suspicions, and if you tolerate, + # why are they suspicions? + if code not in (Suspicions.DUPLICATE_PPR_SENT, + Suspicions.DUPLICATE_PR_SENT, + Suspicions.DUPLICATE_CM_SENT): + self.blacklistNode(nodeName, + reason=self.suspicions[code], + code=code) if offendingMsg: self.discard(offendingMsg, reason, logger.warning) diff --git a/plenum/server/notifier_plugin_manager.py b/plenum/server/notifier_plugin_manager.py index ba82ac6034..866089e682 100644 --- a/plenum/server/notifier_plugin_manager.py +++ b/plenum/server/notifier_plugin_manager.py @@ -3,7 +3,7 @@ from typing import Dict import time -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() diff --git a/plenum/server/plugin/stats_consumer/plugin_firebase_stats_consumer.py b/plenum/server/plugin/stats_consumer/plugin_firebase_stats_consumer.py index 61700f79cc..192ebd1ce4 100644 --- a/plenum/server/plugin/stats_consumer/plugin_firebase_stats_consumer.py +++ b/plenum/server/plugin/stats_consumer/plugin_firebase_stats_consumer.py @@ -8,7 +8,7 @@ 
PLUGIN_TYPE_STATS_CONSUMER, EVENT_VIEW_CHANGE, EVENT_PERIODIC_STATS_NODES, \ EVENT_PERIODIC_STATS_TOTAL_REQUESTS, EVENT_PERIODIC_STATS_NODE_INFO,\ EVENT_PERIODIC_STATS_SYSTEM_PERFORMANCE_INFO -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.config import STATS_SERVER_IP, STATS_SERVER_PORT from plenum.server.plugin.stats_consumer.stats_publisher import StatsPublisher,\ Topic diff --git a/plenum/server/plugin/stats_consumer/stats_publisher.py b/plenum/server/plugin/stats_consumer/stats_publisher.py index ce43b108bc..3e16f4f35d 100644 --- a/plenum/server/plugin/stats_consumer/stats_publisher.py +++ b/plenum/server/plugin/stats_consumer/stats_publisher.py @@ -2,7 +2,7 @@ from collections import deque from enum import Enum, unique -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.common.config_util import getConfig logger = getlogger() diff --git a/plenum/server/plugin_loader.py b/plenum/server/plugin_loader.py index 44d34c5241..5d067935d4 100644 --- a/plenum/server/plugin_loader.py +++ b/plenum/server/plugin_loader.py @@ -9,7 +9,7 @@ from plenum.common.types import PLUGIN_TYPE_VERIFICATION, PLUGIN_TYPE_PROCESSING, PLUGIN_TYPE_STATS_CONSUMER -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() diff --git a/plenum/server/pool_manager.py b/plenum/server/pool_manager.py index c6abfff15a..42902f675d 100644 --- a/plenum/server/pool_manager.py +++ b/plenum/server/pool_manager.py @@ -4,18 +4,20 @@ from copy import deepcopy from ledger.util import F from plenum.common.txn_util import updateGenesisPoolTxnFile -from raet.raeting import AutoMode from plenum.common.exceptions import UnsupportedOperation, \ - UnauthorizedClientRequest, RemoteNotFound + UnauthorizedClientRequest from plenum.common.stack_manager import TxnStackManager +from stp_core.network.auth_mode import AuthMode +from stp_core.network.exceptions import RemoteNotFound 
+from stp_core.types import HA -from plenum.common.types import HA, f, Reply +from plenum.common.types import f from plenum.common.constants import TXN_TYPE, NODE, TARGET_NYM, DATA, ALIAS, \ POOL_TXN_TYPES, NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT, VERKEY, SERVICES, \ VALIDATOR, CLIENT_STACK_SUFFIX -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.common.types import NodeDetail @@ -92,7 +94,7 @@ def getStackParamsAndNodeReg(self, name, basedirpath, nodeRegistry=None, nstack = dict(name=name, ha=HA('0.0.0.0', ha[1]), main=True, - auto=AutoMode.never) + auth_mode=AuthMode.RESTRICTED.value) nodeReg[name] = HA(*ha) cliname = cliname or (name + CLIENT_STACK_SUFFIX) @@ -101,7 +103,7 @@ def getStackParamsAndNodeReg(self, name, basedirpath, nodeRegistry=None, cstack = dict(name=cliname or (name + CLIENT_STACK_SUFFIX), ha=HA('0.0.0.0', cliha[1]), main=True, - auto=AutoMode.always) + auth_mode=AuthMode.ALLOW_ANY.value) cliNodeReg[cliname] = HA(*cliha) if basedirpath: @@ -175,30 +177,20 @@ def addNewNodeAndConnect(self, txn): self.connectNewRemote(txn, nodeName, self.node) self.node.newNodeJoined(txn) - def doElectionIfNeeded(self, nodeGoingDown): - for instId, replica in enumerate(self.node.replicas): - if replica.primaryName == '{}:{}'.format(nodeGoingDown, instId): - self.node.startViewChange(self.node.viewNo+1) - return - def nodeHaChanged(self, txn): nodeNym = txn[TARGET_NYM] nodeName = self.getNodeName(nodeNym) # TODO: Check if new HA is same as old HA and only update if # new HA is different. if nodeName == self.name: - logger.debug("{} clearing local data in keep". - format(self.node.nodestack.name)) - self.node.nodestack.keep.clearLocalData() - logger.debug("{} clearing local data in keep". 
- format(self.node.clientstack.name)) - self.node.clientstack.keep.clearLocalData() + self.node.nodestack.onHostAddressChanged() + self.node.clientstack.onHostAddressChanged() else: rid = self.stackHaChanged(txn, nodeName, self.node) if rid: self.node.nodestack.outBoxes.pop(rid, None) # self.node.sendPoolInfoToClients(txn) - self.doElectionIfNeeded(nodeName) + self.node.startViewChangeIfPrimaryWentOffline([nodeName]) def nodeKeysChanged(self, txn): # TODO: if the node whose keys are being changed is primary for any @@ -220,14 +212,14 @@ def nodeKeysChanged(self, txn): if rid: self.node.nodestack.outBoxes.pop(rid, None) # self.node.sendPoolInfoToClients(txn) - self.doElectionIfNeeded(nodeName) + self.node.startViewChangeIfPrimaryWentOffline([nodeName]) def nodeServicesChanged(self, txn): nodeNym = txn[TARGET_NYM] _, nodeInfo = self.getNodeInfoFromLedger(nodeNym) nodeName = nodeInfo[DATA][ALIAS] - oldServices = set(nodeInfo[DATA][SERVICES]) - newServices = set(txn[DATA][SERVICES]) + oldServices = set(nodeInfo[DATA].get(SERVICES, [])) + newServices = set(txn[DATA].get(SERVICES, [])) if oldServices == newServices: logger.debug("Node {} not changing {} since it is same as existing" .format(nodeNym, SERVICES)) @@ -252,7 +244,7 @@ def nodeServicesChanged(self, txn): format(self, nodeName)) self.node.nodeLeft(txn) - self.doElectionIfNeeded(nodeName) + self.node.startViewChangeIfPrimaryWentOffline([nodeName]) def getNodeName(self, nym): # Assuming ALIAS does not change @@ -420,7 +412,7 @@ def getNodeStackParams(name, nodeRegistry: Dict[str, HA], nstack = dict(name=name, ha=ha, main=True, - auto=AutoMode.never) + auth_mode=AuthMode.RESTRICTED.value) if basedirpath: nstack['basedirpath'] = basedirpath @@ -451,7 +443,7 @@ def getClientStackParams(name, nodeRegistry: Dict[str, HA], cliname, cstack = dict(name=cliname, ha=cliha, main=True, - auto=AutoMode.always) + auth_mode=AuthMode.ALLOW_ANY.value) if basedirpath: cstack['basedirpath'] = basedirpath diff --git 
a/plenum/server/primary_decider.py b/plenum/server/primary_decider.py index 236b995244..fbd6cf4178 100644 --- a/plenum/server/primary_decider.py +++ b/plenum/server/primary_decider.py @@ -1,6 +1,6 @@ from collections import deque -from plenum.common.util import MessageProcessor +from plenum.common.message_processor import MessageProcessor from plenum.server.has_action_queue import HasActionQueue diff --git a/plenum/server/primary_elector.py b/plenum/server/primary_elector.py index b5603e171e..516b166f22 100644 --- a/plenum/server/primary_elector.py +++ b/plenum/server/primary_elector.py @@ -7,7 +7,7 @@ from plenum.common.types import Nomination, Reelection, Primary, f from plenum.common.util import mostCommonElement, getQuorum -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.server import replica from plenum.server.primary_decider import PrimaryDecider from plenum.server.router import Router @@ -225,7 +225,7 @@ def nominateRandomReplica(self): if r.isPrimary is None] if undecideds: chosen = random.choice(undecideds) - logger.debug("Node {} does not have a primary, " + logger.debug("{} does not have a primary, " "replicas {} are undecided, " "choosing {} to nominate". format(self, undecideds, chosen)) @@ -234,7 +234,7 @@ def nominateRandomReplica(self): self.replicaNominatedForItself = chosen self._schedule(partial(self.nominateReplica, chosen)) else: - logger.debug("Node {} does not have a primary, " + logger.debug("{} does not have a primary, " "but elections for all {} instances " "have been decided". format(self, len(self.replicas))) @@ -380,8 +380,8 @@ def processPrimary(self, prim: Primary, sender: str) -> None: logger.debug) else: logger.debug( - "{} received {} but does it not have primary quorum yet" - .format(self.name, prim)) + "{} received {} but does it not have primary quorum " + "yet".format(self.name, prim)) else: self.discard(prim, "already got primary declaration from {}". 
@@ -403,8 +403,8 @@ def processReelection(self, reelection: Reelection, sender: str): :param reelection: the reelection request :param sender: name of the node from which the reelection was sent """ - logger.debug( - "{}'s elector started processing reelection msg".format(self.name)) + logger.debug("{}'s elector started processing reelection msg". + format(self.name)) # Check for election round number to discard any previous # reelection round message instId = reelection.instId @@ -434,8 +434,8 @@ def processReelection(self, reelection: Reelection, sender: str): # turn out to be malicious and send re-election frequently if self.hasReelectionQuorum(instId): - logger.debug("{} achieved reelection quorum".format(replica), - extra={"cli": True}) + logger.debug("{} achieved reelection quorum". + format(replica), extra={"cli": True}) # Need to find the most frequent tie reported to avoid `tie`s # from malicious nodes. Since lists are not hashable so # converting each tie(a list of node names) to a tuple. 
@@ -445,21 +445,21 @@ def processReelection(self, reelection: Reelection, sender: str): self.setElectionDefaults(instId) - # There was a tie among this and some other node(s), so do a - # random wait - if replica.name in tieAmong: - # Try to nominate self after a random delay but dont block - # until that delay and because a nominate from another - # node might be sent - self._schedule(partial(self.nominateReplica, instId), - random.randint(1, 3)) - else: - # Now try to nominate self again as there is a reelection - self.nominateReplica(instId) + if not self.hasPrimaryReplica: + # There was a tie among this and some other node(s), so do a + # random wait + if replica.name in tieAmong: + # Try to nominate self after a random delay but dont block + # until that delay and because a nominate from another + # node might be sent + self._schedule(partial(self.nominateReplica, instId), + random.randint(1, 3)) + else: + # Now try to nominate self again as there is a reelection + self.nominateReplica(instId) else: - logger.debug( - "{} does not have re-election quorum yet. Got only {}".format( - replica, len(self.reElectionProposals[instId]))) + logger.debug("{} does not have re-election quorum yet. " + "Got only {}".format(replica, len(self.reElectionProposals[instId]))) else: self.discard(reelection, "already got re-election proposal from {}". @@ -498,9 +498,9 @@ def hasPrimaryQuorum(self, instId: int) -> bool: q = self.quorum result = pd >= q if result: - logger.trace("{} primary declarations {} meet required quorum {} " - "for instance id {}".format(self.node.replicas[instId], - pd, q, instId)) + logger.trace("{} primary declarations {} meet required " + "quorum {} for instance id {}". 
+ format(self.node.replicas[instId], pd, q, instId)) return result def hasNominationsFromAll(self, instId: int) -> bool: @@ -545,7 +545,8 @@ def decidePrimary(self, instId: int): return if self.hasNominationQuorum(instId): - logger.debug("{} has got nomination quorum now".format(replica)) + logger.debug("{} has got nomination quorum now". + format(replica)) primaryCandidates = self.getPrimaryCandidates(instId) # In case of one clear winner @@ -554,29 +555,30 @@ def decidePrimary(self, instId: int): if self.hasNominationsFromAll(instId) or ( self.scheduledPrimaryDecisions[instId] is not None and self.hasPrimaryDecisionTimerExpired(instId)): - logger.debug( - "{} has nominations from all so sending primary".format( - replica)) + logger.debug("{} has nominations from all so sending " + "primary".format(replica)) self.sendPrimary(instId, primaryName) else: votesNeeded = math.ceil((self.nodeCount + 1) / 2.0) if votes >= votesNeeded or ( self.scheduledPrimaryDecisions[instId] is not None and self.hasPrimaryDecisionTimerExpired(instId)): - logger.debug( - "{} does not have nominations from all but " - "has {} votes for {} so sending primary" - .format(replica, votes, primaryName)) + logger.debug("{} does not have nominations from " + "all but has {} votes for {} so sending " + "primary". + format(replica, votes, primaryName)) self.sendPrimary(instId, primaryName) return else: - logger.debug( - "{} has {} nominations for {}, but needs {}". - format(replica, votes, primaryName, votesNeeded)) + logger.debug("{} has {} nominations for {}, but " + "needs {}".format(replica, votes, + primaryName, + votesNeeded)) self.schedulePrimaryDecision(instId) return else: - logger.debug("{} has {} nominations. Attempting reelection". + logger.debug("{} has {} nominations. Attempting " + "reelection". 
format(replica, self.nominations[instId])) if self.hasNominationsFromAll(instId) or ( self.scheduledPrimaryDecisions[instId] is not None and @@ -593,7 +595,8 @@ def decidePrimary(self, instId: int): self.schedulePrimaryDecision(instId) else: - logger.debug("{} has not got nomination quorum yet".format(replica)) + logger.debug("{} has not got nomination quorum yet". + format(replica)) def sendNomination(self, name: str, instId: int, viewNo: int): """ @@ -616,8 +619,7 @@ def sendPrimary(self, instId: int, primaryName: str): self.primaryDeclarations[instId][replica.name] = primaryName self.scheduledPrimaryDecisions[instId] = None logger.debug("{} declaring primary as: {} on the basis of {}". - format(replica, primaryName, - self.nominations[instId])) + format(replica, primaryName, self.nominations[instId])) self.send(Primary(primaryName, instId, self.viewNo)) def sendReelection(self, instId: int, @@ -636,7 +638,8 @@ def sendReelection(self, instId: int, self.scheduledPrimaryDecisions[instId] = None logger.debug("{} declaring reelection round {} for: {}". format(replica.name, - self.reElectionRounds[instId], primaryCandidates)) + self.reElectionRounds[instId], + primaryCandidates)) self.send( Reelection(instId, self.reElectionRounds[instId], primaryCandidates, self.viewNo)) @@ -662,13 +665,11 @@ def schedulePrimaryDecision(self, instId: int): self._schedule(partial(self.decidePrimary, instId), (1 * self.nodeCount)) else: - logger.debug( - "{} already scheduled primary decision".format(replica)) + logger.debug("{} already scheduled primary decision". 
+ format(replica)) if self.hasPrimaryDecisionTimerExpired(instId): - logger.debug( - "{} executing already scheduled primary decision " - "since timer expired" - .format(replica)) + logger.debug("{} executing already scheduled primary " + "decision since timer expired".format(replica)) self._schedule(partial(self.decidePrimary, instId)) def hasPrimaryDecisionTimerExpired(self, instId: int) -> bool: @@ -723,14 +724,13 @@ def viewChanged(self, viewNo: int): # Schedule execution of any pending msgs from the new view if viewNo in self.pendingMsgsForViews: - logger.debug("Pending election messages found for view {}". - format(viewNo)) + logger.debug("Pending election messages found for " + "view {}".format(viewNo)) pendingMsgs = self.pendingMsgsForViews.pop(viewNo) self.inBox.extendleft(pendingMsgs) else: - logger.debug( - "{} found no pending election messages for view {}". - format(self.name, viewNo)) + logger.debug("{} found no pending election messages for " + "view {}".format(self.name, viewNo)) self.nominateRandomReplica() else: diff --git a/plenum/server/primary_selector.py b/plenum/server/primary_selector.py index ea13361991..50b5605b03 100644 --- a/plenum/server/primary_selector.py +++ b/plenum/server/primary_selector.py @@ -1,4 +1,4 @@ -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.server import replica from plenum.server.primary_decider import PrimaryDecider @@ -13,7 +13,14 @@ def __init__(self, node): self.nodeNamesByRank = sorted(self.nodeNames) def decidePrimaries(self): # overridden method of PrimaryDecider - self.startSelection() + self.scheduleSelection() + + def scheduleSelection(self): + """ + Schedule election at some time in the future. Currently the election + starts immediately. 
+ """ + self._schedule(self.startSelection) def startSelection(self): logger.debug("{} starting selection".format(self)) diff --git a/plenum/server/propagator.py b/plenum/server/propagator.py index 6cb2897fbd..2f83fe1962 100644 --- a/plenum/server/propagator.py +++ b/plenum/server/propagator.py @@ -2,7 +2,7 @@ from plenum.common.types import Propagate from plenum.common.request import Request -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.common.util import checkIfMoreThanFSameItems logger = getlogger() @@ -15,6 +15,9 @@ class ReqState: def __init__(self, request: Request): self.request = request self.forwarded = False + # forwardedTo helps in finding to how many replicas has this request + # been forwarded to, helps in garbage collection, see `gc` of `Replica` + self.forwardedTo = 0 self.propagates = {} self.finalised = None @@ -51,11 +54,12 @@ def forwarded(self, req: Request) -> bool: """ return self[req.key].forwarded - def flagAsForwarded(self, req: Request): + def flagAsForwarded(self, req: Request, to: int): """ Set the given request's forwarded attribute to True """ self[req.key].forwarded = True + self[req.key].forwardedTo = to def addPropagate(self, req: Request, sender: str): """ @@ -129,7 +133,7 @@ def propagate(self, request: Request, clientName): self.send(propagate) @staticmethod - def createPropagate(request: Union[Request, dict], clientName) -> Propagate: + def createPropagate(request: Union[Request, dict], identifier) -> Propagate: """ Create a new PROPAGATE for the given REQUEST. 
@@ -140,9 +144,11 @@ def createPropagate(request: Union[Request, dict], clientName) -> Propagate: logger.error("Request not formatted properly to create propagate") return logger.debug("Creating PROPAGATE for REQUEST {}".format(request)) - request = request.__getstate__() if isinstance(request, Request) else \ + request = request.as_dict if isinstance(request, Request) else \ request - return Propagate(request, clientName) + if isinstance(identifier, bytes): + identifier = identifier.decode() + return Propagate(request, identifier) # noinspection PyUnresolvedReferences def canForward(self, request: Request) -> bool: @@ -176,7 +182,7 @@ def forward(self, request: Request): for repQueue in self.msgsToReplicas: repQueue.append(self.requests[key].finalised.reqDigest) self.monitor.requestUnOrdered(*key) - self.requests.flagAsForwarded(request) + self.requests.flagAsForwarded(request, len(self.msgsToReplicas)) # noinspection PyUnresolvedReferences def recordAndPropagate(self, request: Request, clientName): diff --git a/plenum/server/replica.py b/plenum/server/replica.py index 1aa6e9e181..f78e06b060 100644 --- a/plenum/server/replica.py +++ b/plenum/server/replica.py @@ -2,10 +2,11 @@ from collections import deque, OrderedDict from enum import IntEnum from enum import unique -from typing import Dict +from typing import Dict, Union from typing import Optional, Any from typing import Set from typing import Tuple +from hashlib import sha256 from orderedset import OrderedSet from sortedcontainers import SortedDict @@ -18,8 +19,9 @@ Prepare, Commit, Ordered, ThreePhaseMsg, ThreePhaseKey, ThreePCState, \ CheckpointState, Checkpoint from plenum.common.request import ReqDigest -from plenum.common.util import MessageProcessor, updateNamedTuple -from plenum.common.log import getlogger +from plenum.common.util import updateNamedTuple +from plenum.common.message_processor import MessageProcessor +from stp_core.common.log import getlogger from plenum.server.has_action_queue import 
HasActionQueue from plenum.server.models import Commits, Prepares from plenum.server.router import Router @@ -58,6 +60,9 @@ def inc(self, key): """ self.stats[key] += 1 + def get(self, key): + return self.stats[key] + def __repr__(self): return OrderedDict((TPCStat(k).name, v) for k, v in self.stats.items()) @@ -170,8 +175,8 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, self.prepares = Prepares() # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]] - self.commits = Commits() # type: Dict[Tuple[int, int], - # Tuple[Tuple[str, int], Set[str]]] + self.commits = Commits() + # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]] # Set of tuples to keep track of ordered requests. Each tuple is # (viewNo, ppSeqNo) @@ -198,13 +203,15 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, self.checkpoints = SortedDict(lambda k: k[0]) + self.stashedRecvdCheckpoints = {} # type: Dict[Tuple, + # Dict[str, Checkpoint]] + self.stashingWhileOutsideWaterMarks = deque() # Low water mark self._h = 0 # type: int - - # High water mark - self.H = self._h + self.config.LOG_SIZE # type: int + # Set high water mark (`H`) too + self.h = 0 # type: int self.lastPrePrepareSeqNo = self.h # type: int @@ -216,6 +223,7 @@ def h(self) -> int: def h(self, n): self._h = n self.H = self._h + self.config.LOG_SIZE + logger.debug('{} set watermarks as {} {}'.format(self, self.h, self.H)) @property def requests(self): @@ -462,7 +470,12 @@ def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: :param sender: the name of the node that sent this request """ senderRep = self.generateName(sender, self.instId) - if self.isPpSeqNoAcceptable(msg.ppSeqNo): + if self.isPpSeqNoStable(msg.ppSeqNo): + self.discard(msg, + "achieved stable checkpoint for 3 phase message", + logger.debug) + return + if self.isPpSeqNoBetweenWaterMarks(msg.ppSeqNo): try: self.threePhaseRouter.handleSync((msg, senderRep)) except SuspiciousNode as ex: @@ -471,7 +484,7 
@@ def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: logger.debug("{} stashing 3 phase message {} since ppSeqNo {} is " "not between {} and {}". format(self, msg, msg.ppSeqNo, self.h, self.H)) - self.stashingWhileOutsideWaterMarks.append((msg, sender)) + self.stashOutsideWatermarks((msg, sender)) def processReqDigest(self, rd: ReqDigest): """ @@ -548,6 +561,11 @@ def processPrepare(self, prepare: Prepare, sender: str) -> None: # TODO move this try/except up higher logger.debug("{} received PREPARE{} from {}". format(self, (prepare.viewNo, prepare.ppSeqNo), sender)) + if self.isPpSeqNoStable(prepare.ppSeqNo): + self.discard(prepare, + "achieved stable checkpoint for Preapre", + logger.debug) + return try: if self.isValidPrepare(prepare, sender): self.addToPrepares(prepare, sender) @@ -572,6 +590,12 @@ def processCommit(self, commit: Commit, sender: str) -> None: """ logger.debug("{} received COMMIT {} from {}". format(self, commit, sender)) + if self.isPpSeqNoStable(commit.ppSeqNo): + self.discard(commit, + "achieved stable checkpoint for Commit", + logger.debug) + return + if self.isValidCommit(commit, sender): self.stats.inc(TPCStat.CommitRcvd) self.addToCommits(commit, sender) @@ -616,7 +640,7 @@ def doPrePrepare(self, reqDigest: ReqDigest) -> None: "than high water mark {}". 
format(self, (self.viewNo, self.lastPrePrepareSeqNo+1), self.H)) - self.stashingWhileOutsideWaterMarks.append(reqDigest) + self.stashOutsideWatermarks(reqDigest) return self.lastPrePrepareSeqNo += 1 tm = time.time()*1000 @@ -767,7 +791,7 @@ def isValidPrepare(self, prepare: Prepare, sender: str) -> bool: raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE was not sent for this PREPARE, certainly # malicious behavior - elif key not in ppReqs: + if key not in ppReqs: raise SuspiciousNode(sender, Suspicions.UNKNOWN_PR_SENT, prepare) elif prepare.digest != self.requests.digest(ppReqs[key][0]): raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) @@ -826,6 +850,7 @@ def isValidCommit(self, commit: Commit, sender: str) -> bool: elif self.commits.hasCommitFrom(commit, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_CM_SENT, commit) elif commit.digest != self.getDigestFor3PhaseKey(ThreePhaseKey(*key)): + raise SuspiciousNode(sender, Suspicions.CM_DIGEST_WRONG, commit) elif key in ppReqs and commit.ppTime != ppReqs[key][1]: raise SuspiciousNode(sender, Suspicions.CM_TIME_WRONG, @@ -880,10 +905,20 @@ def canOrder(self, commit: Commit) -> Tuple[bool, Optional[str]]: return True, None def isNextInOrdering(self, commit: Commit): + # TODO: This method does a lot of work, choose correct data + # structures to make it efficient. viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo + if self.ordered and self.ordered[-1] == (viewNo, ppSeqNo-1): return True - for (v, p) in self.commits: + + # if some PREPAREs/COMMITs were completely missed in the same view + toCheck = set() + toCheck.update(set(self.sentPrePrepares.keys())) + toCheck.update(set(self.prePrepares.keys())) + toCheck.update(set(self.prepares.keys())) + toCheck.update(set(self.commits.keys())) + for (v, p) in toCheck: if v < viewNo: # Have commits from previous view that are unordered. 
# TODO: Question: would commits be always ordered, what if @@ -896,13 +931,14 @@ def isNextInOrdering(self, commit: Commit): # TODO: Revisit PBFT paper, how to make sure that last request of the # last view has been ordered? Need change in `VIEW CHANGE` mechanism. - # Somehow view change needs to communicate what the last request was. - # Also what if some COMMITs were completely missed in the same view + # View change needs to communicate what the last request was. return True def orderStashedCommits(self): # TODO: What if the first few commits were out of order and stashed? # `self.ordered` would be empty + logger.debug('{} trying to order from stashed commits. {} {}'. + format(self, self.ordered, self.stashedCommitsForOrdering)) if self.ordered: lastOrdered = self.ordered[-1] vToRemove = set() @@ -930,8 +966,6 @@ def orderStashedCommits(self): for v in vToRemove: del self.stashedCommitsForOrdering[v] - # if self.stashedCommitsForOrdering: - # self._schedule(self.orderStashedCommits, 2) if not self.stashedCommitsForOrdering: self.stopRepeating(self.orderStashedCommits) @@ -945,7 +979,7 @@ def isLowestCommitInView(self, commit): ppSeqNos.append(p) return min(ppSeqNos) == commit.ppSeqNo if ppSeqNos else True - def tryOrdering(self, commit: Commit) -> None: + def tryOrdering(self, commit: Commit) -> bool: """ Attempt to send an ORDERED request for the specified COMMIT to the node. @@ -959,7 +993,7 @@ def tryOrdering(self, commit: Commit) -> None: if not digest: logger.error("{} did not find digest for {}, request key {}". 
format(self, key, reqKey)) - return + return False self.doOrder(*key, *reqKey, digest, commit.ppTime) return True @@ -980,35 +1014,30 @@ def doOrder(self, viewNo, ppSeqNo, identifier, reqId, digest, ppTime): self.addToCheckpoint(ppSeqNo, digest) def processCheckpoint(self, msg: Checkpoint, sender: str): - if self.checkpoints: - seqNo = msg.seqNo - _, firstChk = self.firstCheckPoint - if firstChk.isStable: - if firstChk.seqNo == seqNo: - self.discard(msg, reason="Checkpoint already stable", - logMethod=logger.debug) - return - if firstChk.seqNo > seqNo: - self.discard(msg, reason="Higher stable checkpoint present", - logMethod=logger.debug) - return - for state in self.checkpoints.values(): - if state.seqNo == seqNo: - if state.digest == msg.digest: - state.receivedDigests[sender] = msg.digest - break - else: - logger.error("{} received an incorrect digest {} for " - "checkpoint {} from {}".format(self, - msg.digest, - seqNo, - sender)) - return - if len(state.receivedDigests) == 2*self.f: - self.markCheckPointStable(msg.seqNo) + logger.debug('{} received checkpoint {} from {}'. 
+ format(self, msg, sender)) + seqNoEnd = msg.seqNoEnd + if self.isPpSeqNoStable(seqNoEnd): + self.discard(msg, reason="Checkpoint already stable", + logMethod=logger.debug) + return + + seqNoStart = msg.seqNoStart + key = (seqNoStart, seqNoEnd) + if key in self.checkpoints and self.checkpoints[key].digest: + ckState = self.checkpoints[key] + if ckState.digest == msg.digest: + ckState.receivedDigests[sender] = msg.digest + else: + logger.error("{} received an incorrect digest {} for " + "checkpoint {} from {}".format(self, + msg.digest, + key, + sender)) + return + self.checkIfCheckpointStable(key) else: - self.discard(msg, reason="No checkpoints present to tally", - logMethod=logger.warn) + self.stashCheckpoint(msg, sender) def _newCheckpointState(self, ppSeqNo, digest) -> CheckpointState: s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1 @@ -1028,14 +1057,18 @@ def addToCheckpoint(self, ppSeqNo, digest): break else: state = self._newCheckpointState(ppSeqNo, digest) - s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ + s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1 if len(state.digests) == self.config.CHK_FREQ: - state = updateNamedTuple(state, digest=serialize(state.digests), + state = updateNamedTuple(state, + digest=sha256( + serialize(state.digests).encode() + ).hexdigest(), digests=[]) self.checkpoints[s, e] = state - self.send(Checkpoint(self.instId, self.viewNo, ppSeqNo, + self.send(Checkpoint(self.instId, self.viewNo, s, e, state.digest)) + self.processStashedCheckpoints((s, e)) def markCheckPointStable(self, seqNo): previousCheckpoints = [] @@ -1058,6 +1091,32 @@ def markCheckPointStable(self, seqNo): logger.debug("{} marked stable checkpoint {}".format(self, (s, e))) self.processStashedMsgsForNewWaterMarks() + def checkIfCheckpointStable(self, key: Tuple[int, int]): + ckState = self.checkpoints[key] + if len(ckState.receivedDigests) == 2 * self.f: + self.markCheckPointStable(ckState.seqNo) + return True + else: + logger.debug('{} has 
state.receivedDigests as {}'. + format(self, ckState.receivedDigests.keys())) + return False + + def stashCheckpoint(self, ck: Checkpoint, sender: str): + seqNoStart, seqNoEnd = ck.seqNoStart, ck.seqNoEnd + if (seqNoStart, seqNoEnd) not in self.stashedRecvdCheckpoints: + self.stashedRecvdCheckpoints[seqNoStart, seqNoEnd] = {} + self.stashedRecvdCheckpoints[seqNoStart, seqNoEnd][sender] = ck + + def processStashedCheckpoints(self, key): + i = 0 + if key in self.stashedRecvdCheckpoints: + for sender, ck in self.stashedRecvdCheckpoints[key].items(): + self.processCheckpoint(ck, sender) + i += 1 + logger.debug('{} processed {} stashed checkpoints for {}'. + format(self, i, key)) + return i + def gc(self, tillSeqNo): logger.debug("{} cleaning up till {}".format(self, tillSeqNo)) tpcKeys = set() @@ -1081,15 +1140,27 @@ def gc(self, tillSeqNo): self.prePrepares.pop(k, None) self.prepares.pop(k, None) self.commits.pop(k, None) - if k in self.ordered: - self.ordered.remove(k) + # if k in self.ordered: + # self.ordered.remove(k) for k in reqKeys: - self.requests.pop(k, None) + self.requests[k].forwardedTo -= 1 + if self.requests[k].forwardedTo == 0: + logger.debug('{} clearing requests {} from previous checkpoints'. 
+ format(self, len(reqKeys))) + self.requests.pop(k) + + def stashOutsideWatermarks(self, item: Union[ReqDigest, Tuple]): + self.stashingWhileOutsideWaterMarks.append(item) def processStashedMsgsForNewWaterMarks(self): - while self.stashingWhileOutsideWaterMarks: - item = self.stashingWhileOutsideWaterMarks.pop() + # `stashingWhileOutsideWaterMarks` can grow from methods called in the + # loop below, so `stashingWhileOutsideWaterMarks` might never + # become empty during the execution of this method resulting + # in an infinite loop + itemsToConsume = len(self.stashingWhileOutsideWaterMarks) + while itemsToConsume: + item = self.stashingWhileOutsideWaterMarks.popleft() logger.debug("{} processing stashed item {} after new stable " "checkpoint".format(self, item)) @@ -1101,22 +1172,45 @@ def processStashedMsgsForNewWaterMarks(self): logger.error("{} cannot process {} " "from stashingWhileOutsideWaterMarks". format(self, item)) + itemsToConsume -= 1 + + @staticmethod + def peekitem(d, i): + # Adding it since its not present in version supported by + # Ubuntu repositories. 
+ key = d._list[i] + return key, d[key] @property def firstCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: - return self.checkpoints.peekitem(0) + return self.peekitem(self.checkpoints, 0) + # return self.checkpoints.peekitem(0) @property def lastCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: - return self.checkpoints.peekitem(-1) + return self.peekitem(self.checkpoints, -1) + # return self.checkpoints.peekitem(-1) + + def isPpSeqNoStable(self, ppSeqNo): + """ + :param ppSeqNo: + :return: True if ppSeqNo is less than or equal to last stable + checkpoint, false otherwise + """ + ck = self.firstCheckPoint + if ck: + _, ckState = ck + return ckState.isStable and ckState.seqNo >= ppSeqNo + else: + return False - def isPpSeqNoAcceptable(self, ppSeqNo: int): + def isPpSeqNoBetweenWaterMarks(self, ppSeqNo: int): return self.h < ppSeqNo <= self.H def addToOrdered(self, viewNo: int, ppSeqNo: int): diff --git a/plenum/test/blacklist/test_blacklist_client.py b/plenum/test/blacklist/test_blacklist_client.py index 3fc8b014ae..98332c8f70 100644 --- a/plenum/test/blacklist/test_blacklist_client.py +++ b/plenum/test/blacklist/test_blacklist_client.py @@ -1,6 +1,7 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually +from plenum.test import waits from plenum.test.malicious_behaviors_client import makeClientFaulty, \ sendsUnsignedRequest @@ -21,4 +22,5 @@ def chk(): for node in nodeSet: assert not node.isClientBlacklisted(client1.name) - looper.run(eventually(chk, retryWait=1, timeout=3)) + timeout = waits.expectedClientConnectionTimeout(nodeSet.f) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py b/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py index 25f4701119..2192deac12 100644 --- 
a/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py +++ b/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py @@ -1,7 +1,8 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Nomination +from plenum.test import waits whitelist = ['already got nomination', 'doing nothing for now'] @@ -25,4 +26,5 @@ def chk(): for node in A, C, D: assert node.isNodeBlacklisted(B.name) - looper.run(eventually(chk, retryWait=1, timeout=3)) + timeout = waits.expectedNominationTimeout(len(nodeSet.nodes)) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py b/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py index 479545099b..8ee3b4e576 100644 --- a/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py +++ b/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py @@ -1,7 +1,8 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Primary +from plenum.test import waits whitelist = ['got primary declaration', 'doing nothing for now', @@ -29,4 +30,5 @@ def chk(): for node in A, C, D: assert node.isNodeBlacklisted(B.name) - looper.run(eventually(chk, retryWait=1, timeout=3)) + timeout = waits.expectedNominationTimeout(len(nodeSet.nodes)) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/checkpoints/conftest.py b/plenum/test/checkpoints/conftest.py index 1884911a93..ecbeff66ec 100644 --- a/plenum/test/checkpoints/conftest.py +++ b/plenum/test/checkpoints/conftest.py @@ -3,18 +3,20 @@ from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ client1, wallet1, client1Connected -CHK_FREQ = 3 +CHK_FREQ = 5 @pytest.fixture(scope="module") def 
chkFreqPatched(tconf, request): oldChkFreq = tconf.CHK_FREQ + oldLogSize = tconf.LOG_SIZE + tconf.CHK_FREQ = CHK_FREQ tconf.LOG_SIZE = 3*tconf.CHK_FREQ def reset(): tconf.CHK_FREQ = oldChkFreq - tconf.LOG_SIZE = 3*tconf.CHK_FREQ + tconf.LOG_SIZE = oldLogSize request.addfinalizer(reset) diff --git a/plenum/test/checkpoints/helper.py b/plenum/test/checkpoints/helper.py index 85b76a1faf..d9e9ddc01d 100644 --- a/plenum/test/checkpoints/helper.py +++ b/plenum/test/checkpoints/helper.py @@ -1,9 +1,10 @@ def chkChkpoints(nodes, total: int, stableIndex: int=None): for node in nodes: for r in node.replicas: - assert len(r.checkpoints) == total + assert len(r.checkpoints) == total, '{} checkpoints {}'.\ + format(r, len(r.checkpoints)) if stableIndex is not None: - assert r.checkpoints.values()[stableIndex].isStable + assert r.checkpoints.values()[stableIndex].isStable, r.name else: for state in r.checkpoints.values(): assert not state.isStable diff --git a/plenum/test/checkpoints/test_basic_checkpointing.py b/plenum/test/checkpoints/test_basic_checkpointing.py index 11b9738ac7..6d752f3b8e 100644 --- a/plenum/test/checkpoints/test_basic_checkpointing.py +++ b/plenum/test/checkpoints/test_basic_checkpointing.py @@ -1,4 +1,5 @@ -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually +from plenum.test import waits from plenum.test.checkpoints.conftest import CHK_FREQ from plenum.test.checkpoints.helper import chkChkpoints from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies @@ -18,7 +19,8 @@ def testCheckpointCreated(chkFreqPatched, looper, txnPoolNodeSet, client1, sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1) - looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1)) + timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout)) def testOldCheckpointDeleted(chkFreqPatched, looper, 
txnPoolNodeSet, client1, @@ -32,4 +34,5 @@ def testOldCheckpointDeleted(chkFreqPatched, looper, txnPoolNodeSet, client1, sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1) - looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1)) + timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1, timeout=timeout)) diff --git a/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py b/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py index a8c93f9f71..b954a02540 100644 --- a/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py +++ b/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py @@ -1,4 +1,4 @@ -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Checkpoint from plenum.test.checkpoints.conftest import CHK_FREQ from plenum.test.checkpoints.helper import chkChkpoints @@ -14,8 +14,7 @@ def testDiscardCheckpointMsgForStableCheckpoint(chkFreqPatched, looper, node1 = txnPoolNodeSet[0] rep1 = node1.replicas[0] _, stableChk = rep1.firstCheckPoint - oldChkpointMsg = Checkpoint(rep1.instId, rep1.viewNo, stableChk.seqNo, - stableChk.digest) + oldChkpointMsg = Checkpoint(rep1.instId, rep1.viewNo, *_, stableChk.digest) rep1.send(oldChkpointMsg) recvReplicas = [n.replicas[0] for n in txnPoolNodeSet[1:]] looper.run(eventually(checkDiscardMsg, recvReplicas, oldChkpointMsg, diff --git a/plenum/test/checkpoints/test_message_outside_watermark.py b/plenum/test/checkpoints/test_message_outside_watermark.py new file mode 100644 index 0000000000..999a2c0e0a --- /dev/null +++ b/plenum/test/checkpoints/test_message_outside_watermark.py @@ -0,0 +1,58 @@ +from plenum.test import waits +from plenum.test.checkpoints.conftest import CHK_FREQ +from plenum.test.checkpoints.helper import chkChkpoints +from plenum.test.delayers import ppDelay +from plenum.test.helper 
import sendReqsToNodesAndVerifySuffReplies, \ + countDiscarded +from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.test_node import getNonPrimaryReplicas, TestReplica +from stp_core.loop.eventually import eventually + + +def testNonPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper, + txnPoolNodeSet, client1, + wallet1, client1Connected): + """ + A node is slow in processing PRE-PREPAREs such that lot of requests happen + and the slow node has started getting 3 phase messages outside of it + watermarks. Check that it queues up requests outside watermarks and once it + has received stable checkpoint it processes more requests. It sends other + nodes 3 phase messages older than their stable checkpoint so they should + discard them. + """ + delay = 15 + instId = 1 + reqsToSend = chkFreqPatched.LOG_SIZE + 2 + npr = getNonPrimaryReplicas(txnPoolNodeSet, instId) + slowReplica = npr[0] + slowNode = slowReplica.node + slowNode.nodeIbStasher.delay(ppDelay(delay, instId)) + + def discardCounts(replicas, pat): + counts = {} + for r in replicas: + counts[r.name] = countDiscarded(r, pat) + return counts + + oldStashCount = slowReplica.spylog.count(TestReplica.stashOutsideWatermarks.__name__) + oldDiscardCounts = discardCounts([n.replicas[instId] for n in + txnPoolNodeSet if n != slowNode], + 'achieved stable checkpoint') + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, 1) + timeout =waits.expectedPoolLedgerCheck(len(txnPoolNodeSet)) + looper.run(eventually(checkNodeLedgersForEquality, slowNode, + *[_ for _ in txnPoolNodeSet if _ != slowNode], + retryWait=1, timeout=timeout)) + newStashCount = slowReplica.spylog.count(TestReplica.stashOutsideWatermarks.__name__) + assert newStashCount > oldStashCount + + def chk(): + counts = discardCounts([n.replicas[instId] for n in + txnPoolNodeSet if n != slowNode], + 'achieved stable checkpoint') + for nm, count in counts.items(): + assert count > 
oldDiscardCounts[nm] + + timeout = waits.expectedNodeToNodeMessageDeliveryTime() * len(txnPoolNodeSet) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/checkpoints/test_message_outside_watermark1.py b/plenum/test/checkpoints/test_message_outside_watermark1.py new file mode 100644 index 0000000000..ca34741130 --- /dev/null +++ b/plenum/test/checkpoints/test_message_outside_watermark1.py @@ -0,0 +1,36 @@ +from stp_core.loop.eventually import eventually + +from plenum.test import waits +from plenum.test.delayers import ppDelay +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica + + +def testPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper, + txnPoolNodeSet, client1, + wallet1, client1Connected): + """ + One of the primary starts getting lot of requests, more than his log size + and queues up requests since they will go beyond its watermarks. This + happens since other nodes are slow in processing its PRE-PREPARE. + Eventually this primary will send PRE-PREPARE for all requests and those + requests will complete + """ + delay = 10 + instId = 1 + reqsToSend = 2*chkFreqPatched.LOG_SIZE + 1 + npr = getNonPrimaryReplicas(txnPoolNodeSet, instId) + pr = getPrimaryReplica(txnPoolNodeSet, instId) + from plenum.server.replica import TPCStat + orderedCount = pr.stats.get(TPCStat.OrderSent) + + for r in npr: + r.node.nodeIbStasher.delay(ppDelay(delay, instId)) + + def chk(): + assert orderedCount + reqsToSend == pr.stats.get(TPCStat.OrderSent) + + print('Sending {} requests'.format(reqsToSend)) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, 1) + # TODO Select or create the timeout from 'waits'. Don't use constant. 
+ looper.run(eventually(chk, retryWait=1, timeout=80)) diff --git a/plenum/test/checkpoints/test_stable_checkpoint.py b/plenum/test/checkpoints/test_stable_checkpoint.py index d48d6e53a8..429ddafddb 100644 --- a/plenum/test/checkpoints/test_stable_checkpoint.py +++ b/plenum/test/checkpoints/test_stable_checkpoint.py @@ -1,4 +1,5 @@ -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually +from plenum.test import waits from plenum.test.checkpoints.conftest import CHK_FREQ from plenum.test.checkpoints.helper import chkChkpoints from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies @@ -10,7 +11,7 @@ def checkRequestCounts(nodes, count): for r in node.replicas: assert len(r.commits) == count assert len(r.prepares) == count - assert len(r.ordered) == count + # assert len(r.ordered) == count def testRequestOlderThanStableCheckpointRemoved(chkFreqPatched, looper, @@ -18,13 +19,16 @@ def testRequestOlderThanStableCheckpointRemoved(chkFreqPatched, looper, wallet1, client1Connected): reqs = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ-1, 1) - looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, retryWait=1)) + timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, retryWait=1, timeout=timeout)) checkRequestCounts(txnPoolNodeSet, len(reqs)) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1) - looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1)) + + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout)) checkRequestCounts(txnPoolNodeSet, 0) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 3*CHK_FREQ + 1, 1) - looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1)) + + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1, timeout=timeout)) checkRequestCounts(txnPoolNodeSet, 1) diff --git 
a/plenum/test/checkpoints/test_stable_checkpoint1.py b/plenum/test/checkpoints/test_stable_checkpoint1.py new file mode 100644 index 0000000000..6492f47295 --- /dev/null +++ b/plenum/test/checkpoints/test_stable_checkpoint1.py @@ -0,0 +1,24 @@ +from stp_core.loop.eventually import eventually + +from plenum.test import waits +from plenum.test.checkpoints.conftest import CHK_FREQ +from plenum.test.checkpoints.helper import chkChkpoints +from plenum.test.delayers import ppDelay +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.test_node import getPrimaryReplica + + +def testStableCheckpointWhenOneInstanceSlow(chkFreqPatched, looper, + txnPoolNodeSet, client1, + wallet1, client1Connected): + delay = 5 + pr = getPrimaryReplica(txnPoolNodeSet, 1) + slowNode = pr.node + otherNodes = [n for n in txnPoolNodeSet if n != slowNode] + for n in otherNodes: + n.nodeIbStasher.delay(ppDelay(delay, 1)) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ, 1) + timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + delay + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, + timeout=timeout)) diff --git a/plenum/test/cli/conftest.py b/plenum/test/cli/conftest.py index e28e75901b..c7eabbebff 100644 --- a/plenum/test/cli/conftest.py +++ b/plenum/test/cli/conftest.py @@ -1,14 +1,16 @@ +import warnings from collections import OrderedDict +from itertools import groupby import pytest +from _pytest.recwarn import WarningsRecorder -from plenum.common.eventually import eventually -from plenum.common.looper import Looper -from plenum.common.port_dispenser import genHa +from stp_core.loop.eventually import eventually +from stp_core.loop.looper import Looper from plenum.common.util import adict - -from plenum.test.cli.helper import newCLI, checkAllNodesUp, loadPlugin, \ +from plenum.test.cli.helper import newCLI, waitAllNodesUp, loadPlugin, \ doByCtx +from stp_core.network.port_dispenser import genHa 
@pytest.yield_fixture(scope="module") @@ -35,13 +37,17 @@ def nodeRegsForCLI(nodeNames): @pytest.fixture("module") def cli(cliLooper, tdir, tdirWithPoolTxns, tdirWithDomainTxns, tdirWithNodeKeepInited): - return newCLI(cliLooper, tdir) + cli = newCLI(cliLooper, tdir) + yield cli + cli.close() @pytest.fixture("module") def aliceCli(cliLooper, tdir, tdirWithPoolTxns, tdirWithDomainTxns, tdirWithNodeKeepInited): - return newCLI(cliLooper, tdir, unique_name='alice') + cli = newCLI(cliLooper, tdir, unique_name='alice') + yield cli + cli.close() @pytest.fixture("module") @@ -52,12 +58,10 @@ def validNodeNames(cli): @pytest.fixture("module") def createAllNodes(request, cli): cli.enterCmd("new node all") - cli.looper.run(eventually(checkAllNodesUp, cli, retryWait=1, timeout=20)) - + waitAllNodesUp(cli) def stopNodes(): for node in cli.nodes.values(): node.stop() - request.addfinalizer(stopNodes) diff --git a/plenum/test/cli/helper.py b/plenum/test/cli/helper.py index 9895c66a6f..03404ff56f 100644 --- a/plenum/test/cli/helper.py +++ b/plenum/test/cli/helper.py @@ -6,20 +6,27 @@ import time +import logging + +import sys + import plenum.cli.cli as cli from plenum.client.wallet import Wallet -from plenum.common.eventually import eventually -from plenum.common.log import getlogger -from plenum.common.util import getMaxFailures, Singleton +from stp_core.common.util import Singleton +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger +from plenum.common.util import getMaxFailures from plenum.test.cli.mock_output import MockOutput from plenum.test.cli.test_keyring import createNewKeyring -from plenum.test.helper import checkSufficientRepliesRecvd +from plenum.test.helper import waitForSufficientRepliesForRequests from plenum.test.spy_helpers import getAllArgs from plenum.test.test_client import TestClient from plenum.test.test_node import TestNode, checkPoolReady from plenum.test.testable import Spyable from pygments.token import Token - 
+from functools import partial +from plenum.test import waits +from plenum.common import util logger = getlogger() @@ -130,6 +137,10 @@ def lastMsg(self): @Spyable(methods=[cli.Cli.print, cli.Cli.printTokens]) class TestCli(cli.Cli, TestCliCore): + # def __init__(self, *args, **kwargs): + # super().__init__(*args, **kwargs) + # new = logging.StreamHandler(sys.stdout) + # self._setHandler('std', new) pass @@ -145,7 +156,7 @@ def isNameToken(token: Token): return token == Token.Name -def checkNodeStarted(cli, nodeName): +def waitNodeStarted(cli, nodeName): # Node name should be in cli.nodes assert nodeName in cli.nodes @@ -160,15 +171,19 @@ def chk(): assert "{} listening for other nodes at {}:{}" \ .format(nodeName, *cli.nodes[nodeName].nodestack.ha) in msgs - cli.looper.run(eventually(chk, retryWait=1, timeout=2)) + startUpTimeout = waits.expectedNodeStartUpTimeout() + cli.looper.run(eventually(chk, timeout=startUpTimeout)) -def checkAllNodesStarted(cli, *nodeNames): +def waitAllNodesStarted(cli, *nodeNames): for name in nodeNames: - checkNodeStarted(cli, name) + waitNodeStarted(cli, name) def checkAllNodesUp(cli): + # TODO: can assertAllNodesCreated be used instead? + # TODO: can waitAllNodesStarted be used instead? + msgs = {stmt['msg'] for stmt in cli.printeds} expected = "{nm}:{inst} selected primary {pri} " \ "for instance {inst} (view 0)" @@ -182,7 +197,19 @@ def checkAllNodesUp(cli): assert expected.format(nm=nm, pri=pri, inst=inst) in msgs +def waitAllNodesUp(cli): + timeout = waits.expectedPoolStartUpTimeout(len(cli.nodes)) + cli.looper.run(eventually(checkAllNodesUp, cli, timeout=timeout)) + + def checkClientConnected(cli, nodeNames, clientName): + """ + Checks whether client connected to nodes. 
+ + If you do not know the moment when it exactly happens consider using + 'waitClientConnected' instead + """ + printedMsgs = set() stackName = cli.clients[clientName].stackName expectedMsgs = {'{} now connected to {}C'.format(stackName, nodeName) @@ -195,6 +222,17 @@ def checkClientConnected(cli, nodeNames, clientName): assert printedMsgs == expectedMsgs +def waitClientConnected(cli, nodeNames, clientName): + """ + Wait for moment when client connected to pool + """ + + fVal = util.getMaxFailures(len(nodeNames)) + timeout = waits.expectedClientConnectionTimeout(fVal) + cli.looper.run(eventually(checkClientConnected, cli, + nodeNames, clientName, + timeout=timeout)) + def checkActiveIdrPrinted(cli): assert 'Identifier:' in cli.lastCmdOutput assert 'Verification key:' in cli.lastCmdOutput @@ -204,16 +242,23 @@ def createClientAndConnect(cli, nodeNames, clientName): cli.enterCmd("new client {}".format(clientName)) createNewKeyring(clientName, cli) cli.enterCmd("new key clientName{}".format("key")) - cli.looper.run(eventually(checkClientConnected, cli, nodeNames, - clientName, retryWait=1, timeout=3)) + + from plenum.common import util + + fVal = util.getMaxFailures(len(cli.nodeReg)) + timeout = waits.expectedClientConnectionTimeout(fVal) + + waitClientConnected(cli, nodeNames, clientName) def checkRequest(cli, operation): cName = "Joe" cli.enterCmd("new client {}".format(cName)) # Let client connect to the nodes - cli.looper.run(eventually(checkClientConnected, cli, list(cli.nodes.keys()), - cName, retryWait=1, timeout=5)) + + nodeNames = list(cli.nodes.keys()) + waitClientConnected(cli, nodeNames, cName) + # Send request to all nodes createNewKeyring(cName, cli) @@ -224,16 +269,11 @@ def checkRequest(cli, operation): cli.enterCmd('client {} send {}'.format(cName, operation)) client = cli.clients[cName] wallet = cli.wallets[cName] # type: Wallet - f = getMaxFailures(len(cli.nodes)) # Ensure client gets back the replies lastReqId = wallet._getIdData().lastReqId - 
cli.looper.run(eventually( - checkSufficientRepliesRecvd, - client.inBox, - lastReqId, - f, - retryWait=2, - timeout=10)) + + waitForSufficientRepliesForRequests(cli.looper, client, + requestIds=[lastReqId]) txn, status = client.getReply(wallet.defaultId, lastReqId) @@ -328,7 +368,6 @@ def newKeyPair(cli: TestCli, alias: str=None): return pubKey - pluginLoadedPat = re.compile("plugin [A-Za-z0-9_]+ successfully loaded from module") @@ -396,6 +435,7 @@ def checkReply(cli, count, clbk): result = ast.literal_eval(m.groups(0)[0].strip()) if clbk(result): done += 1 + logger.warning("Done = {}, Count = {}".format(done, count)) assert done == count @@ -410,6 +450,23 @@ def checkBalance(balance, data): return result.get('balance') == balance +def waitForReply(cli, nodeCount, replyChecker, customTimeout=None): + timeout = customTimeout or \ + waits.expectedTransactionExecutionTime(nodeCount) + cli.looper.run(eventually(checkReply, cli, + nodeCount, replyChecker, + timeout=timeout)) + + +def waitRequestSuccess(cli, nodeCount, customTimeout=None): + waitForReply(cli, nodeCount, checkSuccess, customTimeout) + + +def waitBalanceChange(cli, nodeCount, balanceValue, customTimeout=None): + waitForReply(cli, nodeCount, + partial(checkBalance, balanceValue), customTimeout) + + def loadPlugin(cli, pluginPkgName): curPath = os.path.dirname(os.path.dirname(__file__)) fullPath = os.path.join(curPath, 'plugin', pluginPkgName) @@ -451,7 +508,7 @@ def _(attempt, expect=None, within=None, mapper=None, not_expect=None): if attempt: attempt = attempt.format(**mapper) if mapper else attempt - checkCmdValid(cli, attempt) # TODO this needs to be renamed, because it's not clear that here is where we are actually calling the cli command + cli.enterCmd(attempt) def getAssertErrorMsg(e, cli, exp:bool, actual:bool): length = 80 @@ -582,4 +639,4 @@ def restartCliAndAssert(cli, do, expectedRestoredWalletName, 'Active keyring set to "{}"'.format(expectedRestoredWalletName) ], within=5) assert 
cli._activeWallet is not None - assert len(cli._activeWallet.identifiers) == expectedIdentifiers \ No newline at end of file + assert len(cli._activeWallet.identifiers) == expectedIdentifiers diff --git a/plenum/test/cli/test_basic_client_commands.py b/plenum/test/cli/test_basic_client_commands.py index 848a27cc69..22872e1730 100644 --- a/plenum/test/cli/test_basic_client_commands.py +++ b/plenum/test/cli/test_basic_client_commands.py @@ -1,7 +1,5 @@ -from plenum.common.eventually import eventually from plenum.common.util import randomString -from plenum.test.cli.helper import checkClientConnected - +from plenum.test.cli.helper import waitClientConnected def testClientNames(cli, validNodeNames, createAllNodes): """ @@ -28,9 +26,8 @@ def checkClientNotAddedWithNodeName(name): assert len(cli.clients) == 1 # Client name should be in cli.client assert cName in cli.clients + waitClientConnected(cli, validNodeNames, cName) - cli.looper.run(eventually(checkClientConnected, cli, validNodeNames, cName, - retryWait=1, timeout=5)) # Add clients with name same as a node name or starting with a node name for i, nm in enumerate(validNodeNames): diff --git a/plenum/test/cli/test_basic_node_commands.py b/plenum/test/cli/test_basic_node_commands.py index 23398c2292..2c44fe0140 100644 --- a/plenum/test/cli/test_basic_node_commands.py +++ b/plenum/test/cli/test_basic_node_commands.py @@ -1,5 +1,5 @@ from plenum.common.util import randomString -from plenum.test.cli.helper import isErrorToken, checkNodeStarted +from plenum.test.cli.helper import isErrorToken, waitNodeStarted from plenum.test.test_node import checkPoolReady @@ -8,7 +8,7 @@ def addNodes(be, do, cli, validNodeNames): be(cli) for i, nm in enumerate(validNodeNames): do("new node {}".format(nm)) - checkNodeStarted(cli, nm) + waitNodeStarted(cli, nm) def testNodeNames(be, do, cli, validNodeNames): diff --git a/plenum/test/cli/test_cli_client_ip_port.py b/plenum/test/cli/test_cli_client_ip_port.py index 4f7960ca4a..b00209c488 
100644 --- a/plenum/test/cli/test_cli_client_ip_port.py +++ b/plenum/test/cli/test_cli_client_ip_port.py @@ -11,7 +11,9 @@ def cli1(cliLooper, tdir, tdirWithPoolTxns, tdirWithDomainTxns, tdirWithNodeKeepInited, tconf): tempDir = os.path.join(tdir, "cl1") initDirWithGenesisTxns(tempDir, tconf, tdirWithPoolTxns, tdirWithDomainTxns) - return newCLI(cliLooper, tempDir) + cli = newCLI(cliLooper, tempDir) + yield cli + cli.close() @pytest.fixture(scope="module") @@ -19,7 +21,9 @@ def cli2(cliLooper, tdir, tdirWithPoolTxns, tdirWithDomainTxns, tdirWithNodeKeepInited, tconf): tempDir = os.path.join(tdir, "cl2") initDirWithGenesisTxns(tempDir, tconf, tdirWithPoolTxns, tdirWithDomainTxns) - return newCLI(cliLooper, tempDir) + cli = newCLI(cliLooper, tempDir) + yield cli + cli.close() def testEachClientOnDifferentPort(cli1, cli2): diff --git a/plenum/test/cli/test_cli_startup.py b/plenum/test/cli/test_cli_startup.py index a6351e1bf1..52280c50ec 100644 --- a/plenum/test/cli/test_cli_startup.py +++ b/plenum/test/cli/test_cli_startup.py @@ -1,6 +1,6 @@ import pytest -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper from plenum.common.util import firstValue from plenum.test.cli.helper import newCLI @@ -43,7 +43,9 @@ def reincarnatedCLI(nodeRegsForCLI, newLooper, tdir, cli): """ Creating a new cli instance is equivalent to starting and stopping a cli """ - return newCLI(nodeRegsForCLI, newLooper, tdir, unique_name='reincarnate') + cli = newCLI(nodeRegsForCLI, newLooper, tdir, unique_name='reincarnate') + yield cli + cli.close() @pytest.mark.skip(reason="SOV-542. 
Implementation changed") diff --git a/plenum/test/cli/test_cli_with_auction_req_plugin.py b/plenum/test/cli/test_cli_with_auction_req_plugin.py index e11333f4ec..e44f4a27c7 100644 --- a/plenum/test/cli/test_cli_with_auction_req_plugin.py +++ b/plenum/test/cli/test_cli_with_auction_req_plugin.py @@ -3,9 +3,9 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.test.cli.helper import loadPlugin, assertNoClient, \ - createClientAndConnect, checkReply, checkSuccess, checkBalance + createClientAndConnect, waitRequestSuccess, waitBalanceChange @pytest.fixture("module") @@ -29,7 +29,7 @@ def testReqForNonExistentClient(cli, loadAuctionReqPlugin, createAllNodes): # TODO: Have a test for non existent auction id -@pytest.mark.skipif('sys.platform == "win32"', reason='SOV-457') +# @pytest.mark.skipif('sys.platform == "win32"', reason='SOV-457') def testTransactions(cli, loadAuctionReqPlugin, createAllNodes, validNodeNames): nodeCount = len(validNodeNames) auctionId = str(uuid4()) @@ -37,99 +37,75 @@ def testTransactions(cli, loadAuctionReqPlugin, createAllNodes, validNodeNames): for name in names: createClientAndConnect(cli, validNodeNames, name) jason, tyler, les, john, timothy = names + cli.enterCmd("client {} start auction {}".format(jason, auctionId)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 1, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, nodeCount * 1) + cli.enterCmd("client {} place bid 20 on {}".format(tyler, auctionId)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 2, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, nodeCount * 2) + cli.enterCmd("client {} balance".format(tyler)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 3, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 980), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 3) 
+ + waitBalanceChange(cli, nodeCount, 980) + cli.enterCmd("client {} place bid 40 on {}".format(les, auctionId)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 4, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, nodeCount * 4) + cli.enterCmd("client {} balance".format(tyler)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 5, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 1, - partial(checkBalance, 1000), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 5) + waitBalanceChange(cli, nodeCount, 1000) + cli.enterCmd("client {} balance".format(les)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 6, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 960), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 6) + waitBalanceChange(cli, nodeCount, 960) # This bid would fail so `success` would be false and thus success count # wont increase cli.enterCmd("client {} place bid 30 on {}".format(john, auctionId)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 6, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, nodeCount * 6) + cli.enterCmd("client {} balance".format(john)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 7, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 2, - partial(checkBalance, 1000), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 7) + waitBalanceChange(cli, nodeCount * 2, 1000) + cli.enterCmd("client {} balance".format(les)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 8, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 2, - partial(checkBalance, 960), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 8) + waitBalanceChange(cli, nodeCount * 2, 960) + cli.enterCmd("client {} place bid 200 
on {}".format(timothy, auctionId)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 9, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, nodeCount * 9) + cli.enterCmd("client {} balance".format(timothy)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 10, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 800), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 10) + waitBalanceChange(cli, nodeCount, 800) + cli.enterCmd("client {} balance".format(les)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 11, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 3, - partial(checkBalance, 1000), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 11) + waitBalanceChange(cli, nodeCount * 3, 1000) + cli.enterCmd("client {} end auction {}".format(jason, auctionId)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 12, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, nodeCount * 12) + cli.enterCmd("client {} place bid 300 on {}".format(john, auctionId)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 12, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, nodeCount * 12) + cli.enterCmd("client {} balance".format(john)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 13, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 4, - partial(checkBalance, 1000), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 13) + waitBalanceChange(cli, nodeCount * 4, 1000) + cli.enterCmd("client {} balance".format(tyler)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 14, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 5, - partial(checkBalance, 1000), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 14) + 
waitBalanceChange(cli, nodeCount * 5, 1000) + cli.enterCmd("client {} balance".format(john)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 15, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 6, - partial(checkBalance, 1000), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 15) + waitBalanceChange(cli, nodeCount * 6, 1000) + cli.enterCmd("client {} balance".format(les)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 16, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 7, - partial(checkBalance, 1000), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 16) + waitBalanceChange(cli, nodeCount * 7, 1000) + cli.enterCmd("client {} balance".format(timothy)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 17, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount * 2, - partial(checkBalance, 800), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, nodeCount * 17) + waitBalanceChange(cli, nodeCount * 2, 800) diff --git a/plenum/test/cli/test_cli_with_bank_req_plugin.py b/plenum/test/cli/test_cli_with_bank_req_plugin.py index 329d245b2d..aea3d23a78 100644 --- a/plenum/test/cli/test_cli_with_bank_req_plugin.py +++ b/plenum/test/cli/test_cli_with_bank_req_plugin.py @@ -1,13 +1,10 @@ -from functools import partial - import pytest -from plenum.common.eventually import eventually -from plenum.test.cli.helper import checkReply, \ - checkSuccess, checkBalance, assertNoClient, loadPlugin, \ +from plenum.test.cli.helper import \ + waitRequestSuccess, waitBalanceChange, \ + assertNoClient, loadPlugin, \ createClientAndConnect - @pytest.fixture("module") def loadBankReqPlugin(cli): loadPlugin(cli, 'bank_req_validation') @@ -24,63 +21,51 @@ def testReqForNonExistentClient(cli, loadBankReqPlugin, createAllNodes): assertNoClient(cli) -@pytest.mark.skipif('sys.platform == "win32"', 
reason='SOV-457') +# @pytest.mark.skipif('sys.platform == "win32"', reason='SOV-457') def testTransactions(cli, loadBankReqPlugin, createAllNodes, validNodeNames): - nodeCount = len(validNodeNames) + numOfNodes = len(validNodeNames) + createClientAndConnect(cli, validNodeNames, "Alice") createClientAndConnect(cli, validNodeNames, "Bob") + cli.enterCmd("client Alice credit 500 to Bob") - cli.looper.run(eventually(checkReply, cli, nodeCount * 1, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, numOfNodes) + cli.enterCmd("client Alice balance") - cli.looper.run(eventually(checkReply, cli, nodeCount * 2, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, len(validNodeNames), - partial(checkBalance, 500), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, numOfNodes * 2) + waitBalanceChange(cli, numOfNodes, 500) + cli.enterCmd("client Bob balance") - cli.looper.run(eventually(checkReply, cli, nodeCount * 3, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 1500), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, numOfNodes * 3) + waitBalanceChange(cli, numOfNodes, 1500) + cli.enterCmd("client Bob credit 10 to Alice") - cli.looper.run(eventually(checkReply, cli, nodeCount * 4, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, numOfNodes * 4) + cli.enterCmd("client Bob balance") - cli.looper.run(eventually(checkReply, cli, nodeCount * 5, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 1490), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, numOfNodes * 5) + waitBalanceChange(cli, numOfNodes, 1490) + cli.enterCmd("client Bob credit 100 to Alice") - cli.looper.run(eventually(checkReply, cli, nodeCount * 6, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, numOfNodes * 6) + cli.enterCmd("client Alice balance") - 
cli.looper.run(eventually(checkReply, cli, nodeCount * 7, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 610), retryWait=1, - timeout=10)) + waitRequestSuccess(cli, numOfNodes * 7) + waitBalanceChange(cli, numOfNodes, 610) + cli.enterCmd("client Bob balance") - cli.looper.run(eventually(checkReply, cli, nodeCount * 8, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 1390), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, numOfNodes * 8) + waitBalanceChange(cli, numOfNodes, 1390) + createClientAndConnect(cli, validNodeNames, "Carol") + cli.enterCmd("client Carol credit 50 to Bob") - cli.looper.run(eventually(checkReply, cli, nodeCount * 9, - checkSuccess, retryWait=1, timeout=5)) + waitRequestSuccess(cli, numOfNodes * 9) + cli.enterCmd("client Bob balance") - cli.looper.run(eventually(checkReply, cli, nodeCount * 10, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 1440), retryWait=1, - timeout=10)) + waitRequestSuccess(cli, numOfNodes * 10) + waitBalanceChange(cli, numOfNodes, 1440) + cli.enterCmd("client Carol balance") - cli.looper.run(eventually(checkReply, cli, nodeCount * 11, - checkSuccess, retryWait=1, timeout=5)) - cli.looper.run(eventually(checkReply, cli, nodeCount, - partial(checkBalance, 950), retryWait=1, - timeout=5)) + waitRequestSuccess(cli, numOfNodes * 11) + waitBalanceChange(cli, numOfNodes, 950) diff --git a/plenum/test/cli/test_log_filtering.py b/plenum/test/cli/test_log_filtering.py index 415942bcb2..c5c87545c4 100644 --- a/plenum/test/cli/test_log_filtering.py +++ b/plenum/test/cli/test_log_filtering.py @@ -1,9 +1,7 @@ import types -from plenum.common.eventually import eventually -from plenum.common.util import getMaxFailures from plenum.test.cli.helper import checkRequest -from plenum.test.helper import 
checkSufficientRepliesRecvd +from plenum.test.helper import waitForSufficientRepliesForRequests def testLogFiltering(cli, validNodeNames, createAllNodes): @@ -19,11 +17,9 @@ def handleOneNodeMsg(self, wrappedMsg, excludeFromCli=None): client.nodestack.msgHandler = client.handleOneNodeMsg msg = '{"Hello": "Where", "type": "greeting"}' cli.enterCmd('client {} send {}'.format(client.name, msg)) - cli.looper.run(eventually( - checkSufficientRepliesRecvd, - client.inBox, - wallet._getIdData().lastReqId, - getMaxFailures(len(cli.nodes)), - retryWait=2, - timeout=10)) + + lastRequestId = wallet._getIdData().lastReqId + waitForSufficientRepliesForRequests(cli.looper,client, + requestIds=[lastRequestId]) + assert "got msg from node" not in cli.lastCmdOutput diff --git a/plenum/test/cli/test_save_and_restore_wallet.py b/plenum/test/cli/test_save_and_restore_wallet.py index 66a7e10b89..a3d21deea9 100644 --- a/plenum/test/cli/test_save_and_restore_wallet.py +++ b/plenum/test/cli/test_save_and_restore_wallet.py @@ -1,6 +1,7 @@ import pytest from plenum.cli.cli import Exit, Cli +from plenum.common.util import normalizedWalletFileName, getWalletFilePath from plenum.test.cli.helper import createAndAssertNewCreation, \ createAndAssertNewKeyringCreation, useAndAssertKeyring, exitFromCli @@ -11,7 +12,7 @@ def performExit(do): def testPersistentWalletName(): - walletFileName = Cli._normalizedWalletFileName("Default") + walletFileName = normalizedWalletFileName("Default") assert "default.wallet" == walletFileName assert "default" == Cli.getWalletKeyName(walletFileName) @@ -35,10 +36,10 @@ def testSaveAndRestoreWallet(do, be, cli, aliceCli): createNewKey(do, cli, keyringName="Default") createNewKeyring("mykr0", do) useKeyring("Default", do) - filePath = Cli.getWalletFilePath( + filePath = getWalletFilePath( cli.getContextBasedKeyringsBaseDir(), cli.walletFileName) exitFromCli(do) be(aliceCli) useKeyring(filePath, do, expectedName="Default") - useKeyring("mykr0", do, 
expectedName="mykr0") \ No newline at end of file + useKeyring("mykr0", do, expectedName="mykr0") diff --git a/plenum/test/cli/test_status_command.py b/plenum/test/cli/test_status_command.py index d26f6d0e29..bee02c7073 100644 --- a/plenum/test/cli/test_status_command.py +++ b/plenum/test/cli/test_status_command.py @@ -1,9 +1,13 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.util import getMaxFailures -from plenum.test.cli.helper import isNameToken, checkNodeStarted, \ - checkClientConnected, checkActiveIdrPrinted +from plenum.test.cli.helper import isNameToken, \ + waitNodeStarted, \ + checkActiveIdrPrinted +from plenum.test import waits +from plenum.common import util +from plenum.test.cli.helper import waitClientConnected def checkForNamedTokens(printedTokens, expectedNames): @@ -42,7 +46,7 @@ def testStatusAfterOneNodeCreated(cli, validNodeNames): nodeName = validNodeNames[0] cli.enterCmd("new node {}".format(nodeName)) # Let the node start up - checkNodeStarted(cli, nodeName) + waitNodeStarted(cli, nodeName) cli.enterCmd("status") startedNodeToken = cli.printedTokens[1] @@ -113,12 +117,17 @@ def testStatusAfterAllNodesUp(cli, validNodeNames, createAllNodes): def testStatusAfterClientAdded(cli, validNodeNames, createAllNodes): clientName = "Joe" cli.enterCmd("new client {}".format(clientName)) - cli.looper.run(eventually(checkClientConnected, cli, validNodeNames, - clientName, retryWait=1, timeout=3)) + + fVal = util.getMaxFailures(len(validNodeNames)) + connectionTimeout = waits.expectedClientConnectionTimeout(fVal) + + waitClientConnected(cli, validNodeNames, clientName) + cli.enterCmd("new key") cli.enterCmd("status client {}".format(clientName)) - cli.looper.run(eventually(checkActiveIdrPrinted, cli, retryWait=1, - timeout=3)) + cli.looper.run(eventually(checkActiveIdrPrinted, cli, + retryWait=1, timeout=connectionTimeout)) + for name in validNodeNames: # Checking the 
output after command `status node `. Testing # the node status here after the client is connected diff --git a/plenum/test/client/test_client.py b/plenum/test/client/test_client.py index e195198f75..a0faaee1da 100644 --- a/plenum/test/client/test_client.py +++ b/plenum/test/client/test_client.py @@ -1,18 +1,20 @@ import pytest -from plenum.common.crypto import Signer +from plenum.common.keygen_utils import initRemoteKeys + +from stp_core.loop.eventually import eventually from plenum.common.exceptions import EmptySignature from plenum.common.exceptions import NotConnectedToAny -from plenum.test.helper import * -from plenum.test.helper import checkResponseCorrectnessFromNodes -from plenum.test.helper import randomOperation, \ - checkLastClientReqForNode, \ - getRepliesFromClientInbox -from plenum.test.helper import sendRandomRequest, checkSufficientRepliesRecvd, \ - assertLength -from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from stp_core.common.log import getlogger +from plenum.common.constants import OP_FIELD_NAME, REPLY, REQACK, TXN_ID +from plenum.common.types import f +from plenum.server.node import Node +from plenum.test import waits +from plenum.test.helper import checkResponseCorrectnessFromNodes, getMaxFailures, \ + randomOperation, checkLastClientReqForNode, getRepliesFromClientInbox, \ + sendRandomRequest, waitForSufficientRepliesForRequests, assertLength, \ + sendReqsToNodesAndVerifySuffReplies + from plenum.test.test_client import genTestClient -from plenum.test.test_node import TestNodeSet -from raet.raeting import AutoMode nodeCount = 7 @@ -21,7 +23,13 @@ whitelist = ['signer not configured so not signing', 'for EmptySignature', 'discarding message', - 'found legacy entry'] # warnings + 'found legacy entry', + 'public key from disk', + 'verification key from disk', + 'got error while verifying message'] # warnings + + +logger = getlogger() def checkResponseRecvdFromNodes(client, expectedCount: int, @@ -46,36 +54,6 @@ def 
checkResponseRecvdFromNodes(client, expectedCount: int, assert len(replies) == len(acks) == expectedCount -# noinspection PyIncorrectDocstring -@pytest.mark.skip(reason="SOV-550. Implementation changed") -def testGeneratedRequestSequencing(tdir_for_func): - """ - Request ids must be generated in an increasing order - """ - with TestNodeSet(count=4, tmpdir=tdir_for_func) as nodeSet: - w = Wallet("test") - w.addIdentifier() - - operation = randomOperation() - - request = w.signOp(operation) - assert request.reqId == 1 - - request = w.signOp(operation) - assert request.reqId == 2 - - request = w.signOp(randomOperation()) - assert request.reqId == 3 - - idr, _ = w.addIdentifier() - - request = w.signOp(randomOperation(), idr) - assert request.reqId == 1 - - request = w.signOp(randomOperation()) - assert request.reqId == 4 - - # noinspection PyIncorrectDocstring def testClientShouldNotBeAbleToConnectToNodesNodeStack(pool): """ @@ -83,11 +61,16 @@ def testClientShouldNotBeAbleToConnectToNodesNodeStack(pool): """ async def go(ctx): - for n in ctx.nodeset: - n.nodestack.keep.auto = AutoMode.never + # for n in ctx.nodeset: + # n.nodestack.keep.auto = AutoMode.never nodestacksVersion = {k: v.ha for k, v in ctx.nodeset.nodeReg.items()} client1, _ = genTestClient(nodeReg=nodestacksVersion, tmpdir=ctx.tmpdir) + for node in ctx.nodeset: + stack = node.nodestack + args = (client1.name, stack.name, ctx.tmpdir, stack.verhex, True) + initRemoteKeys(*args) + ctx.looper.add(client1) with pytest.raises(NotConnectedToAny): await client1.ensureConnectedToNodes() @@ -114,24 +97,26 @@ async def go(ctx): request = wallet.signOp(op=randomOperation()) request.signature = None request = client1.submitReqs(request)[0] + timeout = waits.expectedClientRequestPropagationTime(nodeCount) + with pytest.raises(AssertionError): for node in ctx.nodeset: await eventually( checkLastClientReqForNode, node, request, - retryWait=1, timeout=10) + retryWait=1, timeout=timeout) for n in ctx.nodeset: params = 
n.spylog.getLastParams(Node.handleInvalidClientMsg) ex = params['ex'] - _, frm = params['wrappedMsg'] + msg, _ = params['wrappedMsg'] assert isinstance(ex, EmptySignature) - assert frm == client1.stackName + assert msg.get(f.IDENTIFIER.nm) == request.identifier params = n.spylog.getLastParams(Node.discard) reason = params["reason"] (msg, frm) = params["msg"] - assert msg == request.__dict__ - assert frm == client1.stackName + assert msg == request.as_dict + assert msg.get(f.IDENTIFIER.nm) == request.identifier assert "EmptySignature" in reason pool.run(go) @@ -173,10 +158,11 @@ def testReplyWhenRepliesFromAllNodesAreSame(looper, client1, wallet1): nodes. """ request = sendRandomRequest(wallet1, client1) + responseTimeout = waits.expectedTransactionExecutionTime(nodeCount) looper.run( eventually(checkResponseRecvdFromNodes, client1, nodeCount, request.reqId, - retryWait=1, timeout=20)) + retryWait=1, timeout=responseTimeout)) checkResponseCorrectnessFromNodes(client1.inBox, request.reqId, F) @@ -192,10 +178,11 @@ def testReplyWhenRepliesFromExactlyFPlusOneNodesAreSame(looper, # exactly f + 1 => (3) nodes have correct responses # modify some (numOfResponses of type REPLY - (f + 1)) => 4 responses to # have a different operations + responseTimeout = waits.expectedTransactionExecutionTime(nodeCount) looper.run( eventually(checkResponseRecvdFromNodes, client1, nodeCount, request.reqId, - retryWait=1, timeout=20)) + retryWait=1, timeout=responseTimeout)) replies = (msg for msg, frm in client1.inBox if msg[OP_FIELD_NAME] == REPLY and @@ -216,13 +203,9 @@ def testReplyWhenRequestAlreadyExecuted(looper, nodeSet, client1, sent1): will be sent again to the client. An acknowledgement will not be sent for a repeated request. 
""" - # Since view no is always zero in the current setup - looper.run(eventually(checkSufficientRepliesRecvd, - client1.inBox, - sent1.reqId, - 2, - retryWait=.5, - timeout=5)) + waitForSufficientRepliesForRequests(looper, client1, + requests=[sent1], fVal=2) + originalRequestResponsesLen = nodeCount * 2 duplicateRequestRepliesLen = nodeCount # for a duplicate request we need to client1.nodestack._enqueueIntoAllRemotes(sent1, None) @@ -235,10 +218,8 @@ def chk(): response[0].get(f.REQ_ID.nm) == sent1.reqId)], originalRequestResponsesLen + duplicateRequestRepliesLen) - looper.run(eventually( - chk, - retryWait=1, - timeout=20)) + responseTimeout = waits.expectedTransactionExecutionTime(nodeCount) + looper.run(eventually( chk, retryWait=1, timeout=responseTimeout)) # noinspection PyIncorrectDocstring @@ -270,7 +251,6 @@ def makeClient(id): clients.add(client) for i in range(1, numOfRequests + 1): - # sending requests requests = {} for client in clients: @@ -281,21 +261,31 @@ def makeClient(id): requests[client] = (request.reqId, request.operation['amount']) # checking results + responseTimeout =waits.expectedTransactionExecutionTime(nodeCount) for client, (reqId, sentAmount) in requests.items(): looper.run(eventually(checkResponseRecvdFromNodes, client, nodeCount, reqId, retryWait=1, - timeout=25)) + timeout=responseTimeout)) print("Expected amount for request {} is {}". format(reqId, sentAmount)) - replies = [r[0]['result']['amount'] - for r in client.inBox - if r[0]['op'] == 'REPLY' - and r[0]['result']['reqId'] == reqId] + # This looks like it fails on some python versions + # replies = [r[0]['result']['amount'] + # for r in client.inBox + # if r[0]['op'] == 'REPLY' + # and r[0]['result']['reqId'] == reqId] + + replies = [] + for r in client.inBox: + if r[0]['op'] == 'REPLY' and r[0]['result']['reqId'] == reqId: + if 'amount' not in r[0]['result']: + logger.debug('{} cannot find amount in {}'. 
+ format(client, r[0]['result'])) + replies.append(r[0]['result']['amount']) assert all(replies[0] == r for r in replies) assert replies[0] == sentAmount @@ -311,34 +301,3 @@ def testReplyReceivedOnlyByClientWhoSentRequest(looper, nodeSet, tdir, sendReqsToNodesAndVerifySuffReplies(looper, wallet1, newClient, 1) assert len(client1.inBox) == client1InboxSize assert len(newClient.inBox) > newClientInboxSize - - -def testClientCanSendMessagesIfAnotherClientSendsMessage(looper, nodeSet, - tdir, another_tdir, - wallet1): - assert tdir != another_tdir - client1 = createClientSendMessageAndRemove(looper, nodeSet, - tdir, wallet1, 'TestClient1') - client2 = createClientSendMessageAndRemove(looper, nodeSet, - another_tdir, wallet1, - 'TestClient1') - clientSendMessageAndRemove(client1, looper, wallet1) - - -def testClientCanSendMessagesIfInitWithSighex(looper, nodeSet, - tdir, another_tdir, - wallet1): - assert tdir != another_tdir - signer1 = Signer() - sighex1 = signer1.keyhex - client1 = createClientSendMessageAndRemove(looper, nodeSet, - tdir, wallet1, - 'TestClient1', sighex=sighex1) - - signer2 = Signer() - sighex2 = signer2.keyhex - assert sighex2 != sighex1 - client2 = createClientSendMessageAndRemove(looper, nodeSet, - another_tdir, wallet1, - 'TestClient1', sighex=sighex2) - clientSendMessageAndRemove(client1, looper, wallet1) \ No newline at end of file diff --git a/plenum/test/client/test_client_request_nack.py b/plenum/test/client/test_client_request_nack.py index 74965e6b54..8ca9e7b141 100644 --- a/plenum/test/client/test_client_request_nack.py +++ b/plenum/test/client/test_client_request_nack.py @@ -2,7 +2,9 @@ import pytest -from plenum.common.eventually import eventuallyAll +from stp_core.loop.eventually import eventuallyAll + +from plenum.test import waits from plenum.test.helper import checkReqNack whitelist = ['discarding message'] @@ -40,4 +42,5 @@ def testRequestFullRoundTrip(restrictiveVerifier, coros2 = [partial(checkReqNack, client1, node, 
sent1.identifier, sent1.reqId, update) for node in nodeSet] - looper.run(eventuallyAll(*coros2, totalTimeout=5)) + timeout = waits.expectedReqAckQuorumTime() + looper.run(eventuallyAll(*coros2, totalTimeout=timeout)) diff --git a/plenum/test/client/test_client_retry.py b/plenum/test/client/test_client_retry.py index 05f0a258e3..7da11da9a6 100644 --- a/plenum/test/client/test_client_retry.py +++ b/plenum/test/client/test_client_retry.py @@ -3,10 +3,11 @@ import pytest -from plenum.common.eventually import eventually, eventuallyAll +from stp_core.loop.eventually import eventually, eventuallyAll from plenum.common.request import Request from plenum.common.types import Reply, RequestNack -from plenum.test.helper import sendRandomRequest, checkReqAck, checkReplyCount +from plenum.test.helper import sendRandomRequest, checkReqAck, waitReplyCount +from plenum.test import waits whitelist = ['AlphaC unable to send message', ] @@ -21,8 +22,18 @@ def testClientRetryRequestWhenAckNotReceived(looper, nodeSet, client1, """ alpha = nodeSet.Alpha - r = alpha.clientstack.getRemote(client1.stackName) - alpha.clientstack.removeRemote(r) + skipped = False + origPr = alpha.processRequest + + def skipReqOnce(msg, remoteName): + nonlocal skipped + if isinstance(msg, Request) and not skipped: + skipped = True + return + origPr(msg, remoteName) + + alpha.clientMsgRouter.routes[Request] = skipReqOnce + req = sendRandomRequest(wallet1, client1) def chkAcks(): @@ -33,10 +44,10 @@ def chkAcks(): with pytest.raises(AssertionError): checkReqAck(client1, node, *req.key) - looper.run(eventually(chkAcks, retryWait=1, timeout=3)) - - looper.run(eventually(checkReplyCount, client1, *req.key, 4, retryWait=1, - timeout=tconf.CLIENT_REQACK_TIMEOUT+10)) + timeout = waits.expectedReqAckQuorumTime() + looper.run(eventually(chkAcks, retryWait=1, timeout=timeout)) + idr, reqId = req.key + waitReplyCount(looper, client1, idr, reqId, 4) def testClientRetryRequestWhenReplyNotReceived(looper, nodeSet, client1, 
@@ -60,11 +71,11 @@ def skipReplyOnce(msg, remoteName): alpha.transmitToClient = skipReplyOnce req = sendRandomRequest(wallet1, client1) coros = [partial(checkReqAck, client1, node, *req.key) for node in nodeSet] - looper.run(eventuallyAll(*coros, retryWait=.5, totalTimeout=3)) - looper.run(eventually(checkReplyCount, client1, *req.key, 3, retryWait=1, - timeout=3)) - looper.run(eventually(checkReplyCount, client1, *req.key, 4, retryWait=1, - timeout=tconf.CLIENT_REPLY_TIMEOUT + 5)) + timeout = waits.expectedReqAckQuorumTime() + looper.run(eventuallyAll(*coros, retryWait=.5, totalTimeout=timeout)) + idr, reqId = req.key + waitReplyCount(looper, client1, idr, reqId, 3) + waitReplyCount(looper, client1, idr, reqId, 4) def testClientNotRetryRequestWhenReqnackReceived(looper, nodeSet, client1, @@ -73,6 +84,8 @@ def testClientNotRetryRequestWhenReqnackReceived(looper, nodeSet, client1, A node sends REQNACK. The client does not resend Request. """ + numOfNodes = len(nodeSet) + alpha = nodeSet.Alpha origProcReq = alpha.processRequest origTrans = alpha.transmitToClient @@ -90,25 +103,37 @@ def onlyTransNack(msg, remoteName): totalResends = client1.spylog.count(client1.resendRequests.__name__) req = sendRandomRequest(wallet1, client1) + + reqAckTimeout = waits.expectedReqAckQuorumTime() + executionTimeout = waits.expectedTransactionExecutionTime(numOfNodes) + # Wait till ACK timeout - looper.runFor(tconf.CLIENT_REQACK_TIMEOUT+1) + looper.runFor(reqAckTimeout + 1) assert client1.spylog.count(client1.resendRequests.__name__) == totalResends + # Wait till REPLY timeout - looper.runFor(tconf.CLIENT_REPLY_TIMEOUT - tconf.CLIENT_REQACK_TIMEOUT + 1) + retryTimeout = executionTimeout - reqAckTimeout + 1 + looper.runFor(retryTimeout) + assert client1.spylog.count(client1.resendRequests.__name__) == totalResends - looper.run(eventually(checkReplyCount, client1, *req.key, 3, retryWait=1, - timeout=3)) + idr, reqId = req.key + waitReplyCount(looper, client1, idr, reqId, 3) + 
alpha.clientMsgRouter.routes[Request] = origProcReq alpha.transmitToClient = origTrans -def testClientNotRetryingRequestAfterMaxTriesDone(looper, nodeSet, client1, - wallet1, tconf): +def testClientNotRetryingRequestAfterMaxTriesDone(looper, + nodeSet, + client1, + wallet1, + tconf): """ A client sends Request to a node but the node never responds to client. The client resends the request but only the number of times defined in its configuration and no more """ + alpha = nodeSet.Alpha origTrans = alpha.transmitToClient @@ -121,10 +146,15 @@ def dontTransmitReply(msg, remoteName): totalResends = client1.spylog.count(client1.resendRequests.__name__) req = sendRandomRequest(wallet1, client1) + # Wait for more than REPLY timeout - looper.runFor((tconf.CLIENT_MAX_RETRY_REPLY+2)*tconf.CLIENT_REPLY_TIMEOUT+2) - looper.run(eventually(checkReplyCount, client1, *req.key, 3, retryWait=1, - timeout=3)) + timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + \ + tconf.CLIENT_REQACK_TIMEOUT * tconf.CLIENT_MAX_RETRY_REPLY + looper.runFor(timeout) + + idr, reqId = req.key + waitReplyCount(looper, client1, idr, reqId, 3) + assert client1.spylog.count(client1.resendRequests.__name__) == \ (totalResends + tconf.CLIENT_MAX_RETRY_REPLY) assert req.key not in client1.expectingAcksFor diff --git a/plenum/test/common/test_throttler.py b/plenum/test/common/test_throttler.py index a7d5f64def..1e0c99d693 100644 --- a/plenum/test/common/test_throttler.py +++ b/plenum/test/common/test_throttler.py @@ -1,6 +1,7 @@ import time + from plenum.common.throttler import Throttler -from plenum.common.ratchet import Ratchet +from stp_core.ratchet import Ratchet def test_throttler_case1(): @@ -29,7 +30,8 @@ def test_throttler_case2(): testIterations = windowSize - 2 ratchet = Ratchet(a=2, b=0.05, c=1, base=2, peak=windowSize) throttler = Throttler(windowSize, ratchet.get) - cooldowns = [time.sleep(1) or throttler.acquire()[1] for i in range(testIterations)] + cooldowns = [time.sleep(1) or 
throttler.acquire()[1] + for i in range(testIterations)] middle = len(cooldowns) // 2 firstIteration, secondIteration = cooldowns[:middle], cooldowns[middle:] for a, b in zip(firstIteration, secondIteration): diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index 660c900657..2477c216d6 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -5,41 +5,108 @@ import logging import os import re +import warnings +from contextlib import ExitStack from copy import copy from functools import partial from typing import Dict, Any +from plenum.test import waits +import gc import pip import pytest +from plenum.common.keygen_utils import initNodeKeysForBothStacks +from stp_core.crypto.util import randomSeed +from stp_core.network.port_dispenser import genHa +from stp_core.types import HA +from _pytest.recwarn import WarningsRecorder from ledger.compact_merkle_tree import CompactMerkleTree from ledger.ledger import Ledger from ledger.serializers.compact_serializer import CompactSerializer from plenum.common.config_util import getConfig -from plenum.common.eventually import eventually, eventuallyAll +from stp_core.loop.eventually import eventually, eventuallyAll from plenum.common.exceptions import BlowUp -from plenum.common.log import getlogger, TestingHandler -from plenum.common.looper import Looper -from plenum.common.port_dispenser import genHa -from plenum.common.raet import initLocalKeep +from stp_core.common.log import getlogger +from stp_core.common.logging.handlers import TestingHandler +from stp_core.loop.looper import Looper, Prodable from plenum.common.constants import TXN_TYPE, DATA, NODE, ALIAS, CLIENT_PORT, \ CLIENT_IP, NODE_PORT, NYM, CLIENT_STACK_SUFFIX, PLUGIN_BASE_DIR_PATH from plenum.common.txn_util import getTxnOrderedFields -from plenum.common.types import HA, PLUGIN_TYPE_STATS_CONSUMER +from plenum.common.types import PLUGIN_TYPE_STATS_CONSUMER from plenum.common.util import getNoInstances, getMaxFailures from 
plenum.server.notifier_plugin_manager import PluginManager from plenum.test.helper import randomOperation, \ - checkReqAck, checkLastClientReqForNode, checkSufficientRepliesRecvd, \ - checkViewNoForNodes, requestReturnedToNode, randomText, \ + checkReqAck, checkLastClientReqForNode, waitForSufficientRepliesForRequests, \ + waitForViewChange, requestReturnedToNode, randomText, \ mockGetInstalledDistributions, mockImportModule from plenum.test.node_request.node_request_helper import checkPrePrepared, \ - checkPropagated, checkPrepared, checkCommited + checkPropagated, checkPrepared, checkCommitted from plenum.test.plugin.helper import getPluginPath from plenum.test.test_client import genTestClient, TestClient from plenum.test.test_node import TestNode, TestNodeSet, Pool, \ checkNodesConnected, ensureElectionsDone, genNodeReg logger = getlogger() +config = getConfig() + + +@pytest.fixture(scope="session") +def warnfilters(): + def _(): + warnings.filterwarnings('ignore', category=DeprecationWarning, module='jsonpickle\.pickler', message='encodestring\(\) is a deprecated alias') + warnings.filterwarnings('ignore', category=DeprecationWarning, module='jsonpickle\.unpickler', message='decodestring\(\) is a deprecated alias') + warnings.filterwarnings('ignore', category=DeprecationWarning, module='plenum\.client\.client', message="The 'warn' method is deprecated") + warnings.filterwarnings('ignore', category=DeprecationWarning, module='plenum\.common\.stacked', message="The 'warn' method is deprecated") + warnings.filterwarnings('ignore', category=DeprecationWarning, module='plenum\.test\.test_testable', message='Please use assertEqual instead.') + warnings.filterwarnings('ignore', category=DeprecationWarning, module='prompt_toolkit\.filters\.base', message='inspect\.getargspec\(\) is deprecated') + warnings.filterwarnings('ignore', category=ResourceWarning, message='unclosed event loop') + warnings.filterwarnings('ignore', category=ResourceWarning, 
message='unclosed.*socket\.socket') + return _ + + +@pytest.yield_fixture(scope="session", autouse=True) +def warncheck(warnfilters): + with WarningsRecorder() as record: + warnfilters() + yield + gc.collect() + to_prints = [] + + def keyfunc(_): + return _.category.__name__, _.filename, _.lineno + + _sorted = sorted(record, key=keyfunc) + _grouped = itertools.groupby(_sorted, keyfunc) + for k, g in _grouped: + to_prints.append("\n" + "category: {}\n" + "filename: {}\n" + " lineno: {}".format(*k)) + messages = itertools.groupby(g, lambda _: str(_.message)) + for k2, g2 in messages: + count = sum(1 for _ in g2) + count_str = ' ({} times)'.format(count) if count > 1 else '' + to_prints.append(" msg: {}{}".format(k2, count_str)) + if to_prints: + to_prints.insert(0, 'Warnings found:') + pytest.fail('\n'.join(to_prints)) + + +@pytest.fixture(scope="session", autouse=True) +def setResourceLimits(): + try: + import resource + except ImportError: + print('Module resource is not available, maybe i am running on Windows') + return + flimit = 65535 + plimit = 65535 + try: + resource.setrlimit(resource.RLIMIT_NOFILE, (flimit, flimit)) + resource.setrlimit(resource.RLIMIT_NPROC, (plimit, plimit)) + except Exception as ex: + print('Could not set resource limits due to {}'.format(ex)) def getValueFromModule(request, name: str, default: Any = None): @@ -83,8 +150,8 @@ def allPluginsPath(): @pytest.fixture(scope="module") def keySharedNodes(startedNodes): - for n in startedNodes: - n.startKeySharing() + # for n in startedNodes: + # n.startKeySharing() return startedNodes @@ -118,7 +185,13 @@ def logcapture(request, whitelist, concerningLogLevels): 'not trying any more because', # TODO: This is too specific, move it to the particular test "Beta discarding message INSTANCE_CHANGE(viewNo='BAD') " - "because field viewNo has incorrect type: " + "because field viewNo has incorrect type: ", + 'got exception while closing hash store', + # TODO: Remove these once the relevant bugs are 
fixed + '.+ failed to ping .+ at', + 'discarding message (NOMINATE|PRIMARY)', + '.+ rid .+ has been removed', + 'last try...' ] wlfunc = inspect.isfunction(whitelist) @@ -141,12 +214,22 @@ def tester(record): if re.search(w, msg)]) if not (isBenign or isTest or isWhiteListed): + # Stopping all loopers, so prodables like nodes, clients, etc stop. + # This helps in freeing ports + for fv in request._fixture_values.values(): + if isinstance(fv, Looper): + fv.stopall() + if isinstance(fv, Prodable): + fv.stop() raise BlowUp("{}: {} ".format(record.levelname, record.msg)) ch = TestingHandler(tester) logging.getLogger().addHandler(ch) - request.addfinalizer(lambda: logging.getLogger().removeHandler(ch)) + def cleanup(): + logging.getLogger().removeHandler(ch) + + request.addfinalizer(cleanup) config = getConfig(tdir) for k, v in overriddenConfigValues.items(): setattr(config, k, v) @@ -209,7 +292,7 @@ def ready(looper, keySharedNodes): @pytest.fixture(scope="module") def up(looper, ready): - ensureElectionsDone(looper=looper, nodes=ready, retryWait=1, timeout=30) + ensureElectionsDone(looper=looper, nodes=ready) # noinspection PyIncorrectDocstring @@ -218,7 +301,8 @@ def ensureView(nodeSet, looper, up): """ Ensure that all the nodes in the nodeSet are in the same view. 
""" - return looper.run(eventually(checkViewNoForNodes, nodeSet, timeout=3)) + + return waitForViewChange(looper, nodeSet) @pytest.fixture("module") @@ -229,7 +313,9 @@ def delayedPerf(nodeSet): @pytest.fixture(scope="module") def clientAndWallet1(looper, nodeSet, tdir, up): - return genTestClient(nodeSet, tmpdir=tdir) + client, wallet = genTestClient(nodeSet, tmpdir=tdir) + yield client, wallet + client.stop() @pytest.fixture(scope="module") @@ -260,18 +346,24 @@ def sent1(client1, request1): @pytest.fixture(scope="module") def reqAcked1(looper, nodeSet, client1, sent1, faultyNodes): + + numerOfNodes = len(nodeSet) + + # Wait until request received by all nodes + propTimeout = waits.expectedClientRequestPropagationTime(numerOfNodes) coros = [partial(checkLastClientReqForNode, node, sent1) for node in nodeSet] looper.run(eventuallyAll(*coros, - totalTimeout=10, + totalTimeout=propTimeout, acceptableFails=faultyNodes)) + # Wait until sufficient number of acks received coros2 = [partial(checkReqAck, client1, node, sent1.identifier, sent1.reqId) for node in nodeSet] + ackTimeout = waits.expectedReqAckQuorumTime() looper.run(eventuallyAll(*coros2, - totalTimeout=5, + totalTimeout=ackTimeout, acceptableFails=faultyNodes)) - return sent1 @@ -328,31 +420,33 @@ def prepared1(looper, nodeSet, client1, preprepared1, faultyNodes): @pytest.fixture(scope="module") def committed1(looper, nodeSet, client1, prepared1, faultyNodes): - checkCommited(looper, - nodeSet, - prepared1, - range(getNoInstances(len(nodeSet))), - faultyNodes) + checkCommitted(looper, + nodeSet, + prepared1, + range(getNoInstances(len(nodeSet))), + faultyNodes) return prepared1 @pytest.fixture(scope="module") def replied1(looper, nodeSet, client1, committed1, wallet1, faultyNodes): + numOfNodes = len(nodeSet) + numOfInstances = getNoInstances(numOfNodes) + quorum = numOfInstances * (numOfNodes - faultyNodes) def checkOrderedCount(): - instances = getNoInstances(len(nodeSet)) - resp = 
[requestReturnedToNode(node, wallet1.defaultId, - committed1.reqId, instId) for - node in nodeSet for instId in range(instances)] - assert resp.count(True) >= (len(nodeSet) - faultyNodes)*instances - - looper.run(eventually(checkOrderedCount, retryWait=1, timeout=30)) - looper.run(eventually( - checkSufficientRepliesRecvd, - client1.inBox, - committed1.reqId, - getMaxFailures(len(nodeSet)), - retryWait=2, - timeout=30)) + resp = [requestReturnedToNode(node, + wallet1.defaultId, + committed1.reqId, + instId) + for node in nodeSet for instId in range(numOfInstances)] + assert resp.count(True) >= quorum + + orderingTimeout = waits.expectedOrderingTime(numOfInstances) + looper.run(eventually(checkOrderedCount, + retryWait=1, + timeout=orderingTimeout)) + + waitForSufficientRepliesForRequests(looper, client1, requests=[committed1]) return committed1 @@ -402,12 +496,13 @@ def nodeAndClientInfoFilePath(dirName): @pytest.fixture(scope="module") def poolTxnData(nodeAndClientInfoFilePath): - data = json.loads(open(nodeAndClientInfoFilePath).read().strip()) - for txn in data["txns"]: - if txn[TXN_TYPE] == NODE: - txn[DATA][NODE_PORT] = genHa()[1] - txn[DATA][CLIENT_PORT] = genHa()[1] - return data + with open(nodeAndClientInfoFilePath) as f: + data = json.loads(f.read().strip()) + for txn in data["txns"]: + if txn[TXN_TYPE] == NODE: + txn[DATA][NODE_PORT] = genHa()[1] + txn[DATA][CLIENT_PORT] = genHa()[1] + return data @pytest.fixture(scope="module") @@ -447,7 +542,8 @@ def tdirWithDomainTxns(poolTxnData, tdir, tconf, domainTxnOrderedFields): def tdirWithNodeKeepInited(tdir, poolTxnData, poolTxnNodeNames): seeds = poolTxnData["seeds"] for nName in poolTxnNodeNames: - initLocalKeep(nName, tdir, seeds[nName], override=True) + seed = seeds[nName] + initNodeKeysForBothStacks(nName, tdir, seed, override=True) @pytest.fixture(scope="module") @@ -496,16 +592,19 @@ def txnPoolNodeSet(patchPluginManager, allPluginsPath, tdirWithNodeKeepInited, testNodeClass): - nodes = [] - for nm in 
poolTxnNodeNames: - node = testNodeClass(nm, basedirpath=tdirWithPoolTxns, - config=tconf, pluginPaths=allPluginsPath) - txnPoolNodesLooper.add(node) - nodes.append(node) - txnPoolNodesLooper.run(checkNodesConnected(nodes)) - ensureElectionsDone(looper=txnPoolNodesLooper, nodes=nodes, retryWait=1, - timeout=20) - return nodes + with ExitStack() as exitStack: + nodes = [] + for nm in poolTxnNodeNames: + node = exitStack.enter_context( + testNodeClass(nm, + basedirpath=tdirWithPoolTxns, + config=tconf, + pluginPaths=allPluginsPath)) + txnPoolNodesLooper.add(node) + nodes.append(node) + txnPoolNodesLooper.run(checkNodesConnected(nodes)) + ensureElectionsDone(looper=txnPoolNodesLooper, nodes=nodes) + yield nodes @pytest.fixture(scope="module") @@ -582,6 +681,9 @@ def testNode(pluginManager, tdir): name = randomText(20) nodeReg = genNodeReg(names=[name]) ha, cliname, cliha = nodeReg[name] - return TestNode(name=name, ha=ha, cliname=cliname, cliha=cliha, + node = TestNode(name=name, ha=ha, cliname=cliname, cliha=cliha, nodeRegistry=copy(nodeReg), basedirpath=tdir, - primaryDecider=None, pluginPaths=None) + primaryDecider=None, pluginPaths=None, seed=randomSeed()) + node.start(None) + yield node + node.stop() diff --git a/plenum/test/helper.py b/plenum/test/helper.py index b1fcc63ee6..bea42cdf94 100644 --- a/plenum/test/helper.py +++ b/plenum/test/helper.py @@ -1,3 +1,4 @@ +import itertools import os import random import string @@ -7,28 +8,25 @@ from shutil import copyfile from sys import executable from time import sleep -from typing import Tuple, Iterable, Dict, Optional, NamedTuple,\ - List, Any, Sequence -from typing import Union - -import itertools from psutil import Popen - -from plenum.common.config_util import getConfig -from plenum.config import poolTransactionsFile, domainTransactionsFile -from raet.raeting import TrnsKind, PcktKind +from typing import Tuple, Iterable, Dict, Optional, NamedTuple, \ + List, Any, Sequence +from typing import Union from 
plenum.client.client import Client from plenum.client.wallet import Wallet -from plenum.common.eventually import eventually, eventuallyAll -from plenum.common.log import getlogger -from plenum.common.looper import Looper +from stp_core.common.log import getlogger +from stp_core.loop.looper import Looper from plenum.common.request import Request from plenum.common.constants import REPLY, REQACK, TXN_ID, REQNACK, OP_FIELD_NAME from plenum.common.types import Reply, f, PrePrepare from plenum.common.util import getMaxFailures, \ - checkIfMoreThanFSameItems, checkPortAvailable + checkIfMoreThanFSameItems +from plenum.config import poolTransactionsFile, domainTransactionsFile +from stp_core.loop.eventually import eventuallyAll, eventually + +from stp_core.network.util import checkPortAvailable from plenum.server.node import Node from plenum.test.msgs import randomMsg from plenum.test.spy_helpers import getLastClientReqReceivedForNode, getAllArgs, \ @@ -36,17 +34,13 @@ from plenum.test.test_client import TestClient, genTestClient from plenum.test.test_node import TestNode, TestReplica, TestNodeSet, \ checkPoolReady, checkNodesConnected, ensureElectionsDone, NodeRef +from plenum.test import waits + DelayRef = NamedTuple("DelayRef", [ ("op", Optional[str]), ("frm", Optional[str])]) -RaetDelay = NamedTuple("RaetDelay", [ - ("tk", Optional[TrnsKind]), - ("pk", Optional[PcktKind]), - ("fromPort", Optional[int])]) - - logger = getlogger() @@ -58,9 +52,22 @@ def ordinal(n): n, "tsnrhtdd"[(n / 10 % 10 != 1) * (n % 10 < 4) * n % 10::4]) -def checkSufficientRepliesRecvd(receivedMsgs: Iterable, reqId: int, - fValue: int): - receivedReplies = getRepliesFromClientInbox(receivedMsgs, reqId) +def checkSufficientRepliesReceived(receivedMsgs: Iterable, + reqId: int, + fValue: int): + """ + Checks number of replies for request with specified id in given inbox and + if this number is lower than number of malicious nodes (fValue) - + raises exception + + If you do not need response ponder on 
using + waitForSufficientRepliesForRequests instead + + :returns: response for request + """ + + receivedReplies = getRepliesFromClientInbox(inbox=receivedMsgs, + reqId=reqId) logger.debug("received replies for reqId {}: {}". format(reqId, receivedReplies)) assert len(receivedReplies) > fValue, "Received {} replies but expected " \ @@ -75,30 +82,60 @@ def checkSufficientRepliesRecvd(receivedMsgs: Iterable, reqId: int, # TODO add test case for what happens when replies don't have the same data -def checkSufficientRepliesForRequests(looper, client, requests, fVal=None, - timeoutPerReq=None): +def waitForSufficientRepliesForRequests(looper, + client, + *, # To force usage of names + requests = None, + requestIds = None, + fVal=None, + customTimeoutPerReq=None): + """ + Checks number of replies for given requests of specific client and + raises exception if quorum not reached at least for one + + :requests: list of requests; mutually exclusive with 'requestIds' + :requestIds: list of request ids; mutually exclusive with 'requests' + :returns: nothing + """ + + if requests is not None and requestIds is not None: + raise ValueError("Args 'requests' and 'requestIds' are " + "mutually exclusive") + requestIds = requestIds or [request.reqId for request in requests] + nodeCount = len(client.nodeReg) fVal = fVal or getMaxFailures(nodeCount) - timeoutPerReq = timeoutPerReq or 5 * nodeCount + + timeoutPerRequest = customTimeoutPerReq or \ + waits.expectedTransactionExecutionTime(nodeCount) + + totalTimeout = timeoutPerRequest * len(requestIds) + coros = [] - for request in requests: - coros.append(partial(checkSufficientRepliesRecvd, client.inBox, - request.reqId, fVal)) - looper.run(eventuallyAll(*coros, retryWait=1, - totalTimeout=timeoutPerReq * len(requests))) + for requestId in requestIds: + coros.append(partial(checkSufficientRepliesReceived, + client.inBox, + requestId, + fVal)) + looper.run(eventuallyAll(*coros, + retryWait=1, + totalTimeout=totalTimeout)) -def 
sendReqsToNodesAndVerifySuffReplies(looper: Looper, wallet: Wallet, + +def sendReqsToNodesAndVerifySuffReplies(looper: Looper, + wallet: Wallet, client: TestClient, - numReqs: int, fVal: int=None, - timeoutPerReq: float=None): + numReqs: int, + fVal: int=None, + customTimeoutPerReq: float=None): nodeCount = len(client.nodeReg) fVal = fVal or getMaxFailures(nodeCount) - timeoutPerReq = timeoutPerReq or 5 * nodeCount - requests = sendRandomRequests(wallet, client, numReqs) - checkSufficientRepliesForRequests(looper, client, requests, fVal, - timeoutPerReq) + waitForSufficientRepliesForRequests(looper, client, + requests=requests, + customTimeoutPerReq=customTimeoutPerReq, + fVal=fVal) return requests @@ -125,7 +162,7 @@ def getRepliesFromClientInbox(inbox, reqId) -> list: def checkLastClientReqForNode(node: TestNode, expectedRequest: Request): recvRequest = getLastClientReqReceivedForNode(node) assert recvRequest - assert expectedRequest.__dict__ == recvRequest.__dict__ + assert expectedRequest.as_dict == recvRequest.as_dict # noinspection PyIncorrectDocstring @@ -148,18 +185,10 @@ def assertEquality(observed: Any, expected: Any): "was {}".format(observed, expected) -def checkNodesReadyForRequest(looper: Looper, nodes: Sequence[TestNode], - timeout: int = 20): - checkPoolReady(looper, nodes, timeout) - # checkNodesCanRespondToClients(nodes) - - def setupNodesAndClient(looper: Looper, nodes: Sequence[TestNode], nodeReg=None, tmpdir=None): looper.run(checkNodesConnected(nodes)) - timeout = 15 + 2 * (len(nodes)) - ensureElectionsDone(looper=looper, nodes=nodes, retryWait=1, - timeout=timeout) + ensureElectionsDone(looper=looper, nodes=nodes) return setupClient(looper, nodes, nodeReg=nodeReg, tmpdir=tmpdir) @@ -218,18 +247,6 @@ async def aSetupClient(looper: Looper, return client1 -def getPrimaryReplica(nodes: Sequence[TestNode], - instId: int = 0) -> TestReplica: - preplicas = [node.replicas[instId] for node in nodes if - node.replicas[instId].isPrimary] - if 
len(preplicas) > 1: - raise RuntimeError('More than one primary node found') - elif len(preplicas) < 1: - raise RuntimeError('No primary node found') - else: - return preplicas[0] - - def randomOperation(): return { "type": "buy", @@ -257,25 +274,40 @@ async def msgAll(nodes: TestNodeSet): # test sending messages from every node to every other node # TODO split send and check so that the messages can be sent concurrently for p in permutations(nodes.nodeNames, 2): - await sendMsgAndCheck(nodes, p[0], p[1], timeout=3) + await sendMessageAndCheckDelivery(nodes, p[0], p[1]) + +async def sendMessageAndCheckDelivery(nodes: TestNodeSet, + frm: NodeRef, + to: NodeRef, + msg: Optional[Tuple]=None, + customTimeout=None): + """ + Sends message from one node to another and checks that it was delivered + + :param nodes: + :param frm: sender + :param to: recepient + :param msg: optional message - by default random one generated + :param customTimeout: + :return: + """ -async def sendMsgAndCheck(nodes: TestNodeSet, - frm: NodeRef, - to: NodeRef, - msg: Optional[Tuple]=None, - timeout: Optional[int]=15 - ): logger.debug("Sending msg from {} to {}".format(frm, to)) msg = msg if msg else randomMsg() - frmnode = nodes.getNode(frm) - rid = frmnode.nodestack.getRemote(nodes.getNodeName(to)).uid - frmnode.nodestack.send(msg, rid) - await eventually(checkMsg, msg, nodes, to, retryWait=.1, timeout=timeout, + sender = nodes.getNode(frm) + rid = sender.nodestack.getRemote(nodes.getNodeName(to)).uid + sender.nodestack.send(msg, rid) + + timeout = customTimeout or waits.expectedNodeToNodeMessageDeliveryTime() + + await eventually(checkMessageReceived, msg, nodes, to, + retryWait=.1, + timeout=timeout, ratchetSteps=10) -def checkMsg(msg, nodes, to, method: str = None): +def checkMessageReceived(msg, nodes, to, method: str = None): allMsgs = nodes.getAllMsgReceived(to, method) assert msg in allMsgs @@ -386,6 +418,11 @@ def checkReplyCount(client, idr, reqId, count): senders.add(sdr) 
assertLength(senders, count) +def waitReplyCount(looper, client, idr, reqId, count): + numOfNodes = len(client.nodeReg) + timeout = waits.expectedTransactionExecutionTime(numOfNodes) + looper.run(eventually(checkReplyCount, client, idr, reqId, count, + timeout=timeout)) def checkReqNackWithReason(client, reason: str, sender: str): found = False @@ -400,10 +437,12 @@ def checkReqNackWithReason(client, reason: str, sender: str): def checkViewNoForNodes(nodes: Iterable[TestNode], expectedViewNo: int = None): """ Checks if all the given nodes have the expected view no + :param nodes: The nodes to check for :param expectedViewNo: the view no that the nodes are expected to have :return: """ + viewNos = set() for node in nodes: logger.debug("{}'s view no is {}".format(node, node.viewNo)) @@ -411,10 +450,24 @@ def checkViewNoForNodes(nodes: Iterable[TestNode], expectedViewNo: int = None): assert len(viewNos) == 1 vNo, = viewNos if expectedViewNo: - assert vNo == expectedViewNo + assert vNo == expectedViewNo, ','.join(['{} -> Ratio: {}'.format( + node.name, node.monitor.masterThroughputRatio()) for node in nodes]) return vNo +def waitForViewChange(looper, nodeSet, expectedViewNo=None, customTimeout = None): + """ + Waits for nodes to come to same view. 
+ Raises exception when time is out + """ + + timeout = customTimeout or waits.expectedViewChangeTime(len(nodeSet)) + return looper.run(eventually(checkViewNoForNodes, + nodeSet, + expectedViewNo, + timeout=timeout)) + + def getNodeSuspicions(node: TestNode, code: int = None): params = getAllArgs(node, TestNode.reportSuspiciousNode) if params and code is not None: @@ -434,6 +487,14 @@ def checkDiscardMsg(processors, discardedMsg, assert reasonRegexp in last['reason'] +def countDiscarded(processor, reasonPat): + c = 0 + for entry in processor.spylog.getAll(processor.discard): + if 'reason' in entry.params and reasonPat in entry.params['reason']: + c += 1 + return c + + def filterNodeSet(nodeSet, exclude: List[Union[str, Node]]): """ Return a set of nodes with the nodes in exclude removed. @@ -475,22 +536,6 @@ def checkAllLedgersEqual(*ledgers): checkLedgerEquality(l1, l2) -def createClientSendMessageAndRemove(looper, nodeSet, tdir, wallet, name=None, - tries=None, sighex=None): - client, _ = genTestClient(nodeSet, tmpdir=tdir, name=name, sighex=sighex) - clientSendMessageAndRemove(client, looper, wallet, tries) - return client - - -def clientSendMessageAndRemove(client, looper, wallet, tries=None): - looper.add(client) - looper.run(client.ensureConnectedToNodes()) - clientInboxSize = len(client.inBox) - sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 1, tries) - assert len(client.inBox) > clientInboxSize - looper.removeProdable(client) - - def randomText(size): return ''.join(random.choice(string.ascii_letters) for _ in range(size)) @@ -530,17 +575,17 @@ def stopNodes(nodes: List[TestNode], looper=None, ensurePortsFreedUp=True): if ensurePortsFreedUp: ports = [[n.nodestack.ha[1], n.clientstack.ha[1]] for n in nodes] - waitUntillPortIsAvailable(looper, ports) + waitUntilPortIsAvailable(looper, ports) -def waitUntillPortIsAvailable(looper, ports): +def waitUntilPortIsAvailable(looper, ports, timeout=5): ports = itertools.chain(*ports) def chk(): for port 
in ports: checkPortAvailable(("", port)) - looper.run(eventually(chk, retryWait=.5)) + looper.run(eventually(chk, retryWait=.5, timeout=timeout)) def run_script(script, *args): @@ -553,3 +598,22 @@ def run_script(script, *args): p.send_signal(SIGINT) p.wait(timeout=1) assert p.poll() == 0, 'script failed' + +def viewNoForNodes(nodes): + viewNos = {node.viewNo for node in nodes} + assert 1 == len(viewNos) + return next(iter(viewNos)) + + +def primaryNodeNameForInstance(nodes, instanceId): + primaryNames = {node.replicas[instanceId].primaryName for node in nodes} + assert 1 == len(primaryNames) + primaryReplicaName = next(iter(primaryNames)) + return primaryReplicaName[:-2] + + +def nodeByName(nodes, name): + for node in nodes: + if node.name == name: + return node + raise Exception("Node with the name '{}' has not been found.".format(name)) \ No newline at end of file diff --git a/plenum/test/instances/test_commit_digest.py b/plenum/test/instances/test_commit_digest.py index 1ac285773a..e4d45fe8ca 100644 --- a/plenum/test/instances/test_commit_digest.py +++ b/plenum/test/instances/test_commit_digest.py @@ -2,14 +2,16 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Commit from plenum.common.util import adict from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, getNodeSuspicions +from plenum.test.helper import getNodeSuspicions from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ send3PhaseMsgWithIncorrectDigest -from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +from plenum.test import waits + whitelist = [Suspicions.CM_DIGEST_WRONG.reason, 'cannot process incoming COMMIT'] @@ -48,4 +50,6 @@ def chkSusp(): Suspicions.CM_DIGEST_WRONG.code) assert len(susps) == 1 - looper.run(eventually(chkSusp, retryWait=1, timeout=20)) + 
numOfNodes = len(primaryRep.node.nodeReg) + timeout = waits.expectedTransactionExecutionTime(numOfNodes) + looper.run(eventually(chkSusp, retryWait=1, timeout=timeout)) diff --git a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py index 333082c58d..2328fa93bf 100644 --- a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py +++ b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py @@ -1,14 +1,17 @@ from typing import Iterable -from plenum.common.eventually import eventually -from plenum.common.log import getlogger -from plenum.common.looper import Looper +import pytest +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger +from stp_core.loop.looper import Looper from plenum.common.startable import Status from plenum.test.greek import genNodeNames from plenum.test.helper import addNodeBack, ordinal from plenum.test.test_node import TestNodeSet, checkNodesConnected, \ checkNodeRemotes from plenum.test.test_stack import CONNECTED, JOINED_NOT_ALLOWED +from plenum.test import waits + whitelist = ['discarding message'] @@ -16,6 +19,7 @@ # noinspection PyIncorrectDocstring +@pytest.mark.skip(reason="SOV-940") def testProtocolInstanceCannotBecomeActiveWithLessThanFourServers( tdir_for_func): """ @@ -31,8 +35,8 @@ def testProtocolInstanceCannotBecomeActiveWithLessThanFourServers( with TestNodeSet(names=nodeNames, tmpdir=tdir_for_func) as nodeSet: with Looper(nodeSet) as looper: - for n in nodeSet: - n.startKeySharing() + # for n in nodeSet: + # n.startKeySharing() # helpers @@ -52,10 +56,13 @@ def addNodeBackAndCheck(nodeIdx: int, expectedStatus: Status): logger.info("Add back the {} node and see status of {}". 
format(ordinal(nodeIdx + 1), expectedStatus)) addNodeBack(nodeSet, looper, nodeNames[nodeIdx]) - looper.run( - eventually(checkNodeStatusRemotesAndF, expectedStatus, - nodeIdx, - retryWait=1, timeout=30)) + + timeout = waits.expectedNodeStartUpTimeout() + \ + waits.expectedNodeInterconnectionTime(len(nodeSet)) + looper.run(eventually(checkNodeStatusRemotesAndF, + expectedStatus, + nodeIdx, + retryWait=1, timeout=timeout)) # tests @@ -67,6 +74,8 @@ def addNodeBackAndCheck(nodeIdx: int, expectedStatus: Status): looper.removeProdable(nodeSet.nodes[n]) nodeSet.removeNode(n, shouldClean=False) + looper.runFor(10) + logger.debug("Add nodes back one at a time") for i in range(nodeCount): nodes = i + 1 diff --git a/plenum/test/instances/test_msgs_from_slow_instances.py b/plenum/test/instances/test_msgs_from_slow_instances.py index 82ff6b63cd..f30d2aa4e6 100644 --- a/plenum/test/instances/test_msgs_from_slow_instances.py +++ b/plenum/test/instances/test_msgs_from_slow_instances.py @@ -1,10 +1,12 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Commit from plenum.server.replica import Replica from plenum.test.delayers import delayerMsgTuple from plenum.test.test_node import TestNode +from plenum.test import waits + nodeCount = 4 @@ -39,4 +41,6 @@ def checkPresence(): assert len(commReqs) > 0 assert Replica.generateName(A.name, 1) in commReqs[0][0] - looper.run(eventually(checkPresence, retryWait=.5, timeout=10)) + numOfNodes = len(configNodeSet.nodes) + timeout = waits.expectedClientRequestPropagationTime(numOfNodes) + looper.run(eventually(checkPresence, retryWait=.5, timeout=timeout)) diff --git a/plenum/test/instances/test_multiple_commit.py b/plenum/test/instances/test_multiple_commit.py index 477e085065..7d87937da2 100644 --- a/plenum/test/instances/test_multiple_commit.py +++ b/plenum/test/instances/test_multiple_commit.py @@ -2,15 +2,15 @@ import pytest -from 
plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Commit from plenum.common.util import adict from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, \ - getNodeSuspicions, whitelistNode +from plenum.test.helper import getNodeSuspicions, whitelistNode from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ sendDuplicate3PhaseMsg -from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +from plenum.test import waits whitelist = [Suspicions.DUPLICATE_CM_SENT.reason, 'cannot process incoming COMMIT'] @@ -58,4 +58,6 @@ def chkSusp(): Suspicions.DUPLICATE_CM_SENT.code)) \ == 2 - looper.run(eventually(chkSusp, retryWait=1, timeout=20)) + numOfNodes = len(primaryRep.node.nodeReg) + timeout = waits.expectedTransactionExecutionTime(numOfNodes) + looper.run(eventually(chkSusp, retryWait=1, timeout=timeout)) diff --git a/plenum/test/instances/test_multiple_instance_change_msgs.py b/plenum/test/instances/test_multiple_instance_change_msgs.py index 5200116c0f..8aae93d9e2 100644 --- a/plenum/test/instances/test_multiple_instance_change_msgs.py +++ b/plenum/test/instances/test_multiple_instance_change_msgs.py @@ -1,17 +1,19 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.exceptions import SuspiciousNode from plenum.common.types import InstanceChange from plenum.server.node import Node from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions from plenum.test.spy_helpers import getAllArgs +from plenum.test import waits + nodeCount = 7 -@pytest.mark.xfail(reason="Not yet implemented") +@pytest.mark.skip(reason="Not yet implemented") def testMultipleInstanceChangeMsgsMarkNodeAsSuspicious(looper, nodeSet, up): maliciousNode = nodeSet.Alpha for i in 
range(0, 5): @@ -25,8 +27,11 @@ def chk(instId): for arg in args: assert arg['frm'] == maliciousNode.name + numOfNodes = len(nodeSet) + instanceChangeTimeout = waits.expectedViewChangeTime(numOfNodes) + for i in range(0, 5): - looper.run(eventually(chk, i, retryWait=1, timeout=20)) + looper.run(eventually(chk, i, retryWait=1, timeout=instanceChangeTimeout)) def g(): for node in nodeSet: @@ -34,7 +39,10 @@ def g(): frm, reason, code = getAllArgs(node, Node.reportSuspiciousNode) assert frm == maliciousNode.name assert isinstance(reason, SuspiciousNode) - assert len(getNodeSuspicions(node, - Suspicions.FREQUENT_INST_CHNG.code)) == 13 + suspectingNodes = \ + getNodeSuspicions(node, + Suspicions.FREQUENT_INST_CHNG.code) + assert len(suspectingNodes) == 13 - looper.run(eventually(g, retryWait=1, timeout=20)) + timeout = waits.expectedTransactionExecutionTime(numOfNodes) + looper.run(eventually(g, retryWait=1, timeout=timeout)) diff --git a/plenum/test/instances/test_multiple_pre_prepare.py b/plenum/test/instances/test_multiple_pre_prepare.py index 87f8d7947f..d6572cf994 100644 --- a/plenum/test/instances/test_multiple_pre_prepare.py +++ b/plenum/test/instances/test_multiple_pre_prepare.py @@ -2,15 +2,17 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import PrePrepare from plenum.common.util import adict from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, getNodeSuspicions +from plenum.test.helper import getNodeSuspicions from plenum.test.instances.helper import sentPrepare from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ sendDuplicate3PhaseMsg -from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +from plenum.test import waits + whitelist = [Suspicions.DUPLICATE_PPR_SENT.reason, 'cannot process incoming PRE-PREPARE', @@ -52,9 +54,15 
@@ def chkSusp(): for r in nonPrimaryReps: # Every node with non primary replicas of instance 0 should raise # suspicion twice, once for each extra PRE-PREPARE request - assert len(getNodeSuspicions(r.node, - Suspicions.DUPLICATE_PPR_SENT.code)) == 2 + + suspectingNodes = \ + getNodeSuspicions(r.node, + Suspicions.DUPLICATE_PPR_SENT.code) + assert len(suspectingNodes) == 2 + # Each non primary replica should just send one PREPARE assert len(sentPrepare(r)) == 1 - looper.run(eventually(chkSusp, retryWait=1, timeout=20)) + numOfNodes = len(primaryRep.node.nodeReg) + timeout = waits.expectedTransactionExecutionTime(numOfNodes) + looper.run(eventually(chkSusp, retryWait=1, timeout=timeout)) diff --git a/plenum/test/instances/test_multiple_prepare.py b/plenum/test/instances/test_multiple_prepare.py index d35a4f107c..7ef9080d82 100644 --- a/plenum/test/instances/test_multiple_prepare.py +++ b/plenum/test/instances/test_multiple_prepare.py @@ -2,15 +2,15 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Prepare from plenum.common.util import adict from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, \ - getNodeSuspicions, whitelistNode +from plenum.test.helper import getNodeSuspicions, whitelistNode from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ sendDuplicate3PhaseMsg -from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +from plenum.test import waits whitelist = [Suspicions.DUPLICATE_PR_SENT.reason, 'Invalid prepare message received', @@ -58,8 +58,12 @@ def chkSusp(): # Every node except the one from which duplicate PREPARE was # sent should raise suspicion twice, once for each extra # PREPARE request - assert len(getNodeSuspicions(r.node, - Suspicions.DUPLICATE_PR_SENT.code)) \ - == 2 - looper.run(eventually(chkSusp, 
retryWait=1, timeout=20)) + suspectingNodes = \ + getNodeSuspicions(r.node, + Suspicions.DUPLICATE_PR_SENT.code) + assert len(suspectingNodes) == 2 + + numOfNodes = len(primaryRep.node.nodeReg) + timeout = waits.expectedTransactionExecutionTime(numOfNodes) + looper.run(eventually(chkSusp, retryWait=1, timeout=timeout)) diff --git a/plenum/test/instances/test_pre_prepare_digest.py b/plenum/test/instances/test_pre_prepare_digest.py index 2331fd5cac..e908bacdf9 100644 --- a/plenum/test/instances/test_pre_prepare_digest.py +++ b/plenum/test/instances/test_pre_prepare_digest.py @@ -2,15 +2,17 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import PrePrepare from plenum.common.util import adict from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, getNodeSuspicions +from plenum.test.helper import getNodeSuspicions from plenum.test.instances.helper import sentPrepare from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ send3PhaseMsgWithIncorrectDigest -from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +from plenum.test import waits + whitelist = [Suspicions.PPR_DIGEST_WRONG.reason, 'cannot process incoming PRE-PREPARE'] @@ -46,4 +48,6 @@ def chkSusp(): # No non primary replica should send any PREPARE assert len(sentPrepare(r)) == 0 - looper.run(eventually(chkSusp, retryWait=1, timeout=20)) + numOfNodes = len(primaryRep.node.nodeReg) + timeout = waits.expectedTransactionExecutionTime(numOfNodes) + looper.run(eventually(chkSusp, retryWait=1, timeout=timeout)) diff --git a/plenum/test/instances/test_prepare_digest.py b/plenum/test/instances/test_prepare_digest.py index ad6d2b2dd0..fd56d07ed7 100644 --- a/plenum/test/instances/test_prepare_digest.py +++ b/plenum/test/instances/test_prepare_digest.py @@ -2,14 +2,16 @@ import pytest -from 
plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Prepare from plenum.common.util import adict from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, getNodeSuspicions +from plenum.test.helper import getNodeSuspicions from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ send3PhaseMsgWithIncorrectDigest -from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +from plenum.test import waits + whitelist = [Suspicions.PR_DIGEST_WRONG.reason, 'Invalid prepare message received', @@ -54,4 +56,6 @@ def chkSusp(): assert len(getNodeSuspicions(r.node, Suspicions.PR_DIGEST_WRONG.code)) == 1 - looper.run(eventually(chkSusp, retryWait=1, timeout=20)) + numOfNodes = len(primaryRep.node.nodeReg) + timeout = waits.expectedTransactionExecutionTime(numOfNodes) + looper.run(eventually(chkSusp, retryWait=1, timeout=timeout)) diff --git a/plenum/test/malicious_behaviors_client.py b/plenum/test/malicious_behaviors_client.py index 3f306606ba..6fc4892b9c 100644 --- a/plenum/test/malicious_behaviors_client.py +++ b/plenum/test/malicious_behaviors_client.py @@ -30,7 +30,7 @@ def inner(client: Client) -> Client: ovrdRids = [rid for rid in client.nodestack.remotes.keys() if rid not in skipIds] else: - ovrdRids = client.nodestack.remotes.keys()[skipCount:] + ovrdRids = list(client.nodestack.remotes.keys())[skipCount:] def evilSend(self, msg, *rids, signer=None) -> None: logger.debug("EVIL: sending to less nodes {}, ignoring passed " diff --git a/plenum/test/malicious_behaviors_node.py b/plenum/test/malicious_behaviors_node.py index b83e23e0f3..dd18358fee 100644 --- a/plenum/test/malicious_behaviors_node.py +++ b/plenum/test/malicious_behaviors_node.py @@ -11,7 +11,7 @@ from plenum.common import util from plenum.common.util import updateNamedTuple -from plenum.common.log import 
getlogger +from stp_core.common.log import getlogger from plenum.server.replica import TPCStat from plenum.test.helper import TestReplica from plenum.test.test_node import TestNode, TestReplica @@ -27,11 +27,13 @@ def makeNodeFaulty(node, *behaviors): def changesRequest(node): def evilCreatePropagate(self, - request: Request, clientName: str) -> Propagate: + request: Request, identifier: str) -> Propagate: logger.debug("EVIL: Creating propagate request for client request {}". format(request)) request.operation["amount"] += random.random() - return Propagate(request.__getstate__(), clientName) + if isinstance(identifier, bytes): + identifier = identifier.decode() + return Propagate(request.__getstate__(), identifier) evilMethod = types.MethodType(evilCreatePropagate, node) node.createPropagate = evilMethod diff --git a/plenum/test/monitoring/conftest.py b/plenum/test/monitoring/conftest.py index 5d81857ae2..a6abd51536 100644 --- a/plenum/test/monitoring/conftest.py +++ b/plenum/test/monitoring/conftest.py @@ -1,7 +1,7 @@ import pytest -from plenum.common.eventually import eventually -from plenum.test.helper import sendRandomRequest, checkSufficientRepliesRecvd +from plenum.test.helper import sendRandomRequest, \ + waitForSufficientRepliesForRequests @pytest.fixture(scope="module") @@ -9,7 +9,7 @@ def requests(looper, wallet1, client1): requests = [] for i in range(5): req = sendRandomRequest(wallet1, client1) - looper.run(eventually(checkSufficientRepliesRecvd, client1.inBox, req.reqId, 1, - retryWait=1, timeout=5)) + waitForSufficientRepliesForRequests(looper, client1, + requests=[req], fVal=1) requests.append(req) - return requests \ No newline at end of file + return requests diff --git a/plenum/test/monitoring/test_avg_latency.py b/plenum/test/monitoring/test_avg_latency.py index ac77b06956..b8550a2b4a 100644 --- a/plenum/test/monitoring/test_avg_latency.py +++ b/plenum/test/monitoring/test_avg_latency.py @@ -1,9 +1,9 @@ -from plenum.common.eventually import 
eventually -from plenum.common.log import getlogger -from plenum.common.looper import Looper +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger +from stp_core.loop.looper import Looper from plenum.server.node import Node from plenum.test.helper import sendRandomRequest, \ - checkSufficientRepliesRecvd + waitForSufficientRepliesForRequests from plenum.test.test_node import TestNodeSet nodeCount = 4 @@ -20,9 +20,8 @@ def testAvgReqLatency(looper: Looper, nodeSet: TestNodeSet, wallet1, client1): for i in range(5): req = sendRandomRequest(wallet1, client1) - looper.run(eventually(checkSufficientRepliesRecvd, - client1.inBox, req.reqId, 1, - retryWait=1, timeout=5)) + waitForSufficientRepliesForRequests(looper, client1, + requests=[req], fVal=1) for node in nodeSet: # type: Node mLat = node.monitor.getAvgLatencyForClient(wallet1.defaultId, diff --git a/plenum/test/monitoring/test_instance_change_with_Delta.py b/plenum/test/monitoring/test_instance_change_with_Delta.py index 98a06c990d..7bc80697d2 100644 --- a/plenum/test/monitoring/test_instance_change_with_Delta.py +++ b/plenum/test/monitoring/test_instance_change_with_Delta.py @@ -2,13 +2,17 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.types import PrePrepare from plenum.common.util import adict from plenum.server.node import Node -from plenum.test.helper import checkViewNoForNodes, \ - getPrimaryReplica, sendReqsToNodesAndVerifySuffReplies +from plenum.test import waits +from plenum.test.helper import waitForViewChange, \ + sendReqsToNodesAndVerifySuffReplies, sendRandomRequests, \ + checkViewNoForNodes +from plenum.test.test_node import getPrimaryReplica + nodeCount = 7 whitelist = ["discarding message"] @@ -47,11 +51,10 @@ def ensureAnotherPerfCheck(): assert cur[c].endtime > previousPerfChecks[c].endtime 
return cur - perfCheckFreq = max(n.perfCheckFreq for n in nodes) - + timeout = waits.expectedNextPerfCheck(nodes) newPerfChecks = looper.run(eventually(ensureAnotherPerfCheck, retryWait=1, - timeout=perfCheckFreq + 1)) + timeout=timeout)) return newPerfChecks @@ -101,11 +104,11 @@ def step2(step1, looper): def step3(step2): # make P (primary replica on master) faulty, i.e., slow to send PRE-PREPAREs - def by3IfPrePrepare(msg): + def ifPrePrepare(msg): if isinstance(msg, PrePrepare): - return 3 + return 5 - step2.P.outBoxTestStasher.delay(by3IfPrePrepare) + step2.P.outBoxTestStasher.delay(ifPrePrepare) # send requests to client return step2 @@ -118,4 +121,13 @@ def testInstChangeWithLowerRatioThanDelta(looper, step3, wallet1, client1): waitForNextPerfCheck(looper, step3.nodes, step3.perfChecks) # verify all nodes have undergone an instance change - looper.run(eventually(checkViewNoForNodes, step3.nodes, 1, timeout=10)) + for i in range(20): + try: + waitForViewChange(looper, step3.nodes, expectedViewNo=1) + except AssertionError as ex: + # send additional request and check view change + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) + else: + break + else: + assert False, ex diff --git a/plenum/test/monitoring/test_instance_change_with_req_Lambda.py b/plenum/test/monitoring/test_instance_change_with_req_Lambda.py index b1b05ac98d..20c9644145 100644 --- a/plenum/test/monitoring/test_instance_change_with_req_Lambda.py +++ b/plenum/test/monitoring/test_instance_change_with_req_Lambda.py @@ -2,11 +2,12 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import PrePrepare, f from plenum.common.util import adict -from plenum.test.helper import checkViewNoForNodes, getPrimaryReplica, \ +from plenum.test.helper import waitForViewChange, \ sendReqsToNodesAndVerifySuffReplies +from plenum.test.test_node import getPrimaryReplica from plenum.test.spy_helpers import 
getAllReturnVals nodeCount = 7 @@ -45,9 +46,12 @@ def by65SpecificPrePrepare(msg): return 65 P.outBoxTestStasher.delay(by65SpecificPrePrepare) - - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, - numReqs=5, timeoutPerReq=80) + # TODO select or create a timeout for this case in 'waits' + sendReqsToNodesAndVerifySuffReplies(looper, + wallet1, + client1, + numReqs=5, + customTimeoutPerReq=80) return adict(nodes=startedNodes) @@ -58,5 +62,5 @@ def testInstChangeWithMoreReqLat(looper, setup): node.checkPerformance() assert any(getAllReturnVals(node.monitor, node.monitor.isMasterReqLatencyTooHigh)) - looper.run(eventually(partial(checkViewNoForNodes, nodes, 1), - retryWait=1, timeout=20)) + + waitForViewChange(looper, nodes) diff --git a/plenum/test/monitoring/test_monitoring_params_with_zfn.py b/plenum/test/monitoring/test_monitoring_params_with_zfn.py index 8d1d0ed85e..4e04a667d2 100644 --- a/plenum/test/monitoring/test_monitoring_params_with_zfn.py +++ b/plenum/test/monitoring/test_monitoring_params_with_zfn.py @@ -7,7 +7,7 @@ nodeCount = 7 -def testThroughtputThreshold(nodeSet, requests): +def testThroughputThreshold(nodeSet, requests): for node in nodeSet: # type: Node masterThroughput, avgBackupThroughput = node.monitor.getThroughputs( node.instances.masterId) diff --git a/plenum/test/monitoring/test_post_monitoring_stats.py b/plenum/test/monitoring/test_post_monitoring_stats.py index e2b2f34034..4e64feb5ea 100644 --- a/plenum/test/monitoring/test_post_monitoring_stats.py +++ b/plenum/test/monitoring/test_post_monitoring_stats.py @@ -1,6 +1,6 @@ from plenum.common.config_util import getConfig -from plenum.common.eventually import eventually -from plenum.common.looper import Looper +from stp_core.loop.eventually import eventually +from stp_core.loop.looper import Looper from plenum.server.monitor import Monitor from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies from plenum.test.test_node import TestNodeSet @@ -29,8 +29,11 @@ def 
testPostingThroughput(postingStatsEnabled, looper: Looper, assert node.monitor.highResThroughput == 0 assert node.monitor.totalRequests == 0 - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqCount, nodeSet.f, - timeoutPerReq=20) + sendReqsToNodesAndVerifySuffReplies(looper, + wallet1, + client1, + reqCount, + nodeSet.f) for node in nodeSet: assert len(node.monitor.orderedRequestsInLast) == reqCount @@ -55,7 +58,8 @@ def chk(): assert node.monitor.highResThroughput == 0 assert node.monitor.totalRequests == reqCount - looper.run(eventually(chk, retryWait=1, timeout=10)) + timeout = config.ThroughputWindowSize + looper.run(eventually(chk, retryWait=1, timeout=timeout)) def testPostingLatency(postingStatsEnabled, looper: Looper, @@ -76,9 +80,11 @@ def testPostingLatency(postingStatsEnabled, looper: Looper, assert node.monitor.masterLatency == 0 assert node.monitor.avgBackupLatency == 0 - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqCount, - nodeSet.f, - timeoutPerReq=20) + sendReqsToNodesAndVerifySuffReplies(looper, + wallet1, + client1, + reqCount, + nodeSet.f) for node in nodeSet: assert node.monitor.masterLatency > 0 @@ -98,4 +104,5 @@ def chk(): assert node.monitor.masterLatency == 0 assert node.monitor.avgBackupLatency == 0 - looper.run(eventually(chk, retryWait=1, timeout=10)) + timeout = config.LatencyWindowSize + looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/monitoring/test_throughput.py b/plenum/test/monitoring/test_throughput.py index f7a0c36fbf..415bc8d556 100644 --- a/plenum/test/monitoring/test_throughput.py +++ b/plenum/test/monitoring/test_throughput.py @@ -1,10 +1,10 @@ from typing import Iterable -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.server.node import Node from plenum.test.helper import sendRandomRequest, \ - 
checkSufficientRepliesRecvd + waitForSufficientRepliesForRequests nodeCount = 4 @@ -18,9 +18,8 @@ def testThroughput(looper, nodeSet: Iterable[Node], wallet1, client1): """ for i in range(5): req = sendRandomRequest(wallet1, client1) - looper.run(eventually(checkSufficientRepliesRecvd, - client1.inBox, req.reqId, 1, - retryWait=1, timeout=5)) + waitForSufficientRepliesForRequests(looper, client1, + requests=[req], fVal=1) for node in nodeSet: masterThroughput, avgBackupThroughput = node.monitor.getThroughputs(node.instances.masterId) diff --git a/plenum/test/node_catchup/conftest.py b/plenum/test/node_catchup/conftest.py index aa815eb0cb..0f841b76fb 100644 --- a/plenum/test/node_catchup/conftest.py +++ b/plenum/test/node_catchup/conftest.py @@ -1,37 +1,52 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.util import randomString from plenum.test.conftest import getValueFromModule from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.node_catchup.helper import waitNodeLedgersEquality from plenum.test.pool_transactions.helper import \ addNewStewardAndNode, buildPoolClientAndWallet +from plenum.test.pool_transactions.conftest import stewardAndWallet1, \ + steward1, stewardWallet from plenum.test.test_client import TestClient from plenum.test.test_node import checkNodesConnected +def whitelist(): + return ['got error while verifying message'] + + +logger = getlogger() + + +@pytest.yield_fixture(scope="module") +def looper(txnPoolNodesLooper): + yield txnPoolNodesLooper + + @pytest.yield_fixture("module") -def nodeCreatedAfterSomeTxns(txnPoolNodesLooper, txnPoolNodeSet, +def nodeCreatedAfterSomeTxns(looper, txnPoolNodesLooper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnStewardData, tconf, allPluginsPath, request): - # with Looper(debug=True) as 
looper: client, wallet = buildPoolClientAndWallet(poolTxnStewardData, tdirWithPoolTxns, clientClass=TestClient) - txnPoolNodesLooper.add(client) - txnPoolNodesLooper.run(client.ensureConnectedToNodes()) + looper.add(client) + looper.run(client.ensureConnectedToNodes()) txnCount = getValueFromModule(request, "txnCount", 5) - sendReqsToNodesAndVerifySuffReplies(txnPoolNodesLooper, wallet, client, - txnCount, timeoutPerReq=25) - + sendReqsToNodesAndVerifySuffReplies(txnPoolNodesLooper, + wallet, + client, + txnCount) newStewardName = randomString() newNodeName = "Epsilon" newStewardClient, newStewardWallet, newNode = addNewStewardAndNode( - txnPoolNodesLooper, client, wallet, newStewardName, newNodeName, + looper, client, wallet, newStewardName, newNodeName, tdirWithPoolTxns, tconf, allPluginsPath=allPluginsPath, autoStart=True) - yield txnPoolNodesLooper, newNode, client, wallet, newStewardClient, \ - newStewardWallet + yield looper, newNode, client, wallet, newStewardClient, \ + newStewardWallet @pytest.fixture("module") @@ -39,7 +54,7 @@ def nodeSetWithNodeAddedAfterSomeTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): looper, newNode, client, wallet, newStewardClient, newStewardWallet = \ nodeCreatedAfterSomeTxns txnPoolNodeSet.append(newNode) - looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=10)) + looper.run(checkNodesConnected(txnPoolNodeSet)) looper.run(newStewardClient.ensureConnectedToNodes()) looper.run(client.ensureConnectedToNodes()) return looper, newNode, client, wallet, newStewardClient, newStewardWallet @@ -48,6 +63,6 @@ def nodeSetWithNodeAddedAfterSomeTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): @pytest.fixture("module") def newNodeCaughtUp(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns): looper, newNode, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns - looper.run(eventually(checkNodeLedgersForEquality, newNode, - *txnPoolNodeSet[:4], retryWait=1, timeout=10)) + waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:4]) + 
return newNode diff --git a/plenum/test/node_catchup/helper.py b/plenum/test/node_catchup/helper.py index cd0048d6e4..d1c162df83 100644 --- a/plenum/test/node_catchup/helper.py +++ b/plenum/test/node_catchup/helper.py @@ -1,11 +1,12 @@ from typing import Iterable -from plenum.common.eventually import eventually -from plenum.common.types import HA +from stp_core.loop.eventually import eventually +from stp_core.types import HA from plenum.test.helper import checkLedgerEquality from plenum.test.test_client import TestClient from plenum.test.test_node import TestNode - +from plenum.test import waits +from plenum.common import util # TODO: This should just take an arbitrary number of nodes and check for their # ledgers to be equal @@ -16,6 +17,24 @@ def checkNodeLedgersForEquality(node: TestNode, checkLedgerEquality(node.poolLedger, n.poolLedger) +def waitNodeLedgersEquality(looper, + referenceNode: TestNode, + *otherNodes: Iterable[TestNode], + customTimeout = None): + """ + Wait for node ledger to become equal + + :param referenceNode: node whose ledger used as a reference + """ + + numOfNodes = len(otherNodes) + 1 + timeout = customTimeout or waits.expectedPoolLedgerCheck(numOfNodes) + looper.run(eventually(checkNodeLedgersForEquality, + referenceNode, + *otherNodes, + retryWait=1, timeout=timeout)) + + def ensureNewNodeConnectedClient(looper, client: TestClient, node: TestNode): stackParams = node.clientStackParams client.nodeReg[stackParams['name']] = HA('127.0.0.1', stackParams['ha'][1]) @@ -28,9 +47,13 @@ def checkClientPoolLedgerSameAsNodes(client: TestClient, checkLedgerEquality(client.ledger, n.poolLedger) -def ensureClientConnectedToNodesAndPoolLedgerSame(looper, client: TestClient, +def ensureClientConnectedToNodesAndPoolLedgerSame(looper, + client: TestClient, *nodes:Iterable[TestNode]): - looper.run(eventually(checkClientPoolLedgerSameAsNodes, client, - *nodes, retryWait=1, - timeout=3*len(nodes))) + fVal = util.getMaxFailures(len(nodes)) + 
poolCheckTimeout = waits.expectedPoolLedgerCheck(fVal) + looper.run(eventually(checkClientPoolLedgerSameAsNodes, + client, + *nodes, + timeout=poolCheckTimeout)) looper.run(client.ensureConnectedToNodes()) diff --git a/plenum/test/node_catchup/test_catchup_delayed_nodes.py b/plenum/test/node_catchup/test_catchup_delayed_nodes.py index 2365ad057d..aea887e22b 100644 --- a/plenum/test/node_catchup/test_catchup_delayed_nodes.py +++ b/plenum/test/node_catchup/test_catchup_delayed_nodes.py @@ -1,10 +1,12 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger + +from plenum.test import waits from plenum.test.delayers import cpDelay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.node_catchup.helper import waitNodeLedgersEquality from plenum.test.pool_transactions.helper import addNewStewardAndNode from plenum.test.test_node import checkNodesConnected @@ -34,6 +36,8 @@ def testCatchupDelayedNodes(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns, nodeYName = "Eta" stewardZName = "testClientStewardZ" nodeZName = "Theta" + delayX = 45 + delayY = 2 stewardX, nodeX = addNewStewardAndNode(looper, client, stewardXName, nodeXName, tdirWithPoolTxns, tconf, @@ -42,14 +46,15 @@ def testCatchupDelayedNodes(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns, nodeYName, tdirWithPoolTxns, tconf, allPluginsPath, autoStart=False) - nodeX.nodeIbStasher.delay(cpDelay(45)) - nodeY.nodeIbStasher.delay(cpDelay(2)) + nodeX.nodeIbStasher.delay(cpDelay(delayX)) + nodeY.nodeIbStasher.delay(cpDelay(delayY)) looper.add(nodeX) looper.add(nodeY) txnPoolNodeSet.append(nodeX) txnPoolNodeSet.append(nodeY) - looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=60)) + timeout = waits.expectedCatchupTime(len(txnPoolNodeSet)) + delayX + delayY + 
looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=timeout)) logger.debug("Stopping 2 newest nodes, {} and {}".format(nodeX.name, nodeY.name)) nodeX.stop() @@ -60,7 +65,5 @@ def testCatchupDelayedNodes(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns, nodeY.name)) nodeX.start(looper.loop) nodeY.start(looper.loop) - looper.run(eventually(checkNodeLedgersForEquality, nodeX, - *txnPoolNodeSet[:5], retryWait=1, timeout=15)) - looper.run(eventually(checkNodeLedgersForEquality, nodeY, - *txnPoolNodeSet[:5], retryWait=1, timeout=15)) + waitNodeLedgersEquality(looper, nodeX, *txnPoolNodeSet[:5]) + waitNodeLedgersEquality(looper, nodeY, *txnPoolNodeSet[:5]) diff --git a/plenum/test/node_catchup/test_catchup_scenarios.py b/plenum/test/node_catchup/test_catchup_scenarios.py index b2315442b0..f4b36a093b 100644 --- a/plenum/test/node_catchup/test_catchup_scenarios.py +++ b/plenum/test/node_catchup/test_catchup_scenarios.py @@ -1,13 +1,15 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.startable import Mode from plenum.test.delayers import crDelay from plenum.test.helper import sendRandomRequests from plenum.test.node_catchup.helper import \ ensureClientConnectedToNodesAndPoolLedgerSame from plenum.test.test_node import checkNodesConnected +from plenum.test import waits + logger = getlogger() @@ -23,14 +25,15 @@ def nodeStashingOrderedRequests(txnPoolNodeSet, nodeCreatedAfterSomeTxns): ensureClientConnectedToNodesAndPoolLedgerSame(looper, client, *txnPoolNodeSet[:-1]) sendRandomRequests(wallet, client, 10) - looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=15)) + looper.run(checkNodesConnected(txnPoolNodeSet)) def stashing(): assert newNode.mode != Mode.participating assert len(newNode.stashedOrderedReqs) > 0 assert len(newNode.reqsFromCatchupReplies) > 0 - 
looper.run(eventually(stashing, retryWait=1, timeout=20)) + timeout = waits.expectedRequestStashingTime() + looper.run(eventually(stashing, retryWait=1, timeout=timeout)) @pytest.mark.skip(reason="SOV-552. Incomplete") diff --git a/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py b/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py new file mode 100644 index 0000000000..dd440ecfbb --- /dev/null +++ b/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py @@ -0,0 +1,53 @@ +import types + +from plenum.common.types import CatchupReq, f +from plenum.common.util import randomString +from plenum.test.delayers import crDelay +from plenum.test.helper import sendRandomRequests, \ + sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.pool_transactions.helper import addNewStewardAndNode +from plenum.test.test_node import checkNodesConnected, TestNode +from stp_core.loop.eventually import eventually + + +def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet, + tdirWithPoolTxns, tconf, + steward1, stewardWallet, + allPluginsPath): + """ + A new node joins while transactions are happening, its catchup requests + include till where it has to catchup, which would be less than the other + node's ledger size. 
In the meantime, the new node will stash all requests + """ + + sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, steward1, 5, 1) + + def chkAfterCall(self, req, frm): + r = self.processCatchupReq(req, frm) + typ = getattr(req, f.LEDGER_TYPE.nm) + if typ == 1: + ledger = self.getLedgerForMsg(req) + assert req.catchupTill < ledger.size + return r + + for node in txnPoolNodeSet: + node.nodeMsgRouter.routes[CatchupReq] = types.MethodType( + chkAfterCall, node.ledgerManager) + node.nodeIbStasher.delay(crDelay(3)) + + print('Sending 10 requests') + sendRandomRequests(stewardWallet, steward1, 5) + looper.runFor(1) + newStewardName = randomString() + newNodeName = "Epsilon" + newStewardClient, newStewardWallet, newNode = addNewStewardAndNode( + looper, steward1, stewardWallet, newStewardName, newNodeName, + tdirWithPoolTxns, tconf, allPluginsPath=allPluginsPath, autoStart=True) + txnPoolNodeSet.append(newNode) + looper.runFor(2) + sendRandomRequests(stewardWallet, steward1, 5) + # TODO select or create a timeout for this case in 'waits' + looper.run(eventually(checkNodeLedgersForEquality, newNode, + *txnPoolNodeSet[:-1], retryWait=1, timeout=80)) + assert newNode.spylog.count(TestNode.processStashedOrderedReqs) > 0 diff --git a/plenum/test/node_catchup/test_discard_view_no.py b/plenum/test/node_catchup/test_discard_view_no.py index 1eec28a010..b34b83859b 100644 --- a/plenum/test/node_catchup/test_discard_view_no.py +++ b/plenum/test/node_catchup/test_discard_view_no.py @@ -3,16 +3,18 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Nomination, PrePrepare from plenum.common.util import randomString from plenum.test.delayers import delayNonPrimaries from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ - checkViewNoForNodes, checkDiscardMsg -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality + waitForViewChange, checkDiscardMsg 
+from plenum.test.node_catchup.helper import waitNodeLedgersEquality from plenum.test.pool_transactions.helper import addNewStewardAndNode from plenum.test.test_node import checkNodesConnected, \ checkProtocolInstanceSetup +from plenum.test import waits + whitelist = ['found legacy entry'] # warnings @@ -34,8 +36,7 @@ def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, # so master's performance falls and view changes delayNonPrimaries(txnPoolNodeSet, 0, 10) sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 4) - looper.run(eventually(partial(checkViewNoForNodes, txnPoolNodeSet, - viewNo + 1), retryWait=1, timeout=20)) + waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo+1) newStewardName = "testClientSteward" + randomString(3) nodeName = "Theta" @@ -48,10 +49,9 @@ def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, txnPoolNodeSet.append(nodeTheta) looper.run(checkNodesConnected(txnPoolNodeSet)) looper.run(client.ensureConnectedToNodes()) - looper.run(eventually(checkNodeLedgersForEquality, nodeTheta, - *txnPoolNodeSet[:-1], retryWait=1, timeout=5)) - checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1, - timeout=10) + + waitNodeLedgersEquality(looper, nodeTheta, *txnPoolNodeSet[:-1]) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) electMsg = Nomination(nodeX.name, 0, viewNo) threePMsg = PrePrepare( 0, @@ -66,8 +66,12 @@ def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, nodeX.send(electMsg, ridTheta) nodeX.send(threePMsg, ridTheta) nodeX.send(electMsg, ridTheta) + + messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime() looper.run(eventually(checkDiscardMsg, [nodeTheta, ], electMsg, - 'un-acceptable viewNo', retryWait=1, timeout=5)) + 'un-acceptable viewNo', + retryWait=1, timeout=messageTimeout)) nodeX.send(threePMsg, ridTheta) looper.run(eventually(checkDiscardMsg, [nodeTheta, ], threePMsg, - 'un-acceptable viewNo', retryWait=1, timeout=5)) + 'un-acceptable viewNo', + retryWait=1, 
timeout=messageTimeout)) diff --git a/plenum/test/node_catchup/test_new_node_catchup.py b/plenum/test/node_catchup/test_new_node_catchup.py index 4b5473f4b6..b0f28fc703 100644 --- a/plenum/test/node_catchup/test_new_node_catchup.py +++ b/plenum/test/node_catchup/test_new_node_catchup.py @@ -1,18 +1,23 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.node_catchup.helper import waitNodeLedgersEquality from plenum.test.pool_transactions.helper import ensureNodeDisconnectedFromPool from plenum.test.test_ledger_manager import TestLedgerManager -from plenum.test.test_node import checkNodesConnected +from plenum.test.test_node import checkNodesConnected, ensureElectionsDone, \ + TestNode +from plenum.test import waits -logger = getlogger() +# Do not remove the next import +from plenum.test.node_catchup.conftest import whitelist +logger = getlogger() txnCount = 5 +@pytest.mark.skip(reason="SOV-939") def testNewNodeCatchup(newNodeCaughtUp): """ A new node that joins after some transactions should eventually get @@ -24,6 +29,7 @@ def testNewNodeCatchup(newNodeCaughtUp): pass +@pytest.mark.skip(reason="SOV-939") def testPoolLegerCatchupBeforeDomainLedgerCatchup(txnPoolNodeSet, newNodeCaughtUp): """ @@ -66,18 +72,20 @@ def testDelayedLedgerStatusNotChangingState(): # but its weird since prepares and commits are received which are sent before # and after prepares, respectively. 
Here is the pivotal link # https://www.pivotaltracker.com/story/show/127897273 +@pytest.mark.skip(reason='fails, SOV-928, SOV-939') def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet, - nodeSetWithNodeAddedAfterSomeTxns): + nodeSetWithNodeAddedAfterSomeTxns, + tdirWithPoolTxns, tconf, allPluginsPath): """ A node that restarts after some transactions should eventually get the transactions which happened while it was down :return: """ - looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns logger.debug("Stopping node {} with pool ledger size {}". format(newNode, newNode.poolManager.txnSeqNo)) ensureNodeDisconnectedFromPool(looper, txnPoolNodeSet, newNode) + looper.removeProdable(newNode) # for n in txnPoolNodeSet[:4]: # for r in n.nodestack.remotes.values(): # if r.name == newNode.name: @@ -87,13 +95,20 @@ def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet, # TODO: Check if the node has really stopped processing requests? logger.debug("Sending requests") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) - logger.debug("Starting the stopped node, {}".format(newNode)) - newNode.start(looper.loop) - looper.run(checkNodesConnected(txnPoolNodeSet)) - looper.run(eventually(checkNodeLedgersForEquality, newNode, - *txnPoolNodeSet[:4], retryWait=1, timeout=15)) + restartedNewNode = TestNode(newNode.name, + basedirpath=tdirWithPoolTxns, + config=tconf, + ha=newNode.nodestack.ha, + cliha=newNode.clientstack.ha, + pluginPaths=allPluginsPath) + logger.debug("Starting the stopped node, {}".format(restartedNewNode)) + looper.add(restartedNewNode) + looper.run(checkNodesConnected(txnPoolNodeSet[:4] + [restartedNewNode])) + waitNodeLedgersEquality(looper, restartedNewNode, *txnPoolNodeSet[:4]) + restartedNewNode.stop() +@pytest.mark.skip(reason='fails, SOV-928, SOV-939') def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns): """ diff --git 
a/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py b/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py index 8d7e014857..61c1ababda 100644 --- a/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py +++ b/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py @@ -3,18 +3,25 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.constants import TXN_TYPE from plenum.common.types import CatchupReq, f, CatchupRep from plenum.test.helper import sendRandomRequests -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.node_catchup.helper import waitNodeLedgersEquality from plenum.test.test_node import checkNodesConnected +from plenum.test import waits + +# Do not remove the next import +from plenum.test.node_catchup.conftest import whitelist logger = getlogger() -@pytest.mark.skipif('sys.platform == "win32"', reason='SOV-331') +txnCount = 10 + + +@pytest.mark.skip(reason='fails, https://evernym.atlassian.net/browse/SOV-928') def testNodeRejectingInvalidTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): """ A newly joined node is catching up and sends catchup requests to other @@ -45,19 +52,21 @@ def sendIncorrectTxns(self, req, frm): if txns[seqNo].get(TXN_TYPE) == "buy": txns[seqNo][TXN_TYPE] = "randomtype" consProof = [b64encode(p).decode() for p in - ledger.tree.consistency_proof(end, ledger.size)] + ledger.tree.consistency_proof(end, ledger.size)] self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_TYPE.nm), txns, consProof), to=frm) else: self.processCatchupReq(req, frm) - # One of the node does not process catchup request. + # One of the node sends incorrect txns in catchup reply. 
txnPoolNodeSet[0].nodeMsgRouter.routes[CatchupReq] = types.MethodType( sendIncorrectTxns, txnPoolNodeSet[0].ledgerManager) + logger.debug( + 'Catchup request processor of {} patched'.format(txnPoolNodeSet[0])) sendRandomRequests(wallet, client, 10) - looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=60)) - looper.run(eventually(checkNodeLedgersForEquality, newNode, - *txnPoolNodeSet[:-1], retryWait=1, timeout=45)) + looper.run(checkNodesConnected(txnPoolNodeSet)) + + waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:-1]) assert newNode.isNodeBlacklisted(txnPoolNodeSet[0].name) diff --git a/plenum/test/node_catchup/test_node_request_consistency_proof.py b/plenum/test/node_catchup/test_node_request_consistency_proof.py index 3f7427b4f1..b77904709d 100644 --- a/plenum/test/node_catchup/test_node_request_consistency_proof.py +++ b/plenum/test/node_catchup/test_node_request_consistency_proof.py @@ -4,15 +4,23 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.types import LedgerStatus from plenum.test.helper import sendRandomRequests -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.node_catchup.helper import waitNodeLedgersEquality from plenum.test.test_ledger_manager import TestLedgerManager from plenum.test.test_node import checkNodesConnected +from plenum.test import waits +# Do not remove the next import +from plenum.test.node_catchup.conftest import whitelist -@pytest.mark.skipif('sys.platform == "win32"', reason='SOV-465') + +logger = getlogger() + + +@pytest.mark.skip(reason='fails, https://evernym.atlassian.net/browse/SOV-928') def testNodeRequestingConsProof(txnPoolNodeSet, nodeCreatedAfterSomeTxns): """ All of the 4 old nodes delay the processing of LEDGER_STATUS from the newly @@ -29,7 +37,7 @@ def testNodeRequestingConsProof(txnPoolNodeSet, nodeCreatedAfterSomeTxns): 
node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node) txnPoolNodeSet.append(newNode) - # The new node does not sends different ledger statuses to every node so it + # The new node sends different ledger statuses to every node so it # does not get enough similar consistency proofs sentSizes = set() @@ -50,15 +58,17 @@ def sendDLStatus(self, name): sentSizes.add(newSize) newNode.sendDomainLedgerStatus = types.MethodType(sendDLStatus, newNode) + logger.debug( + 'Domain Ledger status sender of {} patched'.format(newNode)) - print("sending 10 requests") sendRandomRequests(wallet, client, 10) - looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=60)) + looper.run(checkNodesConnected(txnPoolNodeSet)) + + # wait more than `ConsistencyProofsTimeout` + # TODO: apply configurable timeout here + + waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:-1]) - # `ConsistencyProofsTimeout` is set to 60 sec, so need to wait more than - # 60 sec. - looper.run(eventually(checkNodeLedgersForEquality, newNode, - *txnPoolNodeSet[:-1], retryWait=1, timeout=75)) for node in txnPoolNodeSet[:-1]: assert node.ledgerManager.spylog.count( TestLedgerManager.processConsistencyProofReq.__name__) > 0 diff --git a/plenum/test/node_catchup/test_node_request_missing_transactions.py b/plenum/test/node_catchup/test_node_request_missing_transactions.py index 2479e22e07..5769a01e69 100644 --- a/plenum/test/node_catchup/test_node_request_missing_transactions.py +++ b/plenum/test/node_catchup/test_node_request_missing_transactions.py @@ -1,15 +1,23 @@ import types -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +import pytest + +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.types import CatchupReq from plenum.test.helper import sendRandomRequests -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.node_catchup.helper import 
waitNodeLedgersEquality from plenum.test.test_node import checkNodesConnected +from plenum.test import waits + +# Do not remove the next import +from plenum.test.node_catchup.conftest import whitelist + logger = getlogger() +@pytest.mark.skip(reason='fails, https://evernym.atlassian.net/browse/SOV-928') def testNodeRequestingTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): """ A newly joined node is catching up and sends catchup requests to other @@ -33,6 +41,6 @@ def ignoreCatchupReq(self, req, frm): txnPoolNodeSet[0].nodeMsgRouter.routes[CatchupReq] = types.MethodType( ignoreCatchupReq, txnPoolNodeSet[0].ledgerManager) sendRandomRequests(wallet, client, 10) - looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=60)) - looper.run(eventually(checkNodeLedgersForEquality, newNode, - *txnPoolNodeSet[:-1], retryWait=1, timeout=90)) + looper.run(checkNodesConnected(txnPoolNodeSet)) + + waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:-1]) diff --git a/plenum/test/node_request/node_request_helper.py b/plenum/test/node_request/node_request_helper.py index deffb5c54e..595e753200 100644 --- a/plenum/test/node_request/node_request_helper.py +++ b/plenum/test/node_request/node_request_helper.py @@ -1,15 +1,15 @@ import time from functools import partial -from plenum.common.eventually import eventuallyAll +from stp_core.loop.eventually import eventuallyAll from plenum.common.types import PrePrepare, OPERATION, f from plenum.common.util import getMaxFailures from plenum.server.node import Node from plenum.server.replica import Replica -from plenum.test.helper import getPrimaryReplica +from plenum.test import waits from plenum.test.spy_helpers import getAllArgs from plenum.test.test_node import TestNode, getNonPrimaryReplicas, \ - getAllReplicas + getAllReplicas, getPrimaryReplica def checkPropagated(looper, nodeSet, request, faultyNodes=0): @@ -38,9 +38,10 @@ def g(node: TestNode): numOfMsgsWithZFN, numOfMsgsWithFaults) + timeout = 
waits.expectedPropagateTime(len(nodeSet)) coros = [partial(g, node) for node in nodeSet] looper.run(eventuallyAll(*coros, - totalTimeout=10, + totalTimeout=timeout, acceptableFails=faultyNodes)) @@ -48,7 +49,8 @@ def checkPrePrepared(looper, nodeSet, propagated1, instIds, - faultyNodes=0): + faultyNodes=0, + timeout=30): nodesSize = len(list(nodeSet)) def g(instId): @@ -80,7 +82,8 @@ def nonPrimarySeesCorrectNumberOfPREPREPAREs(): propagated1.identifier, propagated1.reqId, propagated1.digest, - time.time()) + time.time() + ) passes = 0 for npr in nonPrimaryReplicas: @@ -161,10 +164,12 @@ def nonPrimaryReceivesCorrectNumberOfPREPREPAREs(): nonPrimaryReceivesCorrectNumberOfPREPREPAREs() coros = [partial(g, instId) for instId in instIds] - looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=30)) + # TODO Select or create the timeout from 'waits'. Don't use constant. + looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) -def checkPrepared(looper, nodeSet, preprepared1, instIds, faultyNodes=0): +def checkPrepared(looper, nodeSet, preprepared1, instIds, faultyNodes=0, + timeout=30): nodeCount = len(list(nodeSet.nodes)) f = getMaxFailures(nodeCount) @@ -267,10 +272,12 @@ def nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs(): nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs() coros = [partial(g, instId) for instId in instIds] - looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=30)) + # TODO Select or create the timeout from 'waits'. Don't use constant. 
+ looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) -def checkCommited(looper, nodeSet, prepared1, instIds, faultyNodes=0): +def checkCommitted(looper, nodeSet, prepared1, instIds, faultyNodes=0, + timeout=60): nodeCount = len((list(nodeSet))) f = getMaxFailures(nodeCount) @@ -334,7 +341,8 @@ def replicasReceivesCorrectNumberOfCOMMITs(): replicasSeesCorrectNumOfCOMMITs() coros = [partial(g, instId) for instId in instIds] - looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=60)) + # TODO Select or create the timeout from 'waits'. Don't use constant. + looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) def msgCountOK(nodesSize, diff --git a/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py b/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py index 994e33d373..d2f6059a62 100644 --- a/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py @@ -3,7 +3,7 @@ import pytest from plenum.common.util import getNoInstances, adict -from plenum.test.node_request.node_request_helper import checkCommited +from plenum.test.node_request.node_request_helper import checkCommitted from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing, \ changesRequest @@ -38,8 +38,8 @@ def afterElection(setup, up): def testNumOfCommitMsgsWithFPlusOneFaults(afterElection, looper, nodeSet, prepared1, noRetryReq): with pytest.raises(AssertionError): - checkCommited(looper, - nodeSet, - prepared1, - range(getNoInstances(len(nodeSet))), - faultyNodes) + checkCommitted(looper, + nodeSet, + prepared1, + range(getNoInstances(len(nodeSet))), + faultyNodes) diff --git a/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py b/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py index 
faf228627e..7f76575146 100644 --- a/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py +++ b/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py @@ -1,6 +1,8 @@ import types -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually + +from plenum.test import waits from plenum.test.delayers import ppDelay from plenum.test.helper import sendRandomRequest from plenum.test.test_node import getNonPrimaryReplicas @@ -13,11 +15,11 @@ def testOrderingWhenPrePrepareNotReceived(looper, nodeSet, up, client1, and commits are received, now the request should not be ordered until pre-prepare is received and ordering should just happen once, """ + delay = 10 nonPrimReps = getNonPrimaryReplicas(nodeSet, 0) slowRep = nonPrimReps[0] slowNode = slowRep.node - slowNode.nodeIbStasher.delay(ppDelay(10, 0)) - sendRandomRequest(wallet1, client1) + slowNode.nodeIbStasher.delay(ppDelay(delay, 0)) stash = [] origMethod = slowRep.processReqDigest @@ -31,7 +33,9 @@ def patched(self, msg): def chk1(): assert len(slowRep.commitsWaitingForPrepare) > 0 - looper.run(eventually(chk1, timeout=4)) + sendRandomRequest(wallet1, client1) + timeout = waits.expectedPrePrepareTime(len(nodeSet)) + delay + looper.run(eventually(chk1, timeout=timeout)) for item in stash: origMethod(item) @@ -40,5 +44,6 @@ def chk2(): assert len(slowRep.commitsWaitingForPrepare) == 0 assert slowRep.spylog.count(slowRep.doOrder.__name__) == 1 - looper.run(eventually(chk2, timeout=12)) + timeout = waits.expectedOrderingTime(len(nonPrimReps) + 1) + delay + looper.run(eventually(chk2, timeout=timeout)) diff --git a/plenum/test/node_request/test_order/test_request_ordering_1.py b/plenum/test/node_request/test_order/test_request_ordering_1.py index 83151de006..6a735bb5b2 100644 --- a/plenum/test/node_request/test_order/test_request_ordering_1.py +++ b/plenum/test/node_request/test_order/test_request_ordering_1.py @@ -1,6 +1,6 @@ 
import types -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.request import ReqDigest from plenum.test.helper import sendRandomRequest from plenum.test.malicious_behaviors_node import delaysPrePrepareProcessing @@ -19,8 +19,9 @@ def testOrderingCase1(looper, nodeSet, up, client1, wallet1): forwarded request to replica and delay reception of PRE-PREPARE sufficiently so that enough COMMITs reach to trigger ordering. """ + delay = 10 replica = getNonPrimaryReplicas(nodeSet, instId=0)[0] - delaysPrePrepareProcessing(replica.node, delay=10, instId=0) + delaysPrePrepareProcessing(replica.node, delay=delay, instId=0) def doNotProcessReqDigest(self, rd: ReqDigest): pass @@ -32,5 +33,7 @@ def chk(n): assert replica.spylog.count(replica.doOrder.__name__) == n sendRandomRequest(wallet1, client1) - looper.run(eventually(chk, 0, retryWait=1, timeout=5)) - looper.run(eventually(chk, 1, retryWait=1, timeout=15)) + timeout = delay - 5 + looper.run(eventually(chk, 0, retryWait=1, timeout=timeout)) + timeout = delay + 5 + looper.run(eventually(chk, 1, retryWait=1, timeout=timeout)) diff --git a/plenum/test/node_request/test_order/test_request_ordering_2.py b/plenum/test/node_request/test_order/test_request_ordering_2.py index ded72ec840..1115014608 100644 --- a/plenum/test/node_request/test_order/test_request_ordering_2.py +++ b/plenum/test/node_request/test_order/test_request_ordering_2.py @@ -1,10 +1,9 @@ -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.types import Commit, PrePrepare from plenum.test.helper import sendRandomRequests, \ - checkSufficientRepliesForRequests, checkLedgerEquality, getPrimaryReplica, \ - checkAllLedgersEqual -from plenum.test.test_node import getNonPrimaryReplicas + waitForSufficientRepliesForRequests, checkLedgerEquality, 
checkAllLedgersEqual +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica nodeCount = 7 @@ -44,7 +43,6 @@ def testOrderingCase2(looper, nodeSet, up, client1, wallet1): delayedPpSeqNos = set() requestCount = 15 - requests = sendRandomRequests(wallet1, client1, requestCount) def specificCommits(wrappedMsg): nonlocal node3, node4, node5 @@ -63,14 +61,18 @@ def specificCommits(wrappedMsg): logger.debug('{} would be delaying commits'.format(node)) node.nodeIbStasher.delay(specificCommits) - checkSufficientRepliesForRequests(looper, client1, requests) + requests = sendRandomRequests(wallet1, client1, requestCount) + waitForSufficientRepliesForRequests(looper, client1, requests=requests) def ensureSlowNodesHaveAllTxns(): nonlocal node1, node2 for node in node1, node2: assert len(node.domainLedger) == requestCount - looper.run(eventually(ensureSlowNodesHaveAllTxns, retryWait=1, timeout=15)) + from plenum.test import waits + timeout = waits.expectedCatchupTime(len(nodeSet)) + looper.run(eventually(ensureSlowNodesHaveAllTxns, + retryWait=1, timeout=timeout)) checkAllLedgersEqual((n.domainLedger for n in (node0, node3, node4, node5, node6))) diff --git a/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py b/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py index 88a32f5f93..d25a7598e4 100644 --- a/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py +++ b/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py @@ -3,14 +3,15 @@ import pytest as pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.request import ReqDigest from plenum.common.types import PrePrepare from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, getNodeSuspicions 
+from plenum.test.helper import getNodeSuspicions +from plenum.test import waits from plenum.test.instances.helper import recvdPrePrepare -from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica logger = getlogger() @@ -62,8 +63,9 @@ def chk(): r.node, Suspicions.PPR_FRM_NON_PRIMARY.code)) assert nodeSuspicions == 1 + timeout = waits.expectedClientRequestPropagationTime(len(nodeSet)) looper.run(eventually(chk, - retryWait=.5, timeout=5)) + retryWait=.5, timeout=timeout)) # TODO Why is this here? Why would a suspicious PRE-PREPARE from a # non-primary warrant a view change? Need more of a story about the scenario diff --git a/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py b/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py index c97ee737fe..44565233ca 100644 --- a/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py @@ -2,11 +2,14 @@ from functools import partial import pytest -from plenum.test.malicious_behaviors_node import makeNodeFaulty, delaysPrePrepareProcessing, \ - changesRequest -from plenum.common.util import adict -from plenum.common.log import getlogger +from plenum.test import waits +from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ + delaysPrePrepareProcessing, changesRequest +from plenum.common.util import adict, getNoInstances +from stp_core.common.log import getlogger + +from plenum.test.node_request.node_request_helper import checkPrePrepared from plenum.test.test_node import TestNodeSet nodeCount = 7 @@ -18,6 +21,8 @@ logger = getlogger() +delayPrePrepareSec = 60 + @pytest.fixture(scope="module") def setup(startedNodes): @@ -27,7 +32,7 @@ def setup(startedNodes): for node in A, B, G: makeNodeFaulty(node, changesRequest, - 
partial(delaysPrePrepareProcessing, delay=60)) + partial(delaysPrePrepareProcessing, delay=delayPrePrepareSec)) node.delaySelfNomination(10) return adict(faulties=(A, B, G)) @@ -39,10 +44,23 @@ def afterElection(setup, up): assert not r.isPrimary -def testNumOfPrePrepareWithFPlusOneFaults(afterElection, +@pytest.fixture(scope="module") +def preprepared1WithDelay(looper, nodeSet, propagated1, faultyNodes): + timeouts = waits.expectedPrePrepareTime(len(nodeSet)) + delayPrePrepareSec + checkPrePrepared(looper, + nodeSet, + propagated1, + range(getNoInstances(len(nodeSet))), + faultyNodes, + timeout=timeouts) + + +@pytest.mark.skip(reason='SOV-944') +def testNumOfPrePrepareWithFPlusOneFaults( + afterElection, noRetryReq, - preprepared1, - nodeSet): + nodeSet, + preprepared1WithDelay): for n in nodeSet: for r in n.replicas: if r.isPrimary: diff --git a/plenum/test/node_request/test_pre_prepare/test_primary_sends_preprepare_of_high_num.py b/plenum/test/node_request/test_pre_prepare/test_primary_sends_preprepare_of_high_num.py index 5036cae25e..a132f30501 100644 --- a/plenum/test/node_request/test_pre_prepare/test_primary_sends_preprepare_of_high_num.py +++ b/plenum/test/node_request/test_pre_prepare/test_primary_sends_preprepare_of_high_num.py @@ -2,12 +2,14 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.request import ReqDigest from plenum.common.types import PrePrepare from plenum.server.replica import TPCStat from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, getNodeSuspicions +from plenum.test.helper import getNodeSuspicions +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +from plenum.test import waits from plenum.test.test_node import getNonPrimaryReplicas instId = 0 @@ -30,11 +32,12 @@ def checkPreprepare(replica, viewNo, ppSeqNo, req, numOfPrePrepares): nonPrimaryReplicas = 
getNonPrimaryReplicas(nodeSet, instId) req = propagated1.reqDigest primary.doPrePrepare(req) + timeout = waits.expectedPrePrepareTime(len(nodeSet)) for np in nonPrimaryReplicas: looper.run( eventually(checkPreprepare, np, primary.viewNo, primary.lastPrePrepareSeqNo - 1, req, 1, - retryWait=.5, timeout=10)) + retryWait=.5, timeout=timeout)) newReqDigest = ReqDigest(req.identifier, req.reqId + 1, req.digest) incorrectPrePrepareReq = PrePrepare(instId, @@ -42,5 +45,7 @@ def checkPreprepare(replica, viewNo, ppSeqNo, req, numOfPrePrepares): primary.lastPrePrepareSeqNo + 2, *newReqDigest, time.time()) - primary.send(incorrectPrePrepareReq,TPCStat.PrePrepareSent) - looper.run(eventually(chk, retryWait=1, timeout=50)) + primary.send(incorrectPrePrepareReq, TPCStat.PrePrepareSent) + + timeout = waits.expectedPrePrepareTime(len(nodeSet)) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py b/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py index 59224b4bc4..d98a1f535a 100644 --- a/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py @@ -1,17 +1,20 @@ from functools import partial import pytest -from plenum.common.util import adict +from plenum.common.util import adict, getNoInstances +from plenum.test import waits from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing, \ changesRequest +from plenum.test.node_request.node_request_helper import checkPrePrepared nodeCount = 7 # f + 1 faults, i.e, num of faults greater than system can tolerate faultyNodes = 3 whitelist = ['InvalidSignature', 'cannot process incoming PREPARE'] +delayPrePrepareSec = 60 @pytest.fixture(scope="module") @@ -22,7 +25,7 @@ def setup(startedNodes): for node in A, B, G: makeNodeFaulty(node, changesRequest, - 
partial(delaysPrePrepareProcessing, delay=60)) + partial(delaysPrePrepareProcessing, delay=delayPrePrepareSec)) node.delaySelfNomination(10) return adict(faulties=(A, B, G)) @@ -34,5 +37,17 @@ def afterElection(setup, up): assert not r.isPrimary -def testNumOfPrepareWithFPlusOneFaults(afterElection, noRetryReq, prepared1): +@pytest.fixture(scope="module") +def preprepared1WithDelay(looper, nodeSet, propagated1, faultyNodes): + timeouts = waits.expectedPrePrepareTime(len(nodeSet)) + delayPrePrepareSec + checkPrePrepared(looper, + nodeSet, + propagated1, + range(getNoInstances(len(nodeSet))), + faultyNodes, + timeout=timeouts) + + +@pytest.mark.skip(reason='SOV-944') +def testNumOfPrepareWithFPlusOneFaults(afterElection, noRetryReq, preprepared1WithDelay): pass diff --git a/plenum/test/node_request/test_prepare/test_num_of_sufficient_prepare.py b/plenum/test/node_request/test_prepare/test_num_of_sufficient_prepare.py index cbb30d8fc5..a87ab85b42 100644 --- a/plenum/test/node_request/test_prepare/test_num_of_sufficient_prepare.py +++ b/plenum/test/node_request/test_prepare/test_num_of_sufficient_prepare.py @@ -5,7 +5,7 @@ from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing from plenum.common.util import adict -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.test.test_node import TestNodeSet diff --git a/plenum/test/plugin/auction_req_processor/plugin_auction_req_processor.py b/plenum/test/plugin/auction_req_processor/plugin_auction_req_processor.py index dade6086fb..963b6cf7df 100644 --- a/plenum/test/plugin/auction_req_processor/plugin_auction_req_processor.py +++ b/plenum/test/plugin/auction_req_processor/plugin_auction_req_processor.py @@ -4,7 +4,7 @@ from plenum.cli.constants import getPipedRegEx from plenum.common.constants import TXN_TYPE, TARGET_NYM, DATA from plenum.common.types import PLUGIN_TYPE_PROCESSING -from plenum.common.log import getlogger +from 
stp_core.common.log import getlogger from plenum.test.plugin.has_cli_commands import HasCliCommands logger = getlogger() diff --git a/plenum/test/plugin/bank_req_processor/plugin_bank_req_processor.py b/plenum/test/plugin/bank_req_processor/plugin_bank_req_processor.py index 8af5d65e6d..6641597091 100644 --- a/plenum/test/plugin/bank_req_processor/plugin_bank_req_processor.py +++ b/plenum/test/plugin/bank_req_processor/plugin_bank_req_processor.py @@ -6,7 +6,7 @@ from plenum.common.constants import TXN_TYPE, TARGET_NYM, DATA from plenum.common.types import PLUGIN_TYPE_PROCESSING -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.test.plugin.has_cli_commands import HasCliCommands logger = getlogger() diff --git a/plenum/test/plugin/test_auction_req_processor_plugin.py b/plenum/test/plugin/test_auction_req_processor_plugin.py index f99ed85414..3b104d38f7 100644 --- a/plenum/test/plugin/test_auction_req_processor_plugin.py +++ b/plenum/test/plugin/test_auction_req_processor_plugin.py @@ -2,9 +2,9 @@ import pytest -from plenum.common.eventually import eventually from plenum.common.constants import TXN_TYPE, DATA, TARGET_NYM -from plenum.test.helper import setupClients, checkSufficientRepliesRecvd +from plenum.test.helper import setupClients, \ + waitForSufficientRepliesForRequests from plenum.test.plugin.auction_req_processor.plugin_auction_req_processor import \ AUCTION_START, ID, AUCTION_END, GET_BAL, BALANCE, PLACE_BID, AMOUNT from plenum.test.plugin.conftest import AUCTION_REQ_VALIDATION_PLUGIN_PATH_VALUE, \ @@ -61,9 +61,8 @@ def auction(self, aucId, start=True): ID: aucId } }) - self.looper.run(eventually(checkSufficientRepliesRecvd, - self.client.inBox, req.reqId, - 1, retryWait=1, timeout=10)) + waitForSufficientRepliesForRequests(self.looper, self.client, + requests=[req], fVal=1) return req def getBalance(self) -> int: @@ -71,9 +70,8 @@ def getBalance(self) -> int: TXN_TYPE: GET_BAL, TARGET_NYM: 
self.wallet.defaultId }) - self.looper.run(eventually(checkSufficientRepliesRecvd, - self.client.inBox, req.reqId, - 1, retryWait=1, timeout=10)) + waitForSufficientRepliesForRequests(self.looper, self.client, + requests=[req], fVal=1) return self.client.hasConsensus(*req.key)[BALANCE] def bid(self, aucId, amount): @@ -84,9 +82,8 @@ def bid(self, aucId, amount): AMOUNT: amount } }) - self.looper.run(eventually(checkSufficientRepliesRecvd, - self.client.inBox, req.reqId, - 1, retryWait=1, timeout=10)) + waitForSufficientRepliesForRequests(self.looper, self.client, + requests=[req], fVal=1) return req diff --git a/plenum/test/plugin/test_auction_req_validation_plugin.py b/plenum/test/plugin/test_auction_req_validation_plugin.py index 4f56c62c22..b125d1dd42 100644 --- a/plenum/test/plugin/test_auction_req_validation_plugin.py +++ b/plenum/test/plugin/test_auction_req_validation_plugin.py @@ -3,11 +3,12 @@ import pytest -from plenum.common.eventually import eventuallyAll +from stp_core.loop.eventually import eventuallyAll from plenum.common.constants import TXN_TYPE, DATA from plenum.common.types import PLUGIN_TYPE_VERIFICATION from plenum.server.node import Node from plenum.server.plugin_loader import PluginLoader +from plenum.test import waits from plenum.test.helper import checkReqNack from plenum.test.plugin.auction_req_validation.plugin_auction_req_validation \ import AMOUNT, PLACE_BID, AUCTION_START, ID, AUCTION_END @@ -154,8 +155,8 @@ def testAuctionReqValidationPlugin(looper, nodeSet, wallet1, client1, tdir, allCoros += [partial(checkReqNack, client1, node, req.identifier, req.reqId, update) for node in nodeSet] - - looper.run(eventuallyAll(*allCoros, totalTimeout=5)) + timeout = waits.expectedReqAckQuorumTime() + looper.run(eventuallyAll(*allCoros, totalTimeout=timeout)) for n in nodeSet: # type: Node opVerifier, = n.opVerifiers diff --git a/plenum/test/plugin/test_bank_req_processor_plugin.py b/plenum/test/plugin/test_bank_req_processor_plugin.py index 
67febb5e70..7649ab3b69 100644 --- a/plenum/test/plugin/test_bank_req_processor_plugin.py +++ b/plenum/test/plugin/test_bank_req_processor_plugin.py @@ -1,10 +1,11 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.constants import TARGET_NYM, TXN_TYPE, DATA -from plenum.test.helper import checkSufficientRepliesRecvd, \ - checkReqNack, setupClients +from plenum.test import waits +from plenum.test.helper import waitForSufficientRepliesForRequests, \ + setupClients, checkReqNack from plenum.test.plugin.bank_req_processor.plugin_bank_req_processor import \ BALANCE, ALL_TXNS from plenum.test.plugin.bank_req_validation.plugin_bank_req_validation import \ @@ -69,14 +70,14 @@ def sendMoney(self, to: str, amount: int, nodes, expected: bool = True): AMOUNT: amount }}) if expected: - self.looper.run(eventually(checkSufficientRepliesRecvd, - self.client.inBox, req.reqId, 1, - retryWait=1, timeout=5)) + waitForSufficientRepliesForRequests(self.looper, self.client, + requests=[req], fVal=1) else: + timeout = waits.expectedReqNAckQuorumTime() for node in nodes: self.looper.run(eventually(checkReqNack, self.client, node, req.identifier, req.reqId, None, - retryWait=1, timeout=5)) + retryWait=1, timeout=timeout)) return req def getBalance(self) -> int: @@ -84,9 +85,9 @@ def getBalance(self) -> int: TXN_TYPE: GET_BAL, TARGET_NYM: self.wallet.defaultId }) - self.looper.run(eventually(checkSufficientRepliesRecvd, - self.client.inBox, req.reqId, - 1, retryWait=1, timeout=10)) + waitForSufficientRepliesForRequests(self.looper, self.client, + requests=[req], fVal=1) + return self.client.hasConsensus(*req.key)[BALANCE] def checkTxns(self): @@ -94,9 +95,9 @@ def checkTxns(self): TXN_TYPE: GET_ALL_TXNS, TARGET_NYM: self.wallet.defaultId }) - self.looper.run( - eventually(checkSufficientRepliesRecvd, self.client.inBox, - 
req.reqId, 1, retryWait=1, timeout=5)) + waitForSufficientRepliesForRequests(self.looper, self.client, + requests=[req], fVal=1) + return req diff --git a/plenum/test/plugin/test_bank_req_validation_plugin.py b/plenum/test/plugin/test_bank_req_validation_plugin.py index 1b95fe0ee8..73913db45e 100644 --- a/plenum/test/plugin/test_bank_req_validation_plugin.py +++ b/plenum/test/plugin/test_bank_req_validation_plugin.py @@ -2,13 +2,14 @@ import pytest -from plenum.common.eventually import eventuallyAll, eventually +from stp_core.loop.eventually import eventuallyAll, eventually from plenum.common.constants import TXN_TYPE, TARGET_NYM, DATA from plenum.common.types import PLUGIN_TYPE_VERIFICATION from plenum.server.node import Node from plenum.server.plugin_loader import PluginLoader +from plenum.test import waits from plenum.test.helper import setupClient, \ - checkReqNack, checkSufficientRepliesRecvd + checkReqNack, waitForSufficientRepliesForRequests from plenum.test.plugin.bank_req_validation.plugin_bank_req_validation import \ AMOUNT, CREDIT from plenum.test.plugin.conftest import BANK_REQ_VALIDATION_PLUGIN_PATH_VALUE @@ -106,7 +107,8 @@ def testBankReqValidationPlugin(looper, nodeSet, client1, wallet1, tdir, coros4 = [partial(checkReqNack, client1, node, req.identifier, req.reqId, update) for node in nodeSet] - looper.run(eventuallyAll(*(coros1+coros2+coros3+coros4), totalTimeout=5)) + timeout = waits.expectedReqAckQuorumTime() + looper.run(eventuallyAll(*(coros1+coros2+coros3+coros4), totalTimeout=timeout)) req = submitOp(wallet1, client1, { TXN_TYPE: CREDIT, @@ -114,9 +116,9 @@ def testBankReqValidationPlugin(looper, nodeSet, client1, wallet1, tdir, DATA: { AMOUNT: 30 }}) - looper.run(eventually(checkSufficientRepliesRecvd, client1.inBox, - req.reqId, 1, - retryWait=1, timeout=5)) + + waitForSufficientRepliesForRequests(looper, client1, + requests=[req], fVal=1) for n in nodeSet: # type: Node opVerifier, = n.opVerifiers assert opVerifier.count == 1 diff --git 
a/plenum/test/pool_transactions/conftest.py b/plenum/test/pool_transactions/conftest.py index a95f68a77a..b177fd550d 100644 --- a/plenum/test/pool_transactions/conftest.py +++ b/plenum/test/pool_transactions/conftest.py @@ -1,6 +1,6 @@ import pytest -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper from plenum.common.util import randomString from plenum.test.test_node import checkNodesConnected from plenum.test.node_catchup.helper import \ @@ -17,7 +17,10 @@ def looper(txnPoolNodesLooper): @pytest.fixture(scope="module") def stewardAndWallet1(looper, txnPoolNodeSet, poolTxnStewardData, tdirWithPoolTxns): - return buildPoolClientAndWallet(poolTxnStewardData, tdirWithPoolTxns) + client, wallet = buildPoolClientAndWallet(poolTxnStewardData, + tdirWithPoolTxns) + yield client, wallet + client.stop() @pytest.fixture(scope="module") @@ -61,7 +64,10 @@ def nodeThetaAdded(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, @pytest.fixture(scope="module") def clientAndWallet1(txnPoolNodeSet, poolTxnClientData, tdirWithPoolTxns): - return buildPoolClientAndWallet(poolTxnClientData, tdirWithPoolTxns) + client, wallet = buildPoolClientAndWallet(poolTxnClientData, + tdirWithPoolTxns) + yield client, wallet + client.stop() @pytest.fixture(scope="module") diff --git a/plenum/test/pool_transactions/helper.py b/plenum/test/pool_transactions/helper.py index e47cdfe464..cef270f5e5 100644 --- a/plenum/test/pool_transactions/helper.py +++ b/plenum/test/pool_transactions/helper.py @@ -1,16 +1,17 @@ from typing import Iterable, Union +from plenum.common.keygen_utils import initNodeKeysForBothStacks +from stp_core.network.port_dispenser import genHa + from plenum.client.client import Client from plenum.client.wallet import Wallet -from plenum.common.eventually import eventually -from plenum.common.port_dispenser import genHa -from plenum.common.raet import initLocalKeep +from stp_core.loop.eventually import eventually from plenum.common.signer_simple 
import SimpleSigner from plenum.common.constants import STEWARD, TXN_TYPE, NYM, ROLE, TARGET_NYM, ALIAS, \ NODE_PORT, CLIENT_IP, NODE_IP, DATA, NODE, CLIENT_PORT, VERKEY, SERVICES, \ VALIDATOR from plenum.common.util import randomString, hexToFriendly -from plenum.test.helper import checkSufficientRepliesRecvd +from plenum.test.helper import waitForSufficientRepliesForRequests from plenum.test.test_client import TestClient, genTestClient from plenum.test.test_node import TestNode @@ -34,10 +35,9 @@ def addNewClient(role, looper, creatorClient: Client, creatorWallet: Wallet, req = creatorWallet.signOp(op) creatorClient.submitReqs(req) - nodeCount = len(creatorClient.nodeReg) - looper.run(eventually(checkSufficientRepliesRecvd, creatorClient.inBox, - req.reqId, 1, - retryWait=1, timeout=3 * nodeCount)) + waitForSufficientRepliesForRequests(looper, creatorClient, + requests=[req], fVal=1) + return wallet @@ -64,11 +64,10 @@ def addNewNode(looper, stewardClient, stewardWallet, newNodeName, tdir, tconf, req = stewardWallet.signOp(op) stewardClient.submitReqs(req) - nodeCount = len(stewardClient.nodeReg) - looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox, - req.reqId, 1, - retryWait=1, timeout=5 * nodeCount)) - initLocalKeep(newNodeName, tdir, sigseed, override=True) + waitForSufficientRepliesForRequests(looper, stewardClient, + requests=[req], fVal=1) + + initNodeKeysForBothStacks(newNodeName, tdir, sigseed, override=True) node = nodeClass(newNodeName, basedirpath=tdir, config=tconf, ha=(nodeIp, nodePort), cliha=(clientIp, clientPort), pluginPaths=allPluginsPath) @@ -96,7 +95,7 @@ def addNewStewardAndNode(looper, creatorClient, creatorWallet, stewardName, def changeNodeHa(looper, stewardClient, stewardWallet, node, nodeHa, clientHa): - nodeNym = hexToFriendly(node.nodestack.local.signer.verhex) + nodeNym = hexToFriendly(node.nodestack.verhex) (nodeIp, nodePort), (clientIp, clientPort) = nodeHa, clientHa op = { TXN_TYPE: NODE, @@ -112,9 +111,10 @@ 
def changeNodeHa(looper, stewardClient, stewardWallet, node, nodeHa, clientHa): req = stewardWallet.signOp(op) stewardClient.submitReqs(req) - looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox, - req.reqId, 1, - retryWait=1, timeout=5)) + waitForSufficientRepliesForRequests(looper, stewardClient, + requests=[req], fVal=1) + + # TODO: Not needed in ZStack, remove once raet is removed node.nodestack.clearLocalKeep() node.nodestack.clearRemoteKeeps() node.clientstack.clearLocalKeep() @@ -122,7 +122,7 @@ def changeNodeHa(looper, stewardClient, stewardWallet, node, nodeHa, clientHa): def changeNodeKeys(looper, stewardClient, stewardWallet, node, verkey): - nodeNym = hexToFriendly(node.nodestack.local.signer.verhex) + nodeNym = hexToFriendly(node.nodestack.verhex) op = { TXN_TYPE: NODE, @@ -135,9 +135,9 @@ def changeNodeKeys(looper, stewardClient, stewardWallet, node, verkey): req = stewardWallet.signOp(op) stewardClient.submitReqs(req) - looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox, - req.reqId, 1, - retryWait=1, timeout=5)) + waitForSufficientRepliesForRequests(looper, stewardClient, + requests=[req], fVal=1) + node.nodestack.clearLocalRoleKeep() node.nodestack.clearRemoteRoleKeeps() node.nodestack.clearAllDir() @@ -157,10 +157,9 @@ def suspendNode(looper, stewardClient, stewardWallet, nodeNym, nodeName): } req = stewardWallet.signOp(op) stewardClient.submitReqs(req) - looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox, - req.reqId, 1, - retryWait=1, timeout=5)) + waitForSufficientRepliesForRequests(looper, stewardClient, + requests=[req], fVal=1) def cancelNodeSuspension(looper, stewardClient, stewardWallet, nodeNym, nodeName): @@ -175,9 +174,8 @@ def cancelNodeSuspension(looper, stewardClient, stewardWallet, nodeNym, req = stewardWallet.signOp(op) stewardClient.submitReqs(req) - looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox, - req.reqId, 1, - retryWait=1, timeout=10)) + 
waitForSufficientRepliesForRequests(looper, stewardClient, + requests=[req], fVal=1) def buildPoolClientAndWallet(clientData, tempDir, clientClass=None, @@ -202,7 +200,7 @@ def disconnectPoolNode(poolNodes: Iterable, disconnect: Union[str, TestNode]): if node.name == disconnect: node.stop() else: - node.nodestack.removeRemoteByName(disconnect) + node.nodestack.disconnectByName(disconnect) def checkNodeDisconnectedFrom(needle: str, haystack: Iterable[TestNode]): diff --git a/plenum/test/pool_transactions/test_adding_stewards.py b/plenum/test/pool_transactions/test_adding_stewards.py index 643d1759c8..891da93de4 100644 --- a/plenum/test/pool_transactions/test_adding_stewards.py +++ b/plenum/test/pool_transactions/test_adding_stewards.py @@ -1,12 +1,7 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.signer_simple import SimpleSigner -from plenum.common.constants import TXN_TYPE, TARGET_NYM, ROLE, STEWARD, NYM, \ - ALIAS -from plenum.common.util import randomSeed -from plenum.test.pool_transactions.helper import buildPoolClientAndWallet, \ - addNewClient +from plenum.common.constants import STEWARD +from plenum.test.pool_transactions.helper import addNewClient @pytest.fixture(scope="module") diff --git a/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py b/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py index 625909ff45..2f7f74e75d 100644 --- a/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py +++ b/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py @@ -1,17 +1,18 @@ -from plenum.common.eventually import eventually -from plenum.common.log import getlogger -from plenum.common.port_dispenser import genHa -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality, \ +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger +from plenum.test.node_catchup.helper import 
waitNodeLedgersEquality, \ ensureClientConnectedToNodesAndPoolLedgerSame from plenum.test.pool_transactions.helper import changeNodeHa, \ buildPoolClientAndWallet from plenum.test.test_node import TestNode, checkNodesConnected +from stp_core.network.port_dispenser import genHa logger = getlogger() whitelist = ['found legacy entry', "doesn't match", "reconciling nodeReg", - "missing", "conflicts", "matches", "nodeReg", "conflicting address"] + "missing", "conflicts", "matches", "nodeReg", + "conflicting address", "got error while verifying message"] def testChangeHaPersistsPostNodesRestart(looper, txnPoolNodeSet, @@ -25,7 +26,7 @@ def testChangeHaPersistsPostNodesRestart(looper, txnPoolNodeSet, # Making the change HA txn an confirming its succeeded changeNodeHa(looper, newSteward, newStewardWallet, newNode, - nodeHa=nodeNewHa, clientHa=clientNewHa) + nodeHa=nodeNewHa, clientHa=clientNewHa) # Stopping existing nodes for node in txnPoolNodeSet: @@ -49,8 +50,7 @@ def testChangeHaPersistsPostNodesRestart(looper, txnPoolNodeSet, restartedNodes.append(node) looper.run(checkNodesConnected(restartedNodes)) - looper.run(eventually(checkNodeLedgersForEquality, node, - *restartedNodes[:-1], retryWait=1, timeout=10)) + waitNodeLedgersEquality(looper, node, *restartedNodes[:-1]) # Building a new client that reads from the genesis txn file # but is able to connect to all nodes diff --git a/plenum/test/pool_transactions/test_client_change_ha.py b/plenum/test/pool_transactions/test_client_change_ha.py index 347e0eb358..7957b5d8bc 100644 --- a/plenum/test/pool_transactions/test_client_change_ha.py +++ b/plenum/test/pool_transactions/test_client_change_ha.py @@ -1,12 +1,11 @@ import os import shutil -from plenum.common.port_dispenser import genHa -from plenum.test.test_client import genTestClient from plenum.test.node_catchup.helper import \ ensureClientConnectedToNodesAndPoolLedgerSame from plenum.test.pool_transactions.helper import buildPoolClientAndWallet - +from 
plenum.test.test_client import genTestClient +from stp_core.network.port_dispenser import genHa whitelist = ['client already added'] diff --git a/plenum/test/pool_transactions/test_client_with_pool_txns.py b/plenum/test/pool_transactions/test_client_with_pool_txns.py index e5197f4f8d..ff40ac1e2e 100644 --- a/plenum/test/pool_transactions/test_client_with_pool_txns.py +++ b/plenum/test/pool_transactions/test_client_with_pool_txns.py @@ -1,8 +1,9 @@ -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.util import randomString, bootstrapClientKeys +from plenum.test import waits from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ - sendRandomRequest, checkSufficientRepliesForRequests + sendRandomRequest, waitForSufficientRepliesForRequests from plenum.test.node_catchup.helper import \ ensureClientConnectedToNodesAndPoolLedgerSame from plenum.test.test_client import genTestClient @@ -30,11 +31,11 @@ def testClientConnectAfterRestart(looper, txnPoolNodeSet, tdirWithPoolTxns): logger.debug("{} starting at {}".format(newClient, newClient.nodestack.ha)) looper.add(newClient) logger.debug("Public keys of client {} {}".format( - newClient.nodestack.local.priver.keyhex, - newClient.nodestack.local.priver.pubhex)) + newClient.nodestack.prihex, + newClient.nodestack.pubhex)) logger.debug("Signer keys of client {} {}".format( - newClient.nodestack.local.signer.keyhex, - newClient.nodestack.local.signer.verhex)) + newClient.nodestack.keyhex, + newClient.nodestack.verhex)) looper.run(newClient.ensureConnectedToNodes()) newClient.stop() looper.removeProdable(newClient) @@ -44,11 +45,11 @@ def testClientConnectAfterRestart(looper, txnPoolNodeSet, tdirWithPoolTxns): newClient.nodestack.ha)) looper.add(newClient) logger.debug("Public keys of client {} {}".format( - newClient.nodestack.local.priver.keyhex, - 
newClient.nodestack.local.priver.pubhex)) + newClient.nodestack.prihex, + newClient.nodestack.pubhex)) logger.debug("Signer keys of client {} {}".format( - newClient.nodestack.local.signer.keyhex, - newClient.nodestack.local.signer.verhex)) + newClient.nodestack.keyhex, + newClient.nodestack.verhex)) looper.run(newClient.ensureConnectedToNodes()) @@ -76,20 +77,19 @@ def testClientConnectToRestartedNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, looper.add(node) txnPoolNodeSet.append(node) looper.run(checkNodesConnected(txnPoolNodeSet)) - ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet, retryWait=1, - timeout=10) + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) def chk(): for node in txnPoolNodeSet: assert node.isParticipating - looper.run(eventually(chk, retryWait=1, timeout=10)) + timeout = waits.expectedCatchupTime(len(txnPoolNodeSet)) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) bootstrapClientKeys(w.defaultId, w.getVerkey(), txnPoolNodeSet) req = sendRandomRequest(w, newClient) - checkSufficientRepliesForRequests(looper, newClient, [req, ], - timeoutPerReq=10) + waitForSufficientRepliesForRequests(looper, newClient, requests=[req]) ensureClientConnectedToNodesAndPoolLedgerSame(looper, newClient, *txnPoolNodeSet) diff --git a/plenum/test/pool_transactions/test_multiple_clients.py b/plenum/test/pool_transactions/test_multiple_clients.py new file mode 100644 index 0000000000..86e9bab231 --- /dev/null +++ b/plenum/test/pool_transactions/test_multiple_clients.py @@ -0,0 +1,41 @@ +import os +import psutil + +import pytest +import zmq + +from stp_core.loop.eventually import eventually +from plenum.common.util import randomString +from plenum.test import waits +from plenum.test.pool_transactions.helper import addNewClient +from plenum.test.test_client import TestClient +from stp_core.network.port_dispenser import genHa + + +@pytest.mark.skip(reason='This is not a test') +def testMultipleClients(looper, txnPoolNodeSet, steward1, 
stewardWallet, + tdirWithPoolTxns): + n = txnPoolNodeSet[0] + n.nodestack.ctx.set(zmq.MAX_SOCKETS, 4096) + clientNum = 100 + pr = psutil.Process(os.getpid()) + print('Len connections before starting {}'.format(len(pr.connections()))) + for i in range(clientNum): + name = randomString() + wallet = addNewClient(None, looper, steward1, stewardWallet, + name) + + def chk(): + for node in txnPoolNodeSet: + assert wallet.defaultId in node.clientAuthNr.clients + + timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) + newSteward = TestClient(name=name, + nodeReg=None, ha=genHa(), + basedirpath=tdirWithPoolTxns) + + looper.add(newSteward) + looper.run(newSteward.ensureConnectedToNodes()) + print('Connected {}'.format(i)) + print('Len connections {}'.format(len(pr.connections()))) diff --git a/plenum/test/pool_transactions/test_nodes_with_pool_txns.py b/plenum/test/pool_transactions/test_nodes_with_pool_txns.py index 3e0ccfd664..87e609a5e9 100644 --- a/plenum/test/pool_transactions/test_nodes_with_pool_txns.py +++ b/plenum/test/pool_transactions/test_nodes_with_pool_txns.py @@ -2,17 +2,20 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger -from plenum.common.port_dispenser import genHa -from plenum.common.raet import initLocalKeep +from plenum.common import util +from plenum.common.keygen_utils import initNodeKeysForBothStacks +from stp_core.network.port_dispenser import genHa +from stp_core.types import HA + +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.signer_simple import SimpleSigner -from plenum.common.types import HA from plenum.common.constants import CLIENT_STACK_SUFFIX from plenum.common.util import getMaxFailures, randomString +from plenum.test import waits from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ checkReqNackWithReason -from 
plenum.test.node_catchup.helper import checkNodeLedgersForEquality, \ +from plenum.test.node_catchup.helper import waitNodeLedgersEquality, \ ensureClientConnectedToNodesAndPoolLedgerSame from plenum.test.pool_transactions.helper import addNewClient, addNewNode, \ changeNodeHa, addNewStewardAndNode, changeNodeKeys @@ -24,7 +27,11 @@ # logged errors to ignore whitelist = ['found legacy entry', "doesn't match", 'reconciling nodeReg', 'missing', 'conflicts', 'matches', 'nodeReg', - 'conflicting address', 'unable to send message'] + 'conflicting address', 'unable to send message', + 'got error while verifying message'] +# Whitelisting "got error while verifying message" since a node while not have +# initialised a connection for a new node by the time the new node's message +# reaches it def getNodeWithName(txnPoolNodeSet, name: str): @@ -49,7 +56,8 @@ def chk(): for node in txnPoolNodeSet: assert wallet.defaultId in node.clientAuthNr.clients - looper.run(eventually(chk, retryWait=1, timeout=5)) + timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) def testStewardCannotAddMoreThanOneNode(looper, txnPoolNodeSet, steward1, @@ -95,7 +103,9 @@ def chkNodeRegRecvd(): assert (len(steward1.nodeReg) - len(oldNodeReg)) == 1 assert (newNode.name + CLIENT_STACK_SUFFIX) in steward1.nodeReg - looper.run(eventually(chkNodeRegRecvd, retryWait=1, timeout=5)) + fVal = util.getMaxFailures(len(txnPoolNodeSet)) + timeout = waits.expectedClientConnectionTimeout(fVal) + looper.run(eventually(chkNodeRegRecvd, retryWait=1, timeout=timeout)) ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, *txnPoolNodeSet) ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, @@ -110,17 +120,17 @@ def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, for nodeName in ("Zeta", "Eta"): newStewardName = "testClientSteward"+randomString(3) newSteward, newStewardWallet, newNode = 
addNewStewardAndNode(looper, - steward1, - stewardWallet, - newStewardName, - nodeName, - tdirWithPoolTxns, tconf, - allPluginsPath) + steward1, + stewardWallet, + newStewardName, + nodeName, + tdirWithPoolTxns, + tconf, + allPluginsPath) txnPoolNodeSet.append(newNode) - looper.run(checkNodesConnected(txnPoolNodeSet)) - logger.debug("{} connected to the pool".format(newNode)) - looper.run(eventually(checkNodeLedgersForEquality, newNode, - *txnPoolNodeSet[:-1], retryWait=1, timeout=7)) + looper.run(checkNodesConnected(txnPoolNodeSet)) + logger.debug("{} connected to the pool".format(newNode)) + waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:-1]) f = getMaxFailures(len(txnPoolNodeSet)) @@ -129,9 +139,9 @@ def checkFValue(): assert node.f == f assert len(node.replicas) == (f + 1) - looper.run(eventually(checkFValue, retryWait=1, timeout=5)) - checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1, - timeout=5) + timeout = waits.expectedClientConnectionTimeout(f) + looper.run(eventually(checkFValue, retryWait=1, timeout=timeout)) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) def testNodePortCannotBeChangedByAnotherSteward(looper, txnPoolNodeSet, @@ -173,14 +183,16 @@ def testNodePortChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, # stopped txnPoolNodeSet[-1] = node looper.run(checkNodesConnected(txnPoolNodeSet)) - looper.run(eventually(checkNodeLedgersForEquality, node, - *txnPoolNodeSet[:-1], retryWait=1, timeout=10)) + + waitNodeLedgersEquality(looper, node, *txnPoolNodeSet[:-1]) + ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, *txnPoolNodeSet) ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, *txnPoolNodeSet) +@pytest.mark.skip(reason="SOV-881") def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, nodeThetaAdded, allPluginsPath=None): @@ -191,13 +203,14 @@ def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, newNode = getNodeWithName(txnPoolNodeSet, 
newNode.name) newNode.stop() + looper.removeProdable(name=newNode.name) nodeHa, nodeCHa = HA(*newNode.nodestack.ha), HA(*newNode.clientstack.ha) sigseed = randomString(32).encode() verkey = SimpleSigner(seed=sigseed).naclSigner.verhex.decode() changeNodeKeys(looper, newSteward, newStewardWallet, newNode, verkey) - initLocalKeep(newNode.name, tdirWithPoolTxns, sigseed) - initLocalKeep(newNode.name+CLIENT_STACK_SUFFIX, tdirWithPoolTxns, sigseed) - looper.removeProdable(name=newNode.name) + initNodeKeysForBothStacks(newNode.name, tdirWithPoolTxns, sigseed, + override=True) + logger.debug("{} starting with HAs {} {}".format(newNode, nodeHa, nodeCHa)) node = TestNode(newNode.name, basedirpath=tdirWithPoolTxns, config=tconf, ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) @@ -206,8 +219,7 @@ def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, # stopped txnPoolNodeSet[-1] = node looper.run(checkNodesConnected(txnPoolNodeSet)) - looper.run(eventually(checkNodeLedgersForEquality, node, - *txnPoolNodeSet[:-1], retryWait=1, timeout=10)) + waitNodeLedgersEquality(looper, node, *txnPoolNodeSet[:-1]) ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, *txnPoolNodeSet) ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, diff --git a/plenum/test/pool_transactions/test_suspend_node.py b/plenum/test/pool_transactions/test_suspend_node.py index e9ffdec9e6..c8998ac940 100644 --- a/plenum/test/pool_transactions/test_suspend_node.py +++ b/plenum/test/pool_transactions/test_suspend_node.py @@ -1,16 +1,15 @@ import pytest from plenum.client.client import Client -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.constants import CLIENT_STACK_SUFFIX from plenum.common.util import hexToFriendly from plenum.server.node import Node from plenum.test.helper import sendRandomRequest, \ - checkSufficientRepliesForRequests + waitForSufficientRepliesForRequests from 
plenum.test.node_catchup.helper import \ ensureClientConnectedToNodesAndPoolLedgerSame from plenum.test.pool_transactions.helper import suspendNode, \ buildPoolClientAndWallet, cancelNodeSuspension -from plenum.test.test_client import TestClient from plenum.test.test_node import TestNode, checkNodesConnected whitelist = ['found legacy entry', "doesn't match", 'reconciling nodeReg', @@ -39,7 +38,7 @@ def testStewardSuspendsNode(looper, txnPoolNodeSet, allPluginsPath): newSteward, newStewardWallet, newNode = nodeThetaAdded - newNodeNym = hexToFriendly(newNode.nodestack.local.signer.verhex) + newNodeNym = hexToFriendly(newNode.nodestack.verhex) suspendNode(looper, newSteward, newStewardWallet, newNodeNym, newNode.name) # Check suspended node does not exist in any nodeReg or remotes of # nodes or clients @@ -52,8 +51,8 @@ def testStewardSuspendsNode(looper, txnPoolNodeSet, # Check a client can send request and receive replies req = sendRandomRequest(newStewardWallet, newSteward) - checkSufficientRepliesForRequests(looper, newSteward, [req, ], - timeoutPerReq=10) + waitForSufficientRepliesForRequests(looper, newSteward, + requests=[req]) # Check that a restarted client or node does not connect to the suspended # node @@ -92,7 +91,7 @@ def testStewardSuspendsNode(looper, txnPoolNodeSet, ha=newNode.nodestack.ha, cliha=newNode.clientstack.ha) looper.add(nodeTheta) txnPoolNodeSet.append(nodeTheta) - looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=30)) + looper.run(checkNodesConnected(txnPoolNodeSet)) ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, *txnPoolNodeSet) ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, diff --git a/plenum/test/primary_election/test_primary_election_case1.py b/plenum/test/primary_election/test_primary_election_case1.py index 133173aecf..42d23096cc 100644 --- a/plenum/test/primary_election/test_primary_election_case1.py +++ b/plenum/test/primary_election/test_primary_election_case1.py @@ -1,7 +1,7 @@ 
import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.types import Nomination from plenum.server.replica import Replica from plenum.server.suspicion_codes import Suspicions @@ -11,6 +11,8 @@ getSelfNominationByNode from plenum.test.test_node import TestNodeSet, checkNodesConnected, \ ensureElectionsDone +from plenum.test import waits + nodeCount = 4 whitelist = ['already got nomination', @@ -68,8 +70,10 @@ def testPrimaryElectionCase1(case1Setup, looper, keySharedNodes): # Node B sends multiple NOMINATE msgs for Node D but only after A has # nominated itself - looper.run(eventually(checkNomination, nodeA, nodeA.name, retryWait=.25, - timeout=1)) + timeout = waits.expectedNominationTimeout(nodeCount=1) + looper.run(eventually(checkNomination, nodeA, nodeA.name, + retryWait=.25, + timeout=timeout)) instId = getSelfNominationByNode(nodeA) @@ -86,8 +90,7 @@ def testPrimaryElectionCase1(case1Setup, looper, keySharedNodes): Replica.generateName(nodeD.name, instId)) \ <= 1 - primaryReplicas = ensureElectionsDone(looper=looper, nodes=nodes, - retryWait=1, timeout=30) + primaryReplicas = ensureElectionsDone(looper=looper, nodes=nodes) for node in nodes: logger.debug( diff --git a/plenum/test/primary_election/test_primary_election_case2.py b/plenum/test/primary_election/test_primary_election_case2.py index c50482a837..0b2162c8c8 100644 --- a/plenum/test/primary_election/test_primary_election_case2.py +++ b/plenum/test/primary_election/test_primary_election_case2.py @@ -1,6 +1,6 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Nomination from plenum.server.replica import Replica from plenum.server.suspicion_codes import Suspicions @@ -9,6 +9,8 @@ getSelfNominationByNode from plenum.test.test_node import TestNodeSet, 
checkNodesConnected, \ ensureElectionsDone +from plenum.test import waits + nodeCount = 4 whitelist = ['already got nomination', @@ -49,7 +51,9 @@ def testPrimaryElectionCase2(case2Setup, looper, keySharedNodes): looper.run(checkNodesConnected(nodeSet)) # Node B sends multiple NOMINATE msgs but only after A has nominated itself - looper.run(eventually(checkNomination, A, A.name, retryWait=.25, timeout=1)) + timeout = waits.expectedNominationTimeout(len(nodeSet)) + looper.run(eventually(checkNomination, A, A.name, + retryWait=.25, timeout=timeout)) instId = getSelfNominationByNode(A) @@ -63,7 +67,7 @@ def testPrimaryElectionCase2(case2Setup, looper, keySharedNodes): B.send(Nomination(DRep, instId, B.viewNo)) # Ensure elections are done - ensureElectionsDone(looper=looper, nodes=nodeSet, retryWait=1, timeout=45) + ensureElectionsDone(looper=looper, nodes=nodeSet) # All nodes from node A, node C, node D(node B is malicious anyway so # not considering it) should have nomination for node C from node B since diff --git a/plenum/test/primary_election/test_primary_election_case4.py b/plenum/test/primary_election/test_primary_election_case4.py index 3c42c24ea4..e52a54fe2f 100644 --- a/plenum/test/primary_election/test_primary_election_case4.py +++ b/plenum/test/primary_election/test_primary_election_case4.py @@ -1,8 +1,9 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Primary from plenum.server.suspicion_codes import Suspicions +from plenum.test import waits from plenum.test.test_node import TestNodeSet, checkNodesConnected, \ ensureElectionsDone @@ -60,11 +61,11 @@ def x(): primDecs = list(node.elector.primaryDeclarations[0].values()) assert primDecs.count(D.name) <= 1 + timeout = waits.expectedNominationTimeout(len(allNodes)) for node in (A, C, D): - looper.run(eventually(x, retryWait=.5, timeout=2)) + looper.run(eventually(x, retryWait=.5, timeout=timeout)) - 
ensureElectionsDone(looper=looper, nodes=allNodes, - retryWait=1, timeout=45) + ensureElectionsDone(looper=looper, nodes=allNodes) # Node D should not have any primary replica assert not D.hasPrimary diff --git a/plenum/test/primary_election/test_primary_election_case5.py b/plenum/test/primary_election/test_primary_election_case5.py index 19cba6e093..9a24a9ffac 100644 --- a/plenum/test/primary_election/test_primary_election_case5.py +++ b/plenum/test/primary_election/test_primary_election_case5.py @@ -2,7 +2,7 @@ import pytest from plenum.common.types import Primary, Nomination -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.server.replica import Replica from plenum.server.suspicion_codes import Suspicions @@ -75,7 +75,7 @@ def testPrimaryElectionCase5(case5Setup, looper, keySharedNodes): B.send(Primary(DRep, 0, B.viewNo)) # Ensure elections are done - ensureElectionsDone(looper=looper, nodes=nodeSet, retryWait=1, timeout=45) + ensureElectionsDone(looper=looper, nodes=nodeSet) # All nodes from node A, node C, node D(node B is malicious anyway so not # considering it) should have primary declarations for node C from node B diff --git a/plenum/test/primary_election/test_primary_election_contested.py b/plenum/test/primary_election/test_primary_election_contested.py index 68a8975c4a..6c59af4658 100644 --- a/plenum/test/primary_election/test_primary_election_contested.py +++ b/plenum/test/primary_election/test_primary_election_contested.py @@ -1,12 +1,14 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.types import Nomination from plenum.test.delayers import delayerMsgTuple from plenum.test.primary_election.helpers import checkNomination from plenum.test.test_node import TestNodeSet, checkPoolReady, \ checkProtocolInstanceSetup +from plenum.test import 
waits + nodeCount = 4 @@ -53,18 +55,22 @@ def testPrimaryElectionContested(electContFixture, looper, keySharedNodes): checkPoolReady(looper, nodeSet) logger.debug("Check nomination") + timeout = waits.expectedNominationTimeout(nodeCount) + # Checking whether Node A nominated itself - looper.run(eventually(checkNomination, A, A.name, retryWait=1, timeout=10)) + looper.run(eventually(checkNomination, A, A.name, + retryWait=1, timeout=timeout)) # Checking whether Node B nominated itself - looper.run(eventually(checkNomination, B, B.name, retryWait=1, timeout=10)) + looper.run(eventually(checkNomination, B, B.name, + retryWait=1, timeout=timeout)) for n in [C, D]: # Checking whether Node C and Node D nominated Node A - looper.run(eventually(checkNomination, n, A.name, retryWait=1, timeout=10)) + looper.run(eventually(checkNomination, n, A.name, + retryWait=1, timeout=timeout)) - checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, - retryWait=1, timeout=45) + checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1) # Node D should not be primary assert not D.hasPrimary diff --git a/plenum/test/primary_election/test_primary_election_with_clear_winner.py b/plenum/test/primary_election/test_primary_election_with_clear_winner.py index bed207e32a..bbba0d4c08 100644 --- a/plenum/test/primary_election/test_primary_election_with_clear_winner.py +++ b/plenum/test/primary_election/test_primary_election_with_clear_winner.py @@ -1,9 +1,10 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.test.primary_election.helpers import checkNomination from plenum.test.test_node import TestNodeSet, checkPoolReady, \ checkProtocolInstanceSetup +from plenum.test import waits nodeCount = 4 @@ -57,13 +58,14 @@ def testPrimaryElectionWithAClearWinner(electContFixture, looper, keySharedNodes checkPoolReady(looper, nodeSet) # Checking whether one of the replicas of Node A nominated itself - 
looper.run(eventually(checkNomination, A, A.name, retryWait=1, timeout=10)) + timeout = waits.expectedNominationTimeout(len(nodeSet)) + looper.run(eventually(checkNomination, A, A.name, retryWait=1, timeout=timeout)) + timeout = waits.expectedNominationTimeout(len(nodeSet)) for n in nodesBCD: # Checking whether Node B, C and D nominated Node A - looper.run(eventually(checkNomination, n, A.name, retryWait=1, - timeout=10)) + looper.run(eventually(checkNomination, n, A.name, + retryWait=1, timeout=timeout)) - checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1, - timeout=10) + checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1) assert A.hasPrimary diff --git a/plenum/test/primary_election/test_primary_election_with_tie.py b/plenum/test/primary_election/test_primary_election_with_tie.py index 1e16c789de..81372fe560 100644 --- a/plenum/test/primary_election/test_primary_election_with_tie.py +++ b/plenum/test/primary_election/test_primary_election_with_tie.py @@ -1,12 +1,14 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.types import Nomination from plenum.test.delayers import delay from plenum.test.primary_election.helpers import checkNomination from plenum.test.test_node import TestNodeSet, checkPoolReady, \ checkProtocolInstanceSetup +from plenum.test import waits + nodeCount = 4 @@ -66,18 +68,23 @@ def testPrimaryElectionWithTie(electTieFixture, looper, keySharedNodes): format(replica.name, replica.instId, node.elector.nominations.get(instId, {}))) + nominationTimeout = waits.expectedNominationTimeout(len(nodeSet)) logger.debug("Check nomination") # Checking whether Node A nominated itself - looper.run(eventually(checkNomination, A, A.name, retryWait=1, timeout=10)) + looper.run(eventually(checkNomination, A, A.name, + retryWait=1, timeout=nominationTimeout)) # 
Checking whether Node B nominated itself - looper.run(eventually(checkNomination, B, B.name, retryWait=1, timeout=10)) + looper.run(eventually(checkNomination, B, B.name, + retryWait=1, timeout=nominationTimeout)) # Checking whether Node C nominated Node A - looper.run(eventually(checkNomination, C, A.name, retryWait=1, timeout=10)) + looper.run(eventually(checkNomination, C, A.name, + retryWait=1, timeout=nominationTimeout)) # Checking whether Node D nominated Node D - looper.run(eventually(checkNomination, D, B.name, retryWait=1, timeout=10)) + looper.run(eventually(checkNomination, D, B.name, + retryWait=1, timeout=nominationTimeout)) # No node should be primary for node in nodeSet.nodes.values(): @@ -86,5 +93,4 @@ def testPrimaryElectionWithTie(electTieFixture, looper, keySharedNodes): for node in nodeSet.nodes.values(): node.resetDelays() - checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1, - timeout=60) + checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1) diff --git a/plenum/test/primary_election/test_primary_forfeit.py b/plenum/test/primary_election/test_primary_forfeit.py index 47836a1d70..783cba7418 100644 --- a/plenum/test/primary_election/test_primary_forfeit.py +++ b/plenum/test/primary_election/test_primary_forfeit.py @@ -1,6 +1,6 @@ import pytest -from plenum.test.helper import getPrimaryReplica +from plenum.test.test_node import getPrimaryReplica @pytest.mark.skip(reason="SOV-556. 
Test implementation pending, " diff --git a/plenum/test/primary_selection/test_primary_selection.py b/plenum/test/primary_selection/test_primary_selection.py index 29626cc832..09f796b47c 100644 --- a/plenum/test/primary_selection/test_primary_selection.py +++ b/plenum/test/primary_selection/test_primary_selection.py @@ -2,12 +2,12 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.util import getNoInstances from plenum.server.primary_selector import PrimarySelector from plenum.server.replica import Replica -from plenum.test.helper import getPrimaryReplica -from plenum.test.test_node import checkProtocolInstanceSetup +from plenum.test import waits +from plenum.test.test_node import checkProtocolInstanceSetup, getPrimaryReplica from plenum.test.view_change.conftest import viewNo from plenum.test.view_change.test_view_change import viewChangeDone @@ -57,10 +57,11 @@ def checkPrimaryPlacement(): assert node.replicas[2].isPrimary # Check if the primary is on the correct node - looper.run(eventually(checkPrimaryPlacement, retryWait=1, timeout=10)) + timeout = waits.expectedElectionTimeout(len(nodeSet)) + looper.run(eventually(checkPrimaryPlacement, retryWait=1, timeout=timeout)) # Check if every protocol instance has one and only one primary and any node # has no more than one primary - checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=5) + checkProtocolInstanceSetup(looper, nodeSet, retryWait=1) # noinspection PyIncorrectDocstring @@ -82,4 +83,4 @@ def testPrimarySelectionAfterViewChange(looper, nodeSet, ready, primaryReplicas, for br, ar in zip(prBeforeVC, prAfterVC): assert ar.node.rank - br.node.rank == 1 - checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=5) + checkProtocolInstanceSetup(looper, nodeSet, retryWait=1) diff --git a/plenum/test/propagate/test_propagate_recvd_after_request.py b/plenum/test/propagate/test_propagate_recvd_after_request.py index 
e705938b45..fa8775962a 100644 --- a/plenum/test/propagate/test_propagate_recvd_after_request.py +++ b/plenum/test/propagate/test_propagate_recvd_after_request.py @@ -1,19 +1,20 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Propagate +from plenum.test import waits from plenum.test.delayers import delay from plenum.test.propagate.helper import recvdRequest, recvdPropagate, \ sentPropagate from plenum.test.test_node import TestNode nodeCount = 4 - +howlong = 5 @pytest.fixture() def setup(nodeSet): A, B, C, D = nodeSet.nodes.values() # type: TestNode - delay(Propagate, frm=[B, C, D], to=A, howlong=5) + delay(Propagate, frm=[B, C, D], to=A, howlong=howlong) def testPropagateRecvdAfterRequest(setup, looper, nodeSet, up, sent1): @@ -27,7 +28,8 @@ def x(): # A should have sent a PROPAGATE assert len(sentPropagate(A)) == 1 - looper.run(eventually(x, retryWait=.5, timeout=3)) + timeout = howlong - 2 + looper.run(eventually(x, retryWait=.5, timeout=timeout)) def y(): # A should have received 3 PROPAGATEs @@ -39,4 +41,5 @@ def y(): # A should still have sent only one PROPAGATE assert len(sentPropagate(A)) == 1 - looper.run(eventually(y, retryWait=.5, timeout=7)) + timeout = howlong + 2 + looper.run(eventually(y, retryWait=.5, timeout=timeout)) diff --git a/plenum/test/propagate/test_propagate_recvd_before_request.py b/plenum/test/propagate/test_propagate_recvd_before_request.py index 02e8836722..ac834ea3b6 100644 --- a/plenum/test/propagate/test_propagate_recvd_before_request.py +++ b/plenum/test/propagate/test_propagate_recvd_before_request.py @@ -1,20 +1,23 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import Propagate from plenum.test.delayers import delay from plenum.test.helper import assertLength from plenum.test.propagate.helper import recvdRequest, recvdPropagate, \ 
sentPropagate, forwardedRequest +from plenum.test import waits -nodeCount = 4 +nodeCount = 4 +howlong = 10 +delaySec = 5 @pytest.fixture() def setup(nodeSet): A, B, C, D = nodeSet.nodes.values() - A.clientIbStasher.delay(lambda x: 5) - delay(Propagate, frm=[C, D], to=A, howlong=10) + A.clientIbStasher.delay(lambda x: delaySec) + delay(Propagate, frm=[C, D], to=A, howlong=howlong) def testPropagateRecvdBeforeRequest(setup, looper, nodeSet, up, sent1): @@ -28,7 +31,8 @@ def x(): # A should have sent only one PROPAGATE assert len(sentPropagate(A)) == 1 - looper.run(eventually(x, retryWait=.5, timeout=3)) + timeout = delaySec - 2 + looper.run(eventually(x, retryWait=.5, timeout=timeout)) def y(): # A should have received a request from the client @@ -36,10 +40,12 @@ def y(): # A should still have sent only one PROPAGATE assert len(sentPropagate(A)) == 1 - looper.run(eventually(y, retryWait=.5, timeout=6)) + timeout = delaySec + 2 + looper.run(eventually(y, retryWait=.5, timeout=timeout)) def chk(): # A should have forwarded the request assertLength(forwardedRequest(A), 1) - looper.run(eventually(chk, retryWait=1, timeout=15)) + timeout = waits.expectedClientRequestPropagationTime(len(nodeSet)) + looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/raet/__init__.py b/plenum/test/raet/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/plenum/test/raet/helper.py b/plenum/test/raet/helper.py deleted file mode 100644 index 1939297ed8..0000000000 --- a/plenum/test/raet/helper.py +++ /dev/null @@ -1,45 +0,0 @@ -import time - - -def handshake(*stacks): - svc(stacks) - print("Finished Handshake\n") - - -def svc(stacks): - while True: - for stack in stacks: - stack.serviceAll() - stack.store.advanceStamp(0.1) - if all([not stack.transactions for stack in stacks]): - break - time.sleep(.1) - - -def cleanup(*stacks): - for stack in stacks: - stack.server.close() # close the UDP socket - stack.keep.clearAllDir() # clear 
persisted data - print("Finished\n") - - -def sendMsgs(frm, to, toRemote): - stacks = [frm, to] - msg = {'subject': 'Example message {} to {}'.format(frm.name, to.name), - 'content': 'test'} - frm.transmit(msg, toRemote.uid) - svc(stacks) - rx = to.rxMsgs.popleft() - print("{0}\n".format(rx)) - print("Finished Message {} to {}\n".format(frm.name, to.name)) - msg = {'subject': 'Example message {} to {}'.format(to.name, frm.name), - 'content': 'Another test.'} - to.transmit(msg, toRemote.uid) - svc(stacks) - rx = frm.rxMsgs.popleft() - print("{0}\n".format(rx)) - print("Finished Message {} to {}\n".format(to.name, frm.name)) - - -def getRemote(stack, name): - return next(r for r in stack.remotes.values() if r.name == name) diff --git a/plenum/test/raet/test_communication.py b/plenum/test/raet/test_communication.py deleted file mode 100644 index eb46e7bf0c..0000000000 --- a/plenum/test/raet/test_communication.py +++ /dev/null @@ -1,198 +0,0 @@ -from ioflo.base.consoling import getConsole -from raet.nacling import Privateer -from raet.raeting import AutoMode, Acceptance -from raet.road.estating import RemoteEstate -from raet.road.stacking import RoadStack - -from plenum.common.log import getlogger -from plenum.common.port_dispenser import genHa -from plenum.common.signer_simple import SimpleSigner -from plenum.test.raet.helper import handshake, cleanup, sendMsgs, getRemote - -logger = getlogger() - - -def testPromiscuousConnection(tdir): - alpha = RoadStack(name='alpha', - ha=genHa(), - auto=AutoMode.always, - basedirpath=tdir) - - beta = RoadStack(name='beta', - ha=genHa(), - main=True, - auto=AutoMode.always, - basedirpath=tdir) - - try: - betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) - alpha.addRemote(betaRemote) - - alpha.join(uid=betaRemote.uid, cascade=True) - - handshake(alpha, beta) - - sendMsgs(alpha, beta, betaRemote) - finally: - cleanup(alpha, beta) - - -def testRaetPreSharedKeysPromiscous(tdir): - alphaSigner = SimpleSigner() - betaSigner = 
SimpleSigner() - - logger.debug("Alpha's verkey {}".format(alphaSigner.naclSigner.verhex)) - logger.debug("Beta's verkey {}".format(betaSigner.naclSigner.verhex)) - - alpha = RoadStack(name='alpha', - ha=genHa(), - sigkey=alphaSigner.naclSigner.keyhex, - auto=AutoMode.always, - basedirpath=tdir) - - beta = RoadStack(name='beta', - ha=genHa(), - sigkey=betaSigner.naclSigner.keyhex, - main=True, - auto=AutoMode.always, - basedirpath=tdir) - - try: - - betaRemote = RemoteEstate(stack=alpha, ha=beta.ha, - verkey=betaSigner.naclSigner.verhex) - - alpha.addRemote(betaRemote) - - alpha.allow(uid=betaRemote.uid, cascade=True) - - handshake(alpha, beta) - - sendMsgs(alpha, beta, betaRemote) - - finally: - cleanup(alpha, beta) - - -def testRaetPreSharedKeysNonPromiscous(tdir): - alphaSigner = SimpleSigner() - betaSigner = SimpleSigner() - - alphaPrivateer = Privateer() - betaPrivateer = Privateer() - - logger.debug("Alpha's verkey {}".format(alphaSigner.naclSigner.verhex)) - logger.debug("Beta's verkey {}".format(betaSigner.naclSigner.verhex)) - - alpha = RoadStack(name='alpha', - ha=genHa(), - sigkey=alphaSigner.naclSigner.keyhex, - prikey=alphaPrivateer.keyhex, - auto=AutoMode.never, - basedirpath=tdir) - - beta = RoadStack(name='beta', - ha=genHa(), - sigkey=betaSigner.naclSigner.keyhex, - prikey=betaPrivateer.keyhex, - main=True, - auto=AutoMode.never, - basedirpath=tdir) - - alpha.keep.dumpRemoteRoleData({ - "acceptance": Acceptance.accepted.value, - "verhex": betaSigner.naclSigner.verhex, - "pubhex": betaPrivateer.pubhex - }, "beta") - - beta.keep.dumpRemoteRoleData({ - "acceptance": Acceptance.accepted.value, - "verhex": alphaSigner.naclSigner.verhex, - "pubhex": alphaPrivateer.pubhex - }, "alpha") - - try: - - betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) - - alpha.addRemote(betaRemote) - - alpha.allow(uid=betaRemote.uid, cascade=True) - - handshake(alpha, beta) - - sendMsgs(alpha, beta, betaRemote) - finally: - cleanup(alpha, beta) - - -def 
testConnectionWithHaChanged(tdir): - console = getConsole() - console.reinit(verbosity=console.Wordage.verbose) - - alphaSigner = SimpleSigner() - betaSigner = SimpleSigner() - - alphaPrivateer = Privateer() - betaPrivateer = Privateer() - - logger.debug("Alpha's verkey {}".format(alphaSigner.naclSigner.verhex)) - logger.debug("Beta's verkey {}".format(betaSigner.naclSigner.verhex)) - - alpha = None - - def setupAlpha(ha): - nonlocal alpha - alpha = RoadStack(name='alpha', - ha=ha, - sigkey=alphaSigner.naclSigner.keyhex, - prikey=alphaPrivateer.keyhex, - auto=AutoMode.never, - basedirpath=tdir) - - alpha.keep.dumpRemoteRoleData({ - "acceptance": Acceptance.accepted.value, - "verhex": betaSigner.naclSigner.verhex, - "pubhex": betaPrivateer.pubhex - }, "beta") - - oldHa = genHa() - setupAlpha(oldHa) - - beta = RoadStack(name='beta', - ha=genHa(), - sigkey=betaSigner.naclSigner.keyhex, - prikey=betaPrivateer.keyhex, - main=True, - auto=AutoMode.never, - basedirpath=tdir, mutable=True) - - beta.keep.dumpRemoteRoleData({ - "acceptance": Acceptance.accepted.value, - "verhex": alphaSigner.naclSigner.verhex, - "pubhex": alphaPrivateer.pubhex - }, "alpha") - - try: - betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) - alpha.addRemote(betaRemote) - alpha.join(uid=betaRemote.uid, cascade=True) - handshake(alpha, beta) - sendMsgs(alpha, beta, betaRemote) - logger.debug("beta knows alpha as {}". - format(getRemote(beta, "alpha").ha)) - cleanup(alpha) - - newHa = genHa() - logger.debug("alpha changing ha to {}".format(newHa)) - - setupAlpha(newHa) - betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) - alpha.addRemote(betaRemote) - alpha.join(uid=betaRemote.uid, cascade=True) - handshake(alpha, beta) - sendMsgs(alpha, beta, betaRemote) - logger.debug("beta knows alpha as {}". 
- format(getRemote(beta, "alpha").ha)) - finally: - cleanup(alpha, beta) diff --git a/plenum/test/raet/test_raet_comm_with_one_key.py b/plenum/test/raet/test_raet_comm_with_one_key.py deleted file mode 100644 index 89f9b0b21d..0000000000 --- a/plenum/test/raet/test_raet_comm_with_one_key.py +++ /dev/null @@ -1,103 +0,0 @@ -from binascii import hexlify - -import pytest -from plenum.common.crypto import ed25519SkToCurve25519, ed25519PkToCurve25519 -from raet.nacling import Signer -from raet.road.estating import RemoteEstate -from plenum.common.port_dispenser import genHa -from plenum.test.raet.helper import handshake, cleanup, sendMsgs -from raet.raeting import AutoMode, Acceptance -from raet.road.stacking import RoadStack - - -@pytest.fixture(scope="module") -def keysAndNames(): - alphaSigner = Signer() - betaSigner = Signer() - alphaPrikey = ed25519SkToCurve25519(alphaSigner.keyraw) - betaPrikey = ed25519SkToCurve25519(betaSigner.keyraw) - alphaPubkey = ed25519PkToCurve25519(alphaSigner.verraw) - betaPubkey = ed25519PkToCurve25519(betaSigner.verraw) - alphaName = 'alpha' - betaName = 'beta' - return alphaSigner.keyhex, alphaPrikey, alphaSigner.verhex, alphaPubkey, \ - alphaName, betaSigner.keyhex, betaPrikey, betaSigner.verhex, \ - betaPubkey, betaName - - -def testNonPromiscousConnectionWithOneKey(tdir, keysAndNames): - # Simulating node to node connection - alphaSighex, alphaPrikey, alphaVerhex, alphaPubkey, alphaName, betaSighex,\ - betaPrikey, betaVerhex, betaPubkey, betaName = keysAndNames - alpha = RoadStack(name=alphaName, - ha=genHa(), - sigkey=alphaSighex, - prikey=hexlify(alphaPrikey), - auto=AutoMode.never, - basedirpath=tdir) - - beta = RoadStack(name=betaName, - ha=genHa(), - sigkey=betaSighex, - prikey=hexlify(betaPrikey), - main=True, - auto=AutoMode.never, - basedirpath=tdir) - - alpha.keep.dumpRemoteRoleData({ - "acceptance": Acceptance.accepted.value, - "verhex": betaVerhex, - "pubhex": hexlify(betaPubkey) - }, betaName) - - 
beta.keep.dumpRemoteRoleData({ - "acceptance": Acceptance.accepted.value, - "verhex": alphaVerhex, - "pubhex": hexlify(alphaPubkey) - }, alphaName) - - try: - - betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) - - alpha.addRemote(betaRemote) - - alpha.allow(uid=betaRemote.uid, cascade=True) - - handshake(alpha, beta) - - sendMsgs(alpha, beta, betaRemote) - finally: - cleanup(alpha, beta) - - -def testPromiscuousConnection(tdir, keysAndNames): - # Simulating node to client connection - alphaSighex, alphaPrikey, alphaVerhex, alphaPubkey, alphaName, betaSighex, \ - betaPrikey, betaVerhex, betaPubkey, betaName = keysAndNames - alpha = RoadStack(name=alphaName, - ha=genHa(), - sigkey=alphaSighex, - prikey=hexlify(alphaPrikey), - auto=AutoMode.always, - basedirpath=tdir) - - beta = RoadStack(name=betaName, - ha=genHa(), - main=True, - sigkey=betaSighex, - prikey=hexlify(betaPrikey), - auto=AutoMode.always, - basedirpath=tdir) - - try: - betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) - alpha.addRemote(betaRemote) - - alpha.join(uid=betaRemote.uid, cascade=True) - - handshake(alpha, beta) - - sendMsgs(alpha, beta, betaRemote) - finally: - cleanup(alpha, beta) diff --git a/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py b/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py index f051ce6b15..f24163bf8b 100644 --- a/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py +++ b/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py @@ -1,12 +1,13 @@ import time -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.exceptions import SuspiciousNode from plenum.common.types import Prepare from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getPrimaryReplica, getNodeSuspicions +from plenum.test.helper import getNodeSuspicions from plenum.test.spy_helpers import getAllArgs -from 
plenum.test.test_node import TestNode, getNonPrimaryReplicas +from plenum.test.test_node import TestNode, getNonPrimaryReplicas, \ + getPrimaryReplica nodeCount = 7 diff --git a/plenum/test/replica/test_replica_reject_same_pre_prepare.py b/plenum/test/replica/test_replica_reject_same_pre_prepare.py index d23b71034d..68bcb19bcf 100644 --- a/plenum/test/replica/test_replica_reject_same_pre_prepare.py +++ b/plenum/test/replica/test_replica_reject_same_pre_prepare.py @@ -2,16 +2,17 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.types import PrePrepare from plenum.common.util import getMaxFailures +from plenum.test import waits from plenum.test.helper import checkPrePrepareReqSent, \ checkPrePrepareReqRecvd, \ checkPrepareReqSent -from plenum.test.helper import sendRandomRequest, checkSufficientRepliesRecvd, \ - getPrimaryReplica -from plenum.test.test_node import getNonPrimaryReplicas + +from plenum.test.helper import sendRandomRequest, checkSufficientRepliesReceived +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica whitelist = ['doing nothing for now', 'cannot process incoming PRE-PREPARE', @@ -32,10 +33,11 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): numOfNodes = 4 fValue = getMaxFailures(numOfNodes) request1 = sendRandomRequest(wallet1, client1) + timeout = waits.expectedReqAckQuorumTime() result1 = looper.run( - eventually(checkSufficientRepliesRecvd, client1.inBox, + eventually(checkSufficientRepliesReceived, client1.inBox, request1.reqId, fValue, - retryWait=1, timeout=5)) + retryWait=1, timeout=timeout)) logger.debug("request {} gives result {}".format(request1, result1)) primaryRepl = getPrimaryReplica(nodeSet) logger.debug("Primary Replica: {}".format(primaryRepl)) @@ -44,8 +46,9 @@ def testReplicasRejectSamePrePrepareMsg(looper, 
nodeSet, client1, wallet1): "one...") primaryRepl.lastPrePrepareSeqNo -= 1 request2 = sendRandomRequest(wallet1, client1) + timeout = waits.expectedPrePrepareTime(len(nodeSet)) looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2, - retryWait=1, timeout=10)) + retryWait=1, timeout=timeout)) nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet) logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas)) @@ -61,14 +64,16 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): logger.debug("""Checking whether all the non primary replicas have received the pre-prepare request with same sequence number""") + timeout = waits.expectedPrePrepareTime(len(nodeSet)) looper.run(eventually(checkPrePrepareReqRecvd, nonPrimaryReplicas, prePrepareReq, retryWait=1, - timeout=10)) + timeout=timeout)) logger.debug("""Check that none of the non primary replicas didn't send any prepare message " in response to the pre-prepare message""") + timeout = waits.expectedPrePrepareTime(len(nodeSet)) for npr in nonPrimaryReplicas: with pytest.raises(AssertionError): looper.run(eventually(checkPrepareReqSent, @@ -76,4 +81,4 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): wallet1.defaultId, request2.reqId, retryWait=1, - timeout=10)) + timeout=timeout)) diff --git a/plenum/test/script/helper.py b/plenum/test/script/helper.py index 9cabb081a2..c9952b2a50 100644 --- a/plenum/test/script/helper.py +++ b/plenum/test/script/helper.py @@ -4,17 +4,18 @@ import pytest from plenum.client.wallet import Wallet -from plenum.common.eventually import eventually -from plenum.common.log import getlogger -from plenum.common.port_dispenser import genHa +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.script_helper import changeHA from plenum.common.signer_simple import SimpleSigner from plenum.common.util import getMaxFailures -from plenum.test.helper import 
checkSufficientRepliesRecvd, \ +from plenum.test import waits +from plenum.test.helper import waitForSufficientRepliesForRequests, \ sendReqsToNodesAndVerifySuffReplies from plenum.test.test_client import genTestClient from plenum.test.test_node import TestNode, checkNodesConnected, \ ensureElectionsDone +from stp_core.network.port_dispenser import genHa logger = getlogger() @@ -70,9 +71,9 @@ def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, # change HA stewardClient, req = changeHA(looper, tconf, subjectedNode.name, nodeSeed, nodeStackNewHA, stewardName, stewardsSeed) - f = getMaxFailures(len(stewardClient.nodeReg)) - looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox, - req.reqId, f, retryWait=1, timeout=20)) + + waitForSufficientRepliesForRequests(looper, stewardClient, + requests=[req]) # stop node for which HA will be changed subjectedNode.stop() @@ -85,8 +86,9 @@ def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, looper.add(restartedNode) txnPoolNodeSet[nodeIndex] = restartedNode - looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=70)) - ensureElectionsDone(looper, txnPoolNodeSet, retryWait=1, timeout=10) + + looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=70)) + ensureElectionsDone(looper, txnPoolNodeSet, retryWait=1) # start client and check the node HA anotherClient, _ = genTestClient(tmpdir=tdirWithPoolTxns, @@ -96,6 +98,7 @@ def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, stewardWallet = Wallet(stewardName) stewardWallet.addIdentifier(signer=SimpleSigner(seed=stewardsSeed)) sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, stewardClient, 8) + timeout = waits.expectedPoolLedgerCheck(len(txnPoolNodeSet) + 1) looper.run(eventually(checkIfGenesisPoolTxnFileUpdated, *txnPoolNodeSet, stewardClient, anotherClient, retryWait=1, - timeout=10)) + timeout=timeout)) diff --git a/plenum/test/script/test_add_unregistered_remote.py b/plenum/test/script/test_add_unregistered_remote.py 
deleted file mode 100644 index 0b4d8b9241..0000000000 --- a/plenum/test/script/test_add_unregistered_remote.py +++ /dev/null @@ -1,55 +0,0 @@ -from copy import copy -import pytest - -from plenum.common.looper import Looper -from plenum.test.helper import msgAll, randomText -from plenum.common.log import getlogger -from plenum.common.exceptions import RemoteNotFound -from plenum.common.constants import CLIENT_STACK_SUFFIX -from plenum.common.port_dispenser import genHa -from plenum.test.test_node import TestNodeSet, checkNodesConnected, genNodeReg, TestNode -from plenum.common.types import NodeDetail - - -logger = getlogger() - - -def testAddUnregisteredRemote(tdir_for_func): - nodeReg = genNodeReg(5) - - logger.debug("-----sharing keys-----") - with TestNodeSet(nodeReg=nodeReg, - tmpdir=tdir_for_func) as nodeSet: - with Looper(nodeSet) as looper: - for n in nodeSet: - n.startKeySharing() - looper.run(checkNodesConnected(nodeSet)) - - logger.debug("-----key sharing done, connect after key sharing-----") - with TestNodeSet(nodeReg=nodeReg, - tmpdir=tdir_for_func) as nodeSet: - with Looper(nodeSet) as loop: - loop.run(checkNodesConnected(nodeSet), - msgAll(nodeSet)) - for nodeName, node in nodeSet.nodes.items(): - assert len(node.nodestack.spylog) == 0 - - name = randomText(20) - ha = genHa() - cliname = name + CLIENT_STACK_SUFFIX - cliha = genHa() - faultyNodeReg = copy(nodeReg) - faultyNodeReg.update({name: NodeDetail(ha, cliname, cliha)}) - unregisteredNode = TestNode(name=name, ha=ha, cliname=cliname, cliha=cliha, - nodeRegistry=copy(faultyNodeReg), basedirpath=tdir_for_func, - primaryDecider=None, pluginPaths=None) - logger.debug("-----can not connect after adding unregistered node-----") - with TestNodeSet(nodeReg=nodeReg, - tmpdir=tdir_for_func) as nodeSet: - nodeSet.nodes[name] = unregisteredNode - with Looper(nodeSet) as loop: - with pytest.raises(RemoteNotFound) as e: - loop.run(checkNodesConnected(nodeSet), msgAll(nodeSet)) - for nodeName, node in 
nodeSet.nodes.items(): - if node.name != unregisteredNode.name: - assert len(node.nodestack.spylog) > 0 diff --git a/plenum/test/script/test_bootstrap_test_node.py b/plenum/test/script/test_bootstrap_test_node.py index 62a0ab95b0..644818703b 100644 --- a/plenum/test/script/test_bootstrap_test_node.py +++ b/plenum/test/script/test_bootstrap_test_node.py @@ -10,4 +10,4 @@ def testBootstrapTestNode(tconf): config=tconf, envName="test", appendToLedgers=False, domainTxnFieldOrder=getTxnOrderedFields(), ips=None, nodeCount=4, clientCount=1, - nodeNum=1, startingPort=portsStart) + nodeNum=1, startingPort=portsStart, nodeParamsFileName='plenum.env') diff --git a/plenum/test/script/test_change_non_primary_node_ha.py b/plenum/test/script/test_change_non_primary_node_ha.py index 9a947dd189..8d014e58b4 100644 --- a/plenum/test/script/test_change_non_primary_node_ha.py +++ b/plenum/test/script/test_change_non_primary_node_ha.py @@ -1,7 +1,7 @@ import pytest from plenum.test.script.helper import looper, tconf -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.test.script.helper import changeNodeHa @@ -9,10 +9,12 @@ whitelist = ['found legacy entry', "doesn't match", 'reconciling nodeReg', 'missing', 'conflicts', 'matches', 'nodeReg', - 'conflicting address', 'unable to send message'] + 'conflicting address', 'unable to send message', + 'got error while verifying message'] @pytest.mark.skipif('sys.platform == "win32"', reason='SOV-330') +@pytest.mark.skip(reason="SOV-941") def testChangeNodeHaForNonPrimary(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnData, poolTxnStewardNames, tconf): changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, diff --git a/plenum/test/script/test_change_primary_node_ha.py b/plenum/test/script/test_change_primary_node_ha.py index 4134202cfa..4288a1ddcb 100644 --- a/plenum/test/script/test_change_primary_node_ha.py +++ b/plenum/test/script/test_change_primary_node_ha.py @@ -1,7 +1,7 @@ import pytest from 
plenum.test.script.helper import looper, tconf -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.test.script.helper import changeNodeHa @@ -9,10 +9,11 @@ whitelist = ['found legacy entry', "doesn't match", 'reconciling nodeReg', 'missing', 'conflicts', 'matches', 'nodeReg', - 'conflicting address', 'unable to send message'] + 'conflicting address', 'unable to send message', + 'got error while verifying message'] -@pytest.mark.skipif('sys.platform == "win32"', reason='SOV-330') +@pytest.mark.skip(reason='SOV-330') def testChangeNodeHaForPrimary(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnData, poolTxnStewardNames, tconf): changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, diff --git a/plenum/test/signing/test_signing.py b/plenum/test/signing/test_signing.py index fd177a5791..234419d461 100644 --- a/plenum/test/signing/test_signing.py +++ b/plenum/test/signing/test_signing.py @@ -1,9 +1,10 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.exceptions import InvalidSignature -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.common.util import adict +from plenum.test import waits from plenum.test.malicious_behaviors_node import changesRequest, makeNodeFaulty from plenum.test.node_request.node_request_helper import checkPropagated from plenum.test.test_node import TestNode @@ -57,4 +58,5 @@ def check(): for good in goodNodes: assert good.name in props - looper.run(eventually(check, retryWait=1, timeout=10)) + timeout = waits.expectedClientRequestPropagationTime(len(nodeSet)) + looper.run(eventually(check, retryWait=1, timeout=timeout)) diff --git a/plenum/test/stasher.py b/plenum/test/stasher.py index 5e6ea83723..451ff67704 100644 --- a/plenum/test/stasher.py +++ b/plenum/test/stasher.py @@ -1,6 +1,6 @@ import time -from plenum.common.log import getlogger +from stp_core.common.log import 
getlogger logger = getlogger() diff --git a/plenum/test/storage/helper.py b/plenum/test/storage/helper.py index 411a12426b..9d7f784342 100644 --- a/plenum/test/storage/helper.py +++ b/plenum/test/storage/helper.py @@ -1,6 +1,7 @@ -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.constants import TXN_TYPE from plenum.common.types import f +from plenum.test import waits def checkReplyIsPersisted(nodes, lpr, reply1): @@ -11,5 +12,6 @@ def chk(node): assert result.get(f.IDENTIFIER.nm) == reply1.identifier assert result.get(TXN_TYPE) == reply1.operation.get(TXN_TYPE) + timeout = waits.expectedPoolLedgerCheck(len(nodes)) for node in nodes: - lpr.run(eventually(chk, node, retryWait=1, timeout=5)) + lpr.run(eventually(chk, node, retryWait=1, timeout=timeout)) diff --git a/plenum/test/storage/test_orientdb_hash_store.py b/plenum/test/storage/test_orientdb_hash_store.py index 2bccebb8d4..555b7ffffc 100644 --- a/plenum/test/storage/test_orientdb_hash_store.py +++ b/plenum/test/storage/test_orientdb_hash_store.py @@ -72,3 +72,4 @@ def testRecoverLedgerFromHashStore(odbhs, tdir): assert restartedLedger.root_hash == ledger.root_hash assert restartedLedger.tree.hashes == updatedTree.hashes assert restartedLedger.tree.root_hash == updatedTree.root_hash + restartedLedger.stop() diff --git a/plenum/test/test_action_queue.py b/plenum/test/test_action_queue.py new file mode 100644 index 0000000000..dfccea1a7b --- /dev/null +++ b/plenum/test/test_action_queue.py @@ -0,0 +1,35 @@ +import time +from functools import partial + +from stp_core.loop.looper import Looper +from plenum.common.motor import Motor +from plenum.server.has_action_queue import HasActionQueue + + +def testActionQueue(): + class Q1(Motor, HasActionQueue): + def __init__(self, name): + self.name = name + self.results = {} + Motor.__init__(self) + HasActionQueue.__init__(self) + + def start(self, loop): + pass + + async def prod(self, limit: int = None) 
-> int: + return self._serviceActions() + + def meth1(self, x): + if 'meth1' not in self.results: + self.results['meth1'] = [] + self.results['meth1'].append((x, time.perf_counter())) + + with Looper(debug=True) as looper: + q1 = Q1('q1') + looper.add(q1) + q1._schedule(partial(q1.meth1, 1), 2) + q1._schedule(partial(q1.meth1, 2), 4) + looper.runFor(2.3) + assert 1 in [t[0] for t in q1.results['meth1']] + assert 2 not in [t[0] for t in q1.results['meth1']] \ No newline at end of file diff --git a/plenum/test/test_bootstrapping.py b/plenum/test/test_bootstrapping.py index efdbd4e1e5..4b35af8b3f 100644 --- a/plenum/test/test_bootstrapping.py +++ b/plenum/test/test_bootstrapping.py @@ -1,15 +1,20 @@ -from plenum.common.exceptions import RemoteNotFound -from plenum.common.log import getlogger +import pytest +from stp_core.network.exceptions import RemoteNotFound, PublicKeyNotFoundOnDisk +from stp_core.common.log import getlogger from plenum.test.greek import genNodeNames -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper from plenum.test.helper import msgAll -from plenum.test.test_stack import RemoteState +from plenum.test.test_stack import NOT_CONNECTED from plenum.test.test_node import TestNodeSet, checkNodesConnected, genNodeReg logger = getlogger() +whitelist = ['public key from disk', 'verification key from disk', + 'doesnt have enough info to connect'] + + # noinspection PyIncorrectDocstring def testKeyShareParty(tdir_for_func): """ @@ -22,8 +27,6 @@ def testKeyShareParty(tdir_for_func): with TestNodeSet(nodeReg=nodeReg, tmpdir=tdir_for_func) as nodeSet: with Looper(nodeSet) as looper: - for n in nodeSet: - n.startKeySharing() looper.run(checkNodesConnected(nodeSet)) logger.debug("-----key sharing done, connect after key sharing-----") @@ -40,17 +43,11 @@ def testConnectWithoutKeySharingFails(tdir_for_func): attempts at connecting to nodes when key sharing is disabled must fail """ nodeNames = genNodeNames(5) - with 
TestNodeSet(names=nodeNames, tmpdir=tdir_for_func, - keyshare=False) as nodes: - with Looper(nodes) as looper: - try: - looper.run( - checkNodesConnected(nodes, - RemoteState(None, None, None))) - except RemoteNotFound: - pass - except KeyError as ex: - assert [n for n in nodeNames - if n == ex.args[0]] - except Exception: - raise + + with pytest.raises(PublicKeyNotFoundOnDisk): + with TestNodeSet(names=nodeNames, tmpdir=tdir_for_func, + keyshare=False) as nodes: + with Looper(nodes) as looper: + looper.run( + checkNodesConnected(nodes, NOT_CONNECTED)) + diff --git a/plenum/test/test_client.py b/plenum/test/test_client.py index eb27a941e8..4d50fc45e7 100644 --- a/plenum/test/test_client.py +++ b/plenum/test/test_client.py @@ -1,16 +1,21 @@ from functools import partial +from plenum.common.keygen_utils import initRemoteKeys +from plenum.common.stacks import nodeStackClass +from stp_core.network.network_interface import NetworkInterface +from stp_core.network.port_dispenser import genHa +from stp_core.types import HA, Identifier + from plenum.client.client import Client, ClientProvider from plenum.client.wallet import Wallet -from plenum.common.log import getlogger -from plenum.common.port_dispenser import genHa -from plenum.common.stacked import NodeStack -from plenum.common.constants import REQACK, REQNACK, REPLY, OP_FIELD_NAME -from plenum.common.types import Identifier, HA, f -from plenum.common.util import bootstrapClientKeys from plenum.common.error import error +from stp_core.common.log import getlogger +from plenum.common.constants import REQACK, REQNACK, REPLY +from plenum.common.types import f +from plenum.common.util import bootstrapClientKeys from plenum.test.test_stack import StackedTester, getTestableStack from plenum.test.testable import Spyable +from plenum.common.constants import OP_FIELD_NAME logger = getlogger() @@ -18,9 +23,13 @@ @Spyable(methods=[Client.handleOneNodeMsg, Client.resendRequests]) class TestClient(Client, StackedTester): + def 
__init__(self, *args, **kwargs): + self.NodeStackClass = nodeStackClass + super().__init__(*args, **kwargs) + @property - def nodeStackClass(self) -> NodeStack: - return getTestableStack(NodeStack) + def nodeStackClass(self) -> NetworkInterface: + return getTestableStack(self.NodeStackClass) def handleOneNodeMsg(self, wrappedMsg, excludeFromCli=None) -> None: super().handleOneNodeMsg(wrappedMsg, excludeFromCli=excludeFromCli) @@ -60,6 +69,13 @@ def genTestClient(nodes = None, ha=ha, basedirpath=tmpdir, sighex=sighex) + + if not usePoolLedger and nodes: + for node in nodes: + stack = node.clientstack + initRemoteKeys(tc.name, stack.name, tmpdir, stack.verhex, + override=True) + w = None # type: Wallet if bootstrapKeys and nodes: if not identifier or not verkey: diff --git a/plenum/test/test_connections_with_converted_key.py b/plenum/test/test_connections_with_converted_key.py index 39f998708f..f572ec74b7 100644 --- a/plenum/test/test_connections_with_converted_key.py +++ b/plenum/test/test_connections_with_converted_key.py @@ -1,21 +1,23 @@ -from plenum.common.crypto import ed25519SkToCurve25519, ed25519PkToCurve25519 +from binascii import unhexlify + +from stp_core.crypto.util import ed25519SkToCurve25519, ed25519PkToCurve25519 def testNodesConnectedUsingConvertedKeys(nodeSet, up): for node in nodeSet: - secretKey = ed25519SkToCurve25519(node.nodestack.local.signer.keyraw) - publicKey = ed25519PkToCurve25519(node.nodestack.local.signer.verraw) - assert node.nodestack.local.priver.keyraw == secretKey - assert node.nodestack.local.priver.pubraw == publicKey + secretKey = ed25519SkToCurve25519(node.nodestack.keyhex) + publicKey = ed25519PkToCurve25519(node.nodestack.verhex) + assert unhexlify(node.nodestack.prihex) == secretKey + assert unhexlify(node.nodestack.pubhex) == publicKey - secretKey = ed25519SkToCurve25519(node.clientstack.local.signer.keyraw) - publicKey = ed25519PkToCurve25519(node.clientstack.local.signer.verraw) - assert 
node.clientstack.local.priver.keyraw == secretKey - assert node.clientstack.local.priver.pubraw == publicKey + secretKey = ed25519SkToCurve25519(node.clientstack.keyhex) + publicKey = ed25519PkToCurve25519(node.clientstack.verhex) + assert unhexlify(node.clientstack.prihex) == secretKey + assert unhexlify(node.clientstack.pubhex) == publicKey def testClientConnectedUsingConvertedKeys(nodeSet, up, client1, replied1): - secretKey = ed25519SkToCurve25519(client1.nodestack.local.signer.keyraw) - publicKey = ed25519PkToCurve25519(client1.nodestack.local.signer.verraw) - assert client1.nodestack.local.priver.keyraw == secretKey - assert client1.nodestack.local.priver.pubraw == publicKey + secretKey = ed25519SkToCurve25519(client1.nodestack.keyhex) + publicKey = ed25519PkToCurve25519(client1.nodestack.verhex) + assert unhexlify(client1.nodestack.prihex) == secretKey + assert unhexlify(client1.nodestack.pubhex) == publicKey diff --git a/plenum/test/test_crypto.py b/plenum/test/test_crypto.py index bbf27c7d0a..9715411425 100644 --- a/plenum/test/test_crypto.py +++ b/plenum/test/test_crypto.py @@ -3,8 +3,9 @@ import pytest from libnacl import randombytes, crypto_sign, crypto_sign_open from libnacl.public import SecretKey, Box -from plenum.common.crypto import ed25519SkToCurve25519, ed25519PkToCurve25519 -from raet.nacling import Signer, SigningKey, Verifier, PrivateKey +from stp_core.crypto.util import ed25519SkToCurve25519, ed25519PkToCurve25519 +from stp_core.crypto.nacl_wrappers import Signer, SigningKey, Verifier, \ + PrivateKey pytestmark = pytest.mark.smoke diff --git a/plenum/test/test_delay.py b/plenum/test/test_delay.py index 9a3a6a77bb..3557c642bf 100644 --- a/plenum/test/test_delay.py +++ b/plenum/test/test_delay.py @@ -1,12 +1,13 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger -from plenum.common.looper import Looper +from stp_core.loop.eventually import eventually, slowFactor +from stp_core.common.log 
import getlogger +from stp_core.loop.looper import Looper from plenum.server.node import Node +from plenum.test import waits from plenum.test.delayers import delayerMsgTuple -from plenum.test.helper import sendMsgAndCheck, addNodeBack, assertExp -from plenum.test.msgs import randomMsg +from plenum.test.helper import sendMessageAndCheckDelivery, addNodeBack, assertExp +from plenum.test.msgs import randomMsg, TestMsg from plenum.test.test_node import TestNodeSet, checkNodesConnected, \ ensureElectionsDone, prepareNodeSet @@ -21,30 +22,28 @@ def testTestNodeDelay(tdir_for_func): nodeB = nodes.getNode("testB") with Looper(nodes) as looper: - for n in nodes: - n.startKeySharing() - - logger.debug("connect") looper.run(checkNodesConnected(nodes)) - logger.debug("send one message, without delay") - msg = randomMsg() - looper.run(sendMsgAndCheck(nodes, nodeA, nodeB, msg, 1)) - logger.debug("set delay, then send another message and find that " - "it doesn't arrive") - msg = randomMsg() - nodeB.nodeIbStasher.delay(delayerMsgTuple(6, type(msg), nodeA.name)) + # send one message, without delay + looper.run(sendMessageAndCheckDelivery(nodes, nodeA, nodeB)) + # set delay, then send another message + # and find that it doesn't arrive + delay = 10 * slowFactor + nodeB.nodeIbStasher.delay( + delayerMsgTuple(delay, TestMsg, nodeA.name) + ) with pytest.raises(AssertionError): - looper.run(sendMsgAndCheck(nodes, nodeA, nodeB, msg, 3)) - logger.debug("but then find that it arrives after the delay " - "duration has passed") - looper.run(sendMsgAndCheck(nodes, nodeA, nodeB, msg, 4)) - logger.debug( - "reset the delay, and find another message comes quickly") + looper.run(sendMessageAndCheckDelivery(nodes, nodeA, nodeB)) + + # but then find that it arrives after the delay + # duration has passed + looper.run(sendMessageAndCheckDelivery(nodes, nodeA, nodeB, + customTimeout=delay)) + + # reset the delay, and find another message comes quickly nodeB.nodeIbStasher.resetDelays() - msg = 
randomMsg() - looper.run(sendMsgAndCheck(nodes, nodeA, nodeB, msg, 1)) + looper.run(sendMessageAndCheckDelivery(nodes, nodeA, nodeB)) def testSelfNominationDelay(tdir_for_func): @@ -65,23 +64,25 @@ def testSelfNominationDelay(tdir_for_func): # Ensuring that NodeA is started before any other node to demonstrate # that it is delaying self nomination + timeout = waits.expectedNodeStartUpTimeout() looper.run( eventually(lambda: assertExp(nodeA.isReady()), retryWait=1, - timeout=5)) + timeout=timeout)) - # Elections should be done - ensureElectionsDone(looper=looper, nodes=nodeSet, retryWait=1, - timeout=10) + ensureElectionsDone(looper=looper, + nodes=nodeSet, + retryWait=1) # node A should not have any primary replica + timeout = waits.expectedNodeStartUpTimeout() looper.run( eventually(lambda: assertExp(not nodeA.hasPrimary), retryWait=1, - timeout=10)) + timeout=timeout)) # Make sure that after at the most 30 seconds, nodeA's # `startElection` is called looper.run(eventually(lambda: assertExp( len(nodeA.spylog.getAll( Node.decidePrimaries.__name__)) > 0), - retryWait=1, timeout=30)) + retryWait=1, timeout=delay)) diff --git a/plenum/test/test_log_rotation.py b/plenum/test/test_log_rotation.py index 5d7251bac1..5020a7fc76 100644 --- a/plenum/test/test_log_rotation.py +++ b/plenum/test/test_log_rotation.py @@ -3,7 +3,7 @@ import logging import shutil import time -from plenum.common.logging.TimeAndSizeRotatingFileHandler \ +from stp_core.common.logging.TimeAndSizeRotatingFileHandler \ import TimeAndSizeRotatingFileHandler @@ -14,6 +14,7 @@ def cleanFolder(path): return path +@pytest.mark.skip(reason="SOV-950") def test_time_log_rotation(): logDirPath = cleanFolder("/tmp/plenum/test_time_log_rotation") logFile = os.path.join(logDirPath, "log") @@ -28,6 +29,7 @@ def test_time_log_rotation(): assert len(os.listdir(logDirPath)) == 4 # initial + 3 new +@pytest.mark.skip(reason="SOV-950") def test_size_log_rotation(): logDirPath = 
cleanFolder("/tmp/plenum/test_size_log_rotation") logFile = os.path.join(logDirPath, "log") @@ -44,6 +46,7 @@ def test_size_log_rotation(): assert len(os.listdir(logDirPath)) == 5 +@pytest.mark.skip(reason="SOV-950") def test_time_and_size_log_rotation(): logDirPath = cleanFolder("/tmp/plenum/test_time_and_size_log_rotation") logFile = os.path.join(logDirPath, "log") diff --git a/plenum/test/test_memory_consumpion.py b/plenum/test/test_memory_consumpion.py index 3a7bcf26bd..1f2c1e625a 100644 --- a/plenum/test/test_memory_consumpion.py +++ b/plenum/test/test_memory_consumpion.py @@ -1,6 +1,6 @@ import pytest -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.common.perf_util import get_size from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies from plenum.test.node_catchup.helper import \ @@ -30,13 +30,16 @@ def testRequestsSize(txnPoolNodesLooper, txnPoolNodeSet, poolTxnClientNames, *txnPoolNodeSet) clients.append((client, wallet)) - n = 250 - timeOutPerReq = 3 + numRequests = 250 + fVal = 1 for (client, wallet) in clients: - logger.debug("{} sending {} requests".format(client, n)) - sendReqsToNodesAndVerifySuffReplies(txnPoolNodesLooper, wallet, client, - n, 1, timeOutPerReq) - logger.debug("{} sent {} requests".format(client, n)) + logger.debug("{} sending {} requests".format(client, numRequests)) + sendReqsToNodesAndVerifySuffReplies(txnPoolNodesLooper, + wallet, + client, + numRequests, + fVal) + logger.debug("{} sent {} requests".format(client, numRequests)) for node in txnPoolNodeSet: logger.debug("{} has requests {} with size {}". 
format(node, len(node.requests), get_size(node.requests))) diff --git a/plenum/test/test_node.py b/plenum/test/test_node.py index 01190991e6..ed6e67c808 100644 --- a/plenum/test/test_node.py +++ b/plenum/test/test_node.py @@ -9,14 +9,17 @@ from typing import Iterable, Iterator, Tuple, Sequence, Union, Dict, TypeVar, \ List +from plenum.common.stacks import nodeStackClass, clientStackClass +from stp_core.crypto.util import randomSeed +from stp_core.network.port_dispenser import genHa + import plenum.test.delayers as delayers from plenum.common.error import error -from plenum.common.eventually import eventually, eventuallyAll -from plenum.common.exceptions import RemoteNotFound -from plenum.common.log import getlogger -from plenum.common.looper import Looper -from plenum.common.port_dispenser import genHa -from plenum.common.stacked import NodeStack, ClientStack, KITStack +from stp_core.loop.eventually import eventually, eventuallyAll +from stp_core.network.exceptions import RemoteNotFound +from plenum.common.keygen_utils import learnKeysFromOthers, tellKeysToOthers +from stp_core.common.log import getlogger +from stp_core.loop.looper import Looper from plenum.common.startable import Status from plenum.common.types import TaggedTuples, NodeDetail from plenum.common.constants import CLIENT_STACK_SUFFIX @@ -37,7 +40,7 @@ from plenum.test.test_stack import StackedTester, getTestableStack, CONNECTED, \ checkRemoteExists, RemoteState, checkState from plenum.test.testable import Spyable -from plenum.test.waits import expectedWait +from plenum.test import waits logger = getlogger() @@ -133,7 +136,7 @@ def whitelistNode(self, nodeName: str, *codes: int): if nodeName not in self.whitelistedClients: self.whitelistedClients[nodeName] = set() self.whitelistedClients[nodeName].update(codes) - logger.debug("{} white listing {} for codes {}" + logger.debug("{} whitelisting {} for codes {}" .format(self, nodeName, codes)) def blacklistNode(self, nodeName: str, reason: str=None, 
code: int=None): @@ -150,7 +153,7 @@ def whitelistClient(self, clientName: str, *codes: int): if clientName not in self.whitelistedClients: self.whitelistedClients[clientName] = set() self.whitelistedClients[clientName].update(codes) - logger.debug("{} white listing {} for codes {}" + logger.debug("{} whitelisting {} for codes {}" .format(self, clientName, codes)) def blacklistClient(self, clientName: str, reason: str=None, code: int=None): @@ -178,8 +181,7 @@ def serviceReplicaOutBox(self, *args, **kwargs) -> int: r.outBoxTestStasher.process() return super().serviceReplicaOutBox(*args, **kwargs) - @classmethod - def ensureKeysAreSetup(cls, name, baseDir): + def ensureKeysAreSetup(self): pass @@ -201,10 +203,15 @@ def ensureKeysAreSetup(cls, name, baseDir): Node.send, Node.sendInstanceChange, Node.processInstanceChange, - Node.checkPerformance + Node.checkPerformance, + Node.processStashedOrderedReqs ]) class TestNode(TestNodeCore, Node): + def __init__(self, *args, **kwargs): + self.NodeStackClass = nodeStackClass + self.ClientStackClass = clientStackClass + Node.__init__(self, *args, **kwargs) TestNodeCore.__init__(self, *args, **kwargs) # Balances of all client @@ -218,12 +225,12 @@ def _getOrientDbStore(self, name, dbType): self.config, name, dbType) @property - def nodeStackClass(self) -> NodeStack: - return getTestableStack(Spyable(methods=[KITStack.handleJoinFromUnregisteredRemote], deepLevel=3)(NodeStack)) + def nodeStackClass(self): + return getTestableStack(self.NodeStackClass) @property - def clientStackClass(self) -> ClientStack: - return getTestableStack(ClientStack) + def clientStackClass(self): + return getTestableStack(self.ClientStackClass) def getLedgerManager(self): return TestLedgerManager(self, ownedByNode=True) @@ -252,6 +259,7 @@ def _serviceActions(self): replica.Replica.doPrepare, replica.Replica.doOrder, replica.Replica.discard, + replica.Replica.stashOutsideWatermarks # replica.Replica.orderPendingCommit ]) class 
TestReplica(replica.Replica): @@ -266,16 +274,19 @@ def __init__(self, *args, **kwargs): class TestNodeSet(ExitStack): def __init__(self, - names: Iterable[str] = None, - count: int = None, + names: Iterable[str]=None, + count: int=None, nodeReg=None, tmpdir=None, keyshare=True, primaryDecider=None, - pluginPaths:Iterable[str]=None, + pluginPaths: Iterable[str]=None, testNodeClass=TestNode): + + super().__init__() self.tmpdir = tmpdir + self.keyshare = keyshare self.primaryDecider = primaryDecider self.pluginPaths = pluginPaths @@ -305,6 +316,10 @@ def addNode(self, name: str) -> TestNode: assert name in self.nodeReg ha, cliname, cliha = self.nodeReg[name] + seed = randomSeed() + if self.keyshare: + learnKeysFromOthers(self.tmpdir, name, self.nodes.values()) + testNodeClass = self.testNodeClass node = self.enter_context( testNodeClass(name=name, @@ -314,7 +329,12 @@ def addNode(self, name: str) -> TestNode: nodeRegistry=copy(self.nodeReg), basedirpath=self.tmpdir, primaryDecider=self.primaryDecider, - pluginPaths=self.pluginPaths)) + pluginPaths=self.pluginPaths, + seed=seed)) + + if self.keyshare: + tellKeysToOthers(node, self.nodes.values()) + self.nodes[name] = node self.__dict__[name] = node return node @@ -375,17 +395,6 @@ def getAllMsgReceived(self, node: NodeRef, method: str = None) -> Tuple: return getAllMsgReceivedForNode(self.getNode(node), method) -def getNonPrimaryReplicas(nodes: Iterable[TestNode], instId: int = 0) -> \ - Sequence[TestReplica]: - return [node.replicas[instId] for node in nodes if - node.replicas[instId].isPrimary is False] - - -def getAllReplicas(nodes: Iterable[TestNode], instId: int = 0) -> \ - Sequence[TestReplica]: - return [node.replicas[instId] for node in nodes] - - @Spyable(methods=[Monitor.isMasterThroughputTooLow, Monitor.isMasterReqLatencyTooHigh, Monitor.sendThroughput, @@ -416,12 +425,12 @@ def run(self, coro, nodecount=4): tmpdir = self.fresh_tdir() with self.testNodeSetClass(count=nodecount, tmpdir=tmpdir) as nodeset: 
with Looper(nodeset) as looper: - for n in nodeset: - n.startKeySharing() + # for n in nodeset: + # n.startKeySharing() ctx = adict(looper=looper, nodeset=nodeset, tmpdir=tmpdir) looper.run(checkNodesConnected(nodeset)) - ensureElectionsDone(looper=looper, nodes=nodeset, retryWait=1, - timeout=30) + ensureElectionsDone(looper=looper, + nodes=nodeset) looper.run(coro(ctx)) def fresh_tdir(self): @@ -438,10 +447,17 @@ def isBlacklisted(self, remote): return True -def checkPoolReady(looper: Looper, nodes: Sequence[TestNode], - timeout: int = 20): +def checkPoolReady(looper: Looper, + nodes: Sequence[TestNode], + customTimeout = None): + """ + Check that pool is in Ready state + """ + + timeout = customTimeout or waits.expectedPoolGetReadyTimeout(len(nodes)) looper.run( - eventually(checkNodesAreReady, nodes, retryWait=.25, + eventually(checkNodesAreReady, nodes, + retryWait=.25, timeout=timeout, ratchetSteps=10)) @@ -462,18 +478,19 @@ def x(): async def checkNodesConnected(stacks: Iterable[Union[TestNode, TestClient]], expectedRemoteState=None, - overrideTimeout=None): + customTimeout=None): expectedRemoteState = expectedRemoteState if expectedRemoteState else CONNECTED # run for how long we expect all of the connections to take - wait = overrideTimeout if overrideTimeout else expectedWait(len(stacks)) - logger.debug("waiting for {} seconds to check connections...".format(wait)) + timeout = customTimeout or \ + (waits.expectedNodeInterconnectionTime(len(stacks)) * len(stacks)) + logger.debug("waiting for {} seconds to check connections...".format(timeout)) # verify every node can see every other as a remote funcs = [ partial(checkRemoteExists, frm.nodestack, to.name, expectedRemoteState) for frm, to in permutations(stacks, 2)] await eventuallyAll(*funcs, retryWait=.5, - totalTimeout=wait, + totalTimeout=timeout, acceptableExceptions=[AssertionError, RemoteNotFound]) @@ -528,10 +545,11 @@ def checkPrisAreSame(): def checkNodesAreReady(nodes: Sequence[TestNode]): for 
node in nodes: - assert node.isReady() + assert node.isReady(), '{} has status {}'.format(node, node.status) async def checkNodesParticipating(nodes: Sequence[TestNode], timeout: int=None): + # TODO is this used? If so - add timeout for it to plenum.test.waits if not timeout: timeout = .75 * len(nodes) @@ -564,11 +582,12 @@ def checkEveryProtocolInstanceHasOnlyOnePrimary(looper: Looper, def checkEveryNodeHasAtMostOnePrimary(looper: Looper, nodes: Sequence[TestNode], retryWait: float = None, - timeout: float = None): + customTimeout: float = None): def checkAtMostOnePrim(node): prims = [r for r in node.replicas if r.isPrimary] assert len(prims) <= 1 + timeout = customTimeout or waits.expectedElectionTimeout(len(nodes)) for node in nodes: looper.run(eventually(checkAtMostOnePrim, node, @@ -578,13 +597,22 @@ def checkAtMostOnePrim(node): def checkProtocolInstanceSetup(looper: Looper, nodes: Sequence[TestNode], retryWait: float = 1, - timeout: float = None): - checkEveryProtocolInstanceHasOnlyOnePrimary( - looper=looper, nodes=nodes, retryWait=retryWait, - timeout=timeout if timeout else None) + customTimeout: float = None): + + totalTimeout = customTimeout or waits.expectedElectionTimeout(len(nodes)) + instanceTimeout = totalTimeout * 4/5 + nodeTimeout = totalTimeout * 1/5 + - checkEveryNodeHasAtMostOnePrimary(looper=looper, nodes=nodes, - retryWait=retryWait, timeout=timeout / 5) + checkEveryProtocolInstanceHasOnlyOnePrimary(looper=looper, + nodes=nodes, + retryWait=retryWait, + timeout=instanceTimeout) + + checkEveryNodeHasAtMostOnePrimary(looper=looper, + nodes=nodes, + retryWait=retryWait, + customTimeout=nodeTimeout) primaryReplicas = {replica.instId: replica for node in nodes @@ -595,17 +623,32 @@ def checkProtocolInstanceSetup(looper: Looper, nodes: Sequence[TestNode], def ensureElectionsDone(looper: Looper, nodes: Sequence[TestNode], - retryWait: float = None, + retryWait: float = None, # seconds timeout: float = None) -> Sequence[TestNode]: - # Wait for 
elections to be complete and returns the primary replica for - # each protocol instance + """ + Wait for elections to be complete + + :param retryWait: + :param timeout: specific timeout + :return: primary replica for each protocol instance + """ + + if retryWait is None: + retryWait = 1 + + if timeout is None: + timeout = waits.expectedElectionTimeout(len(nodes)) + + poolReadyTimeout = 1/3 * timeout + setupCheckTimeout = 2/3 * timeout - checkPoolReady(looper=looper, nodes=nodes, - timeout=timeout / 3 if timeout else None) + checkPoolReady(looper, nodes, customTimeout=poolReadyTimeout) return checkProtocolInstanceSetup( - looper=looper, nodes=nodes, retryWait=retryWait, - timeout=2 * timeout / 3 if timeout else None) + looper=looper, + nodes=nodes, + retryWait=retryWait, + customTimeout=setupCheckTimeout) def genNodeReg(count=None, names=None) -> Dict[str, NodeDetail]: @@ -631,8 +674,8 @@ def extractCliNodeReg(self): def prepareNodeSet(looper: Looper, nodeSet: TestNodeSet): # TODO: Come up with a more specific name for this - for n in nodeSet: - n.startKeySharing() + # for n in nodeSet: + # n.startKeySharing() # Key sharing party looper.run(checkNodesConnected(nodeSet)) @@ -677,3 +720,24 @@ def getRequiredInstances(nodeCount: int) -> int: return f_value + 1 +def getPrimaryReplica(nodes: Sequence[TestNode], + instId: int = 0) -> TestReplica: + preplicas = [node.replicas[instId] for node in nodes if + node.replicas[instId].isPrimary] + if len(preplicas) > 1: + raise RuntimeError('More than one primary node found') + elif len(preplicas) < 1: + raise RuntimeError('No primary node found') + else: + return preplicas[0] + + +def getNonPrimaryReplicas(nodes: Iterable[TestNode], instId: int = 0) -> \ + Sequence[TestReplica]: + return [node.replicas[instId] for node in nodes if + node.replicas[instId].isPrimary is False] + + +def getAllReplicas(nodes: Iterable[TestNode], instId: int = 0) -> \ + Sequence[TestReplica]: + return [node.replicas[instId] for node in nodes] \ No 
newline at end of file diff --git a/plenum/test/test_node_basic.py b/plenum/test/test_node_basic.py index 09d06bf2e4..18fcc773b8 100644 --- a/plenum/test/test_node_basic.py +++ b/plenum/test/test_node_basic.py @@ -1,20 +1,27 @@ import pytest +from plenum.test import waits from plenum.test.test_node import TestNode, checkProtocolInstanceSetup from plenum.test.node_helpers.node_helper import getProtocolInstanceNums from plenum.common.util import getMaxFailures, adict -from plenum.test.helper import checkNodesConnected, sendMsgAndCheck, msgAll +from plenum.test.helper import checkNodesConnected, sendMessageAndCheckDelivery, msgAll from plenum.test.msgs import randomMsg nodeCount = 4 +# @pytest.fixture(scope="module") +# def setup(request, tdir, nodeReg): +# for name in nodeReg: +# pass + + @pytest.fixture(scope="module") def pool(looper, nodeSet): - for n in nodeSet: # type: TestNode - n.startKeySharing() + # for n in nodeSet: # type: TestNode + # n.startKeySharing() looper.run(checkNodesConnected(nodeSet)) - checkProtocolInstanceSetup(looper, nodeSet, timeout=5) + checkProtocolInstanceSetup(looper, nodeSet) return adict(looper=looper, nodeset=nodeSet) @@ -29,12 +36,13 @@ def testAllBroadcast(pool): def testMsgSendingTime(pool, nodeReg): nodeNames = list(nodeReg.keys()) msg = randomMsg() + timeout = waits.expectedNodeStartUpTimeout() pool.looper.run( - sendMsgAndCheck(pool.nodeset, - nodeNames[0], - nodeNames[1], - msg, - 1)) + sendMessageAndCheckDelivery(pool.nodeset, + nodeNames[0], + nodeNames[1], + msg, + customTimeout=timeout)) def testCorrectNumOfProtocolInstances(pool): diff --git a/plenum/test/test_node_connection.py b/plenum/test/test_node_connection.py index ee82fe70c7..535b7cc5d0 100644 --- a/plenum/test/test_node_connection.py +++ b/plenum/test/test_node_connection.py @@ -3,19 +3,23 @@ import pytest from ioflo.aid import getConsole -from plenum.common.eventually import eventually -from plenum.common.log import getlogger -from plenum.common.looper import 
Looper -from plenum.common.port_dispenser import genHa +from plenum.common.keygen_utils import initNodeKeysForBothStacks, tellKeysToOthers +from plenum.common.util import randomString +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger +from stp_core.loop.looper import Looper from plenum.common.temp_file_util import SafeTemporaryDirectory from plenum.common.types import NodeDetail +from plenum.test import waits from plenum.test.helper import stopNodes from plenum.test.test_node import TestNode, checkNodesConnected, \ checkProtocolInstanceSetup +from stp_core.network.port_dispenser import genHa logger = getlogger() -whitelist = ['discarding message', 'found legacy entry'] +whitelist = ['discarding message', 'found legacy entry', + 'error while verifying message'] @pytest.fixture() @@ -27,17 +31,27 @@ def nodeReg(): 'Delta': NodeDetail(genHa(1), "DeltaC", genHa(1)) } +def initLocalKeys(tdir, nodeReg): + for nName in nodeReg.keys(): + sigseed = randomString(32).encode() + initNodeKeysForBothStacks(nName, tdir, sigseed, override=True) + + # Its a function fixture, deliberately @pytest.yield_fixture() -def tdirAndLooper(): +def tdirAndLooper(nodeReg): with SafeTemporaryDirectory() as td: logger.debug("temporary directory: {}".format(td)) with Looper() as looper: yield td, looper -def testNodesConnectsWhenOneNodeIsLate(allPluginsPath, tdirAndLooper, nodeReg): + + +@pytest.mark.skip() +def testNodesConnectsWhenOneNodeIsLate(allPluginsPath, tdirAndLooper, + nodeReg, conf): tdir, looper = tdirAndLooper nodes = [] names = list(nodeReg.keys()) @@ -47,7 +61,6 @@ def create(name): node = TestNode(name, nodeReg, basedirpath=tdir, pluginPaths=allPluginsPath) looper.add(node) - node.startKeySharing() nodes.append(node) for name in names[:3]: @@ -61,58 +74,77 @@ def create(name): # create the fourth and see that it learns who the primaries are # from the other nodes create(names[3]) - checkProtocolInstanceSetup(looper, nodes, timeout=10) + # 
TODO set timeout from 'waits' after the test enabled + checkProtocolInstanceSetup(looper, nodes, customTimeout=10) stopNodes(nodes, looper) -def testNodesConnectWhenTheyAllStartAtOnce(allPluginsPath, tdirAndLooper, nodeReg): +def testNodesConnectWhenTheyAllStartAtOnce(allPluginsPath, tdirAndLooper, + nodeReg): tdir, looper = tdirAndLooper nodes = [] + + initLocalKeys(tdir, nodeReg) + for name in nodeReg: node = TestNode(name, nodeReg, basedirpath=tdir, pluginPaths=allPluginsPath) - looper.add(node) - node.startKeySharing() nodes.append(node) + + for node in nodes: + tellKeysToOthers(node, nodes) + + for node in nodes: + looper.add(node) + looper.run(checkNodesConnected(nodes)) stopNodes(nodes, looper) # @pytest.mark.parametrize("x10", range(1, 11)) # def testNodesComingUpAtDifferentTimes(x10): -def testNodesComingUpAtDifferentTimes(allPluginsPath, tdirAndLooper, nodeReg): +def testNodesComingUpAtDifferentTimes(allPluginsPath, tdirAndLooper, + nodeReg): console = getConsole() console.reinit(flushy=True, verbosity=console.Wordage.verbose) tdir, looper = tdirAndLooper + initLocalKeys(tdir, nodeReg) + nodes = [] names = list(nodeReg.keys()) + shuffle(names) waits = [randint(1, 10) for _ in names] rwaits = [randint(1, 10) for _ in names] - for i, name in enumerate(names): + for name in names: node = TestNode(name, nodeReg, basedirpath=tdir, pluginPaths=allPluginsPath) - looper.add(node) - node.startKeySharing() nodes.append(node) + + for node in nodes: + tellKeysToOthers(node, nodes) + + for i, node in enumerate(nodes): + looper.add(node) looper.runFor(waits[i]) - looper.run(checkNodesConnected(nodes, - overrideTimeout=10)) + looper.run(checkNodesConnected(nodes)) logger.debug("connects") logger.debug("node order: {}".format(names)) logger.debug("waits: {}".format(waits)) stopNodes(nodes, looper) + # # Giving some time for sockets to close, use eventually + # time.sleep(1) + for i, n in enumerate(nodes): n.start(looper.loop) looper.runFor(rwaits[i]) looper.runFor(3) - 
looper.run(checkNodesConnected(nodes, - overrideTimeout=10)) + looper.run(checkNodesConnected(nodes)) stopNodes(nodes, looper) logger.debug("reconnects") logger.debug("node order: {}".format(names)) @@ -124,17 +156,24 @@ def testNodeConnection(allPluginsPath, tdirAndLooper, nodeReg): console.reinit(flushy=True, verbosity=console.Wordage.verbose) tdir, looper = tdirAndLooper names = ["Alpha", "Beta"] - logger.debug(names) nrg = {n: nodeReg[n] for n in names} - A, B = [TestNode(name, nrg, basedirpath=tdir, - pluginPaths=allPluginsPath) - for name in names] + initLocalKeys(tdir, nrg) + + logger.debug(names) + nodes = [] + for name in names: + node = TestNode(name, nrg, basedirpath=tdir, + pluginPaths=allPluginsPath) + nodes.append(node) + + for node in nodes: + tellKeysToOthers(node, nodes) + + A, B = nodes looper.add(A) - A.startKeySharing() looper.runFor(4) logger.debug("wait done") looper.add(B) - B.startKeySharing() looper.runFor(4) looper.run(checkNodesConnected([A, B])) looper.stopall() @@ -145,35 +184,7 @@ def testNodeConnection(allPluginsPath, tdirAndLooper, nodeReg): stopNodes([A, B], looper) -@pytest.mark.skip(reason="SOV-538. " - "Fails due to a bug. 
Its fixed here " - "https://github.com/RaetProtocol/raet/pull/9") -def testNodeConnectionAfterKeysharingRestarted(allPluginsPath, tdirAndLooper): - console = getConsole() - console.reinit(flushy=True, verbosity=console.Wordage.verbose) - tdir, looper = tdirAndLooper - timeout = 60 - names = ["Alpha", "Beta"] - logger.debug(names) - nrg = {n: nodeReg[n] for n in names} - A, B = [TestNode(name, nodeRegistry=nrg, basedirpath=tdir, - pluginPaths=allPluginsPath) - for name in names] - looper.add(A) - A.startKeySharing(timeout=timeout) - looper.runFor(timeout+1) - logger.debug("done waiting for A's timeout") - looper.add(B) - B.startKeySharing(timeout=timeout) - looper.runFor(timeout+1) - logger.debug("done waiting for B's timeout") - A.startKeySharing(timeout=timeout) - B.startKeySharing(timeout=timeout) - looper.run(checkNodesConnected([A, B])) - stopNodes([A, B], looper) - - -def testNodeRemoveUnknownRemote(allPluginsPath, tdirAndLooper, nodeReg): +def testNodeRemoveUnknownRemote(allPluginsPath, tdirAndLooper, nodeReg, conf): """ The nodes Alpha and Beta know about each other so they should connect but they should remove remote for C when it tries to connect to them @@ -181,30 +192,39 @@ def testNodeRemoveUnknownRemote(allPluginsPath, tdirAndLooper, nodeReg): tdir, looper = tdirAndLooper names = ["Alpha", "Beta"] - logger.debug(names) nrg = {n: nodeReg[n] for n in names} - A, B = [TestNode(name, nrg, basedirpath=tdir, - pluginPaths=allPluginsPath) - for name in names] - for node in (A, B): + initLocalKeys(tdir, nrg) + logger.debug(names) + + nodes = [] + for name in names: + node = TestNode(name, nrg, basedirpath=tdir, + pluginPaths=allPluginsPath) + nodes.append(node) + + for node in nodes: + tellKeysToOthers(node, nodes) + + A, B = nodes + for node in nodes: looper.add(node) - node.startKeySharing() - looper.run(checkNodesConnected([A, B])) + looper.run(checkNodesConnected(nodes)) + initLocalKeys(tdir, {"Gamma": nodeReg["Gamma"]}) C = TestNode("Gamma", {**nrg, 
**{"Gamma": nodeReg["Gamma"]}}, basedirpath=tdir, pluginPaths=allPluginsPath) - looper.add(C) - C.startKeySharing(timeout=20) + for node in nodes: + tellKeysToOthers(node, [C,]) - def chk(): - assert not C.nodestack.isKeySharing + looper.add(C) + looper.runFor(5) - looper.run(eventually(chk, retryWait=2, timeout=21)) stopNodes([C, ], looper) def chk(): assert C.name not in B.nodestack.nameRemotes assert C.name not in A.nodestack.nameRemotes - looper.run(eventually(chk, retryWait=2, timeout=5)) + timeout = waits.expectedNodeInterconnectionTime(len(nodeReg)) + looper.run(eventually(chk, retryWait=2, timeout=timeout)) stopNodes([A, B], looper) diff --git a/plenum/test/test_node_request.py b/plenum/test/test_node_request.py index 48e9f0e771..c487ff99ff 100644 --- a/plenum/test/test_node_request.py +++ b/plenum/test/test_node_request.py @@ -1,18 +1,20 @@ from pprint import pprint import pytest +from plenum import config -from plenum.common.eventually import eventually -from plenum.common.log import getlogger -from plenum.common.looper import Looper +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger +from stp_core.loop.looper import Looper from plenum.common.types import PrePrepare, Prepare, \ Commit, Primary from plenum.common.util import getMaxFailures +from plenum.test import waits from plenum.test.delayers import delayerMsgTuple from plenum.test.greek import genNodeNames from plenum.test.helper import setupNodesAndClient, \ sendRandomRequest, setupClient, \ - assertLength, addNodeBack, checkSufficientRepliesRecvd, \ + assertLength, addNodeBack, waitForSufficientRepliesForRequests, \ getPendingRequestsForReplica, checkRequestReturnedToNode from plenum.test.profiler import profile_this from plenum.test.test_node import TestNode, TestNodeSet, checkPoolReady, \ @@ -25,15 +27,13 @@ def testReqExecWhenReturnedByMaster(tdir_for_func): with TestNodeSet(count=4, tmpdir=tdir_for_func) as nodeSet: with Looper(nodeSet) as looper: - for n 
in nodeSet: - n.startKeySharing() client1, wallet1 = setupNodesAndClient(looper, nodeSet, tmpdir=tdir_for_func) req = sendRandomRequest(wallet1, client1) - looper.run(eventually(checkSufficientRepliesRecvd, client1.inBox, - req.reqId, 1, - retryWait=1, timeout=15)) + waitForSufficientRepliesForRequests(looper, client1, + requests=[req], fVal=1) + async def chk(): for node in nodeSet: entries = node.spylog.getAll( @@ -45,8 +45,8 @@ async def chk(): assert result else: assert result is None - - looper.run(eventually(chk, timeout=3)) + timeout = waits.expectedOrderingTime(nodeSet.nodes['Alpha'].instances.count) + looper.run(eventually(chk, timeout=timeout)) # noinspection PyIncorrectDocstring @@ -72,8 +72,7 @@ def testRequestReturnToNodeWhenPrePrepareNotReceivedByOneNode(tdir_for_func): delayerMsgTuple(120, PrePrepare, nodeA.name)) # Ensure elections are done - ensureElectionsDone(looper=looper, nodes=nodeSet, retryWait=1, - timeout=30) + ensureElectionsDone(looper=looper, nodes=nodeSet) assert nodeA.hasPrimary instNo = nodeA.primaryReplicaNo @@ -82,6 +81,7 @@ def testRequestReturnToNodeWhenPrePrepareNotReceivedByOneNode(tdir_for_func): # All nodes including B should return their ordered requests for node in nodeSet: + # TODO set timeout from 'waits' after the test enabled looper.run(eventually(checkRequestReturnedToNode, node, wallet1.defaultId, req.reqId, instNo, retryWait=1, timeout=30)) @@ -110,8 +110,8 @@ def testPrePrepareWhenPrimaryStatusIsUnknown(tdir_for_func): # will not know whether it is primary or not # nodeD.nodestack.delay(delayer(20, PRIMARY)) - - nodeD.nodeIbStasher.delay(delayerMsgTuple(20, Primary)) + delayD = 20 + nodeD.nodeIbStasher.delay(delayerMsgTuple(delayD, Primary)) checkPoolReady(looper=looper, nodes=nodeSet) @@ -121,6 +121,7 @@ def testPrePrepareWhenPrimaryStatusIsUnknown(tdir_for_func): # TODO Rethink this instNo = 0 + timeout = waits.expectedClientRequestPropagationTime(len(nodeSet)) for i in range(3): node = 
nodeSet.getNode(nodeNames[i]) # Nodes A, B and C should have received PROPAGATE request @@ -128,14 +129,15 @@ def testPrePrepareWhenPrimaryStatusIsUnknown(tdir_for_func): looper.run( eventually(checkIfPropagateRecvdFromNode, node, nodeD, request.identifier, - request.reqId, retryWait=1, timeout=10)) + request.reqId, retryWait=1, timeout=timeout)) # Node D should have 1 pending PRE-PREPARE request def assertOnePrePrepare(): assert len(getPendingRequestsForReplica(nodeD.replicas[instNo], PrePrepare)) == 1 - looper.run(eventually(assertOnePrePrepare, retryWait=1, timeout=10)) + timeout = waits.expectedPrePrepareTime(len(nodeSet)) + looper.run(eventually(assertOnePrePrepare, retryWait=1, timeout=timeout)) # Node D should have 2 pending PREPARE requests(from node B and C) @@ -143,7 +145,8 @@ def assertTwoPrepare(): assert len(getPendingRequestsForReplica(nodeD.replicas[instNo], Prepare)) == 2 - looper.run(eventually(assertTwoPrepare, retryWait=1, timeout=10)) + timeout = waits.expectedPrePrepareTime(len(nodeSet)) + looper.run(eventually(assertTwoPrepare, retryWait=1, timeout=timeout)) # Node D should have no pending PRE-PREPARE, PREPARE or COMMIT # requests @@ -151,7 +154,7 @@ def assertTwoPrepare(): looper.run(eventually(lambda: assertLength( getPendingRequestsForReplica(nodeD.replicas[instNo], reqType), - 0), retryWait=1, timeout=20)) + 0), retryWait=1, timeout=delayD)) async def checkIfPropagateRecvdFromNode(recvrNode: TestNode, @@ -163,34 +166,44 @@ async def checkIfPropagateRecvdFromNode(recvrNode: TestNode, # noinspection PyIncorrectDocstring +@pytest.mark.skip(reason="ZStack does not have any mechanism to have stats " + "either remove this once raet is removed " + "or implement a `stats` feature in ZStack") def testMultipleRequests(tdir_for_func): """ Send multiple requests to the client """ with TestNodeSet(count=7, tmpdir=tdir_for_func) as nodeSet: with Looper(nodeSet) as looper: - for n in nodeSet: - n.startKeySharing() - - ss0 = snapshotStats(*nodeSet) + # 
for n in nodeSet: + # n.startKeySharing() + + # TODO: ZStack does not have any mechanism to have stats, + # either remove this once raet is removed or implement a `stats` + # feature in ZStack + if not config.UseZStack: + ss0 = snapshotStats(*nodeSet) client, wal = setupNodesAndClient(looper, nodeSet, tmpdir=tdir_for_func) - ss1 = snapshotStats(*nodeSet) + if not config.UseZStack: + ss1 = snapshotStats(*nodeSet) def x(): requests = [sendRandomRequest(wal, client) for _ in range(10)] - for request in requests: - looper.run(eventually( - checkSufficientRepliesRecvd, client.inBox, - request.reqId, 3, - retryWait=1, timeout=3 * len(nodeSet))) + waitForSufficientRepliesForRequests(looper, client, + requests=requests, fVal=3) + ss2 = snapshotStats(*nodeSet) diff = statsDiff(ss2, ss1) - pprint(ss2) - print("----------------------------------------------") - pprint(diff) + if not config.UseZStack: + ss2 = snapshotStats(*nodeSet) + diff = statsDiff(ss2, ss1) + + pprint(ss2) + print("----------------------------------------------") + pprint(diff) profile_this(x) @@ -201,10 +214,7 @@ def testClientSendingSameRequestAgainBeforeFirstIsProcessed(looper, nodeSet, size = len(client1.inBox) req = sendRandomRequest(wallet1, client1) client1.submitReqs(req) - f = getMaxFailures(len(nodeSet)) - looper.run(eventually( - checkSufficientRepliesRecvd, client1.inBox, - req.reqId, f, retryWait=1, timeout=3 * len(nodeSet))) + waitForSufficientRepliesForRequests(looper, client1, requests=[req]) # Only REQACK will be sent twice by the node but not REPLY assert len(client1.inBox) == size + 12 diff --git a/plenum/test/test_port_conflicts.py b/plenum/test/test_port_conflicts.py index e6905bab87..afc9309cb1 100644 --- a/plenum/test/test_port_conflicts.py +++ b/plenum/test/test_port_conflicts.py @@ -1,11 +1,12 @@ import pytest +from stp_zmq.zstack import ZStack +from stp_core.network.port_dispenser import genHa +from stp_core.types import HA -from plenum.common.port_dispenser import genHa -from 
plenum.common.raet import isPortUsed -from plenum.common.types import NodeDetail, HA +from stp_raet.util import isPortUsedByRaetRemote +from plenum.common.types import NodeDetail -# noinspection PyIncorrectDocstring @pytest.fixture('module') def overlapNodePorts(nodeReg): """ @@ -27,7 +28,10 @@ def testOverlappingNodePorts(up): def testUsedPortDetection(tdir, client1): - port = client1.nodestack.ha[1] - assert isPortUsed(tdir, port) - newPort = genHa()[1] - assert not isPortUsed(tdir, newPort) + if isinstance(client1.nodestack, ZStack): + pytest.skip("ZStack does not store port numbers on disk") + else: + port = client1.nodestack.ha[1] + assert isPortUsedByRaetRemote(tdir, port) + newPort = genHa()[1] + assert not isPortUsedByRaetRemote(tdir, newPort) diff --git a/plenum/test/test_round_trip_with_one_faulty_node.py b/plenum/test/test_round_trip_with_one_faulty_node.py index ce272a505f..9880408603 100644 --- a/plenum/test/test_round_trip_with_one_faulty_node.py +++ b/plenum/test/test_round_trip_with_one_faulty_node.py @@ -3,7 +3,7 @@ import pytest from plenum.common.types import Propagate -from plenum.common.log import getlogger +from stp_core.common.log import getlogger nodeCount = 4 faultyNodes = 1 diff --git a/plenum/test/test_stack.py b/plenum/test/test_stack.py index cb1d566bb0..483f09c528 100644 --- a/plenum/test/test_stack.py +++ b/plenum/test/test_stack.py @@ -1,19 +1,30 @@ from typing import Any, Optional, NamedTuple -from plenum.common.eventually import eventuallyAll, eventually -from plenum.common.log import getlogger -from plenum.common.stacked import Stack -from plenum.common.types import HA -from plenum.test.exceptions import NotFullyConnected +from stp_core.network.network_interface import NetworkInterface +from stp_raet.rstack import RStack +from stp_zmq.zstack import ZStack +from stp_core.types import HA + +from plenum.common.config_util import getConfig +from stp_core.loop.eventually import eventuallyAll, eventually from plenum.common.exceptions 
import NotConnectedToAny +from stp_core.common.log import getlogger +from plenum.test.exceptions import NotFullyConnected from plenum.test.stasher import Stasher -from plenum.test.waits import expectedWait - +from plenum.test import waits +from plenum.common import util logger = getlogger() +config = getConfig() + + +if config.UseZStack: + BaseStackClass = ZStack +else: + BaseStackClass = RStack -class TestStack(Stack): +class TestStack(BaseStackClass): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.stasher = Stasher(self.rxMsgs, @@ -21,8 +32,12 @@ def __init__(self, *args, **kwargs): self.delay = self.stasher.delay - def _serviceStack(self, age): - super()._serviceStack(age) + # def _serviceStack(self, age): + # super()._serviceStack(age) + # self.stasher.process(age) + + async def _serviceStack(self, age): + await super()._serviceStack(age) self.stasher.process(age) def resetDelays(self): @@ -36,7 +51,7 @@ def checkIfConnectedTo(self, count=None): for address in self.nodeReg.values(): for remote in self.nodestack.remotes.values(): if HA(*remote.ha) == address: - if Stack.isRemoteConnected(remote): + if BaseStackClass.isRemoteConnected(remote): connected += 1 break totalNodes = len(self.nodeReg) if count is None else count @@ -47,39 +62,61 @@ def checkIfConnectedTo(self, count=None): else: assert connected == totalNodes - async def ensureConnectedToNodes(self, timeout=None): - wait = timeout or expectedWait(len(self.nodeReg)) + async def ensureConnectedToNodes(self, customTimeout=None): + f = util.getQuorum(len(self.nodeReg)) + timeout = customTimeout or waits.expectedClientConnectionTimeout(f) + logger.debug( "waiting for {} seconds to check client connections to " - "nodes...".format(wait)) - await eventuallyAll(self.checkIfConnectedTo, retryWait=.5, - totalTimeout=wait) + "nodes...".format(timeout)) + await eventuallyAll(self.checkIfConnectedTo, + retryWait=.5, + totalTimeout=timeout) async def ensureDisconnectedToNodes(self, 
timeout): - await eventually(self.checkIfConnectedTo, 0, retryWait=.5, + # TODO is this used? If so - add timeout for it to plenum.test.waits + await eventually(self.checkIfConnectedTo, 0, + retryWait=.5, timeout=timeout) -def getTestableStack(stack: Stack): +def getTestableStack(stack: NetworkInterface): """ - Dynamically modify a class that extends from `Stack` and introduce + Dynamically modify a class that extends from `RStack` and introduce `TestStack` in the class hierarchy :param stack: :return: """ + # TODO: Can it be achieved without this mro manipulation? mro = stack.__mro__ newMro = [] for c in mro[1:]: - if c == Stack: + if c == BaseStackClass: newMro.append(TestStack) newMro.append(c) return type(stack.__name__, tuple(newMro), dict(stack.__dict__)) - -RemoteState = NamedTuple("RemoteState", [ - ('joined', Optional[bool]), - ('allowed', Optional[bool]), - ('alived', Optional[bool])]) +# TODO: move to stp +if config.UseZStack: + RemoteState = NamedTuple("RemoteState", [ + ('isConnected', Optional[bool]) + ]) + + CONNECTED = RemoteState(isConnected=True) + NOT_CONNECTED = RemoteState(isConnected=False) + # TODO this is to allow imports to pass until we create abstractions for RAET and ZMQ + JOINED_NOT_ALLOWED = RemoteState(isConnected=False) + JOINED = RemoteState(isConnected=False) +else: + RemoteState = NamedTuple("RemoteState", [ + ('joined', Optional[bool]), + ('allowed', Optional[bool]), + ('alived', Optional[bool])]) + + CONNECTED = RemoteState(joined=True, allowed=True, alived=True) + NOT_CONNECTED = RemoteState(joined=None, allowed=None, alived=None) + JOINED_NOT_ALLOWED = RemoteState(joined=True, allowed=None, alived=None) + JOINED = RemoteState(joined=True, allowed='N/A', alived='N/A') def checkState(state: RemoteState, obj: Any, details: str=None): @@ -92,14 +129,8 @@ def checkState(state: RemoteState, obj: Any, details: str=None): set(state._asdict().items()) -def checkRemoteExists(frm: Stack, +def checkRemoteExists(frm: RStack, to: str, # 
remoteName state: Optional[RemoteState] = None): remote = frm.getRemote(to) checkState(state, remote, "{}'s remote {}".format(frm.name, to)) - - -CONNECTED = RemoteState(joined=True, allowed=True, alived=True) -NOT_CONNECTED = RemoteState(joined=None, allowed=None, alived=None) -JOINED_NOT_ALLOWED = RemoteState(joined=True, allowed=None, alived=None) -JOINED = RemoteState(joined=True, allowed='N/A', alived='N/A') \ No newline at end of file diff --git a/plenum/test/test_testable.py b/plenum/test/test_testable.py index e442804df2..3024c8fd44 100644 --- a/plenum/test/test_testable.py +++ b/plenum/test/test_testable.py @@ -3,7 +3,7 @@ from typing import Any from unittest import TestCase -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.server.node import Node from plenum.test.testable import Spyable diff --git a/plenum/test/test_util.py b/plenum/test/test_util.py index d7f0efa807..b7196b0b9e 100644 --- a/plenum/test/test_util.py +++ b/plenum/test/test_util.py @@ -3,7 +3,8 @@ from libnacl import crypto_hash_sha256 -from plenum.common.util import evenCompare, distributedConnectionMap, randomString +from plenum.common.util import randomString +from stp_core.network.util import evenCompare, distributedConnectionMap from plenum.test.greek import genNodeNames diff --git a/plenum/test/test_verif_merkle_proof.py b/plenum/test/test_verif_merkle_proof.py index 9b84acb8d5..3b89305757 100644 --- a/plenum/test/test_verif_merkle_proof.py +++ b/plenum/test/test_verif_merkle_proof.py @@ -1,6 +1,5 @@ from plenum.client.client import Client -from plenum.common.eventually import eventually -from plenum.test.helper import checkSufficientRepliesRecvd, \ +from plenum.test.helper import waitForSufficientRepliesForRequests, \ sendRandomRequest from plenum.test.test_client import TestClient @@ -12,8 +11,6 @@ def testMerkleProofForFirstLeaf(client1: TestClient, replied1): def testMerkleProofForNonFirstLeaf(looper, nodeSet, wallet1, client1, 
replied1): req2 = sendRandomRequest(wallet1, client1) - f = nodeSet.f - looper.run(eventually(checkSufficientRepliesRecvd, client1.inBox, req2.reqId - , f, retryWait=1, timeout=15)) + waitForSufficientRepliesForRequests(looper, client1, requests=[req2]) replies = client1.getRepliesFromAllNodes(*req2.key).values() assert Client.verifyMerkleProof(*replies) diff --git a/plenum/test/testable.py b/plenum/test/testable.py index d560a6a785..c90bab79e5 100644 --- a/plenum/test/testable.py +++ b/plenum/test/testable.py @@ -7,7 +7,7 @@ from typing import Dict from plenum.common.util import objSearchReplace -from plenum.common.log import getlogger +from stp_core.common.log import getlogger logger = getlogger() @@ -35,7 +35,9 @@ def getLast(self, method: SpyableMethod, required: bool = False) -> \ "spylog entry for method {} not found".format(method)) return entry - def getAll(self, method: str) -> List[Entry]: + def getAll(self, method: SpyableMethod) -> List[Entry]: + if callable(method): + method = method.__name__ return list(reversed([x for x in self if x.method == method])) def getLastParam(self, method: str, paramIndex: int = 0) -> Any: @@ -45,7 +47,9 @@ def getLastParams(self, method: str, required: bool = True) -> Tuple: last = self.getLast(method, required) return last.params if last is not None else None - def count(self, method: str) -> int: + def count(self, method: SpyableMethod) -> int: + if callable(method): + method = method.__name__ return sum(1 for x in self if x.method == method) diff --git a/plenum/test/testing_utils.py b/plenum/test/testing_utils.py index 61c40cd2a3..d56c9c40c7 100644 --- a/plenum/test/testing_utils.py +++ b/plenum/test/testing_utils.py @@ -3,7 +3,7 @@ from ioflo.base.consoling import getConsole from plenum.common.error import error -from plenum.common.log import getlogger, addTraceToLogging, TRACE_LOG_LEVEL +from stp_core.common.log import getlogger, TRACE_LOG_LEVEL logger = getlogger() @@ -39,8 +39,6 @@ def checkDblImp(): def 
setupTestLogging(): - addTraceToLogging() - logging.basicConfig( level=TRACE_LOG_LEVEL, format='{relativeCreated:,.0f} {levelname:7s} {message:s}', diff --git a/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py b/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py index 9eda18f678..8221a1fdb8 100644 --- a/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py +++ b/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py @@ -1,7 +1,8 @@ -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import InstanceChange from plenum.server.node import Node -from plenum.test.helper import checkDiscardMsg, checkViewNoForNodes +from plenum.test import waits +from plenum.test.helper import checkDiscardMsg, waitForViewChange # noinspection PyIncorrectDocstring @@ -18,11 +19,12 @@ def testDiscardInstChngMsgFrmPastView(nodeSet, looper, ensureView): nodeSet.Alpha.send(icMsg) # ensure every node but Alpha discards the invalid instance change request + timeout = waits.expectedViewChangeTime(len(nodeSet)) looper.run(eventually(checkDiscardMsg, nodeSet, icMsg, - 'less than its view no', nodeSet.Alpha, timeout=5)) + 'less than its view no', nodeSet.Alpha, timeout=timeout)) # Check that that message is discarded. - looper.run(eventually(checkViewNoForNodes, nodeSet, timeout=3)) + waitForViewChange(looper, nodeSet) # noinspection PyIncorrectDocstring @@ -47,7 +49,7 @@ def testDoNotSendInstChngMsgIfMasterDoesntSeePerformanceProblem( nodeSet.Alpha.send(icMsg) # Check that that message is discarded. 
- looper.run(eventually(checkViewNoForNodes, nodeSet, timeout=3)) + waitForViewChange(looper, nodeSet) # No node should have sent a view change and thus must not have called # `sendInstanceChange` for n in nodeSet: diff --git a/plenum/test/view_change/test_elections_after_view_change.py b/plenum/test/view_change/test_elections_after_view_change.py index a65ae60a61..95f997e9d7 100644 --- a/plenum/test/view_change/test_elections_after_view_change.py +++ b/plenum/test/view_change/test_elections_after_view_change.py @@ -1,7 +1,8 @@ from functools import partial -from plenum.common.eventually import eventually -from plenum.common.looper import Looper +from stp_core.loop.eventually import eventually +from stp_core.loop.looper import Looper +from plenum.test import waits from plenum.test.delayers import ppDelay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies from plenum.test.test_node import TestNodeSet, getNonPrimaryReplicas, \ @@ -20,17 +21,20 @@ def testElectionsAfterViewChange(delayedPerf, looper: Looper, # Delay processing of PRE-PREPARE from all non primary replicas of master # so master's throughput falls # and view changes + delay = 10 nonPrimReps = getNonPrimaryReplicas(nodeSet, 0) for r in nonPrimReps: - r.node.nodeIbStasher.delay(ppDelay(10, 0)) + r.node.nodeIbStasher.delay(ppDelay(delay, 0)) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) # Ensure view change happened for both node and its primary elector + timeout = waits.expectedViewChangeTime(len(nodeSet)) for node in nodeSet: looper.run(eventually(partial(checkViewChangeInitiatedForNode, node, 1), - retryWait=1, timeout=20)) + retryWait=1, timeout=timeout)) # Ensure elections are done again and pool is setup again with appropriate # protocol instances and each protocol instance is setup properly too - checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=30) + timeout = waits.expectedElectionTimeout(len(nodeSet)) + delay + checkProtocolInstanceSetup(looper, 
nodeSet, retryWait=1, customTimeout=timeout) diff --git a/plenum/test/view_change/test_instance_change_msg_checking.py b/plenum/test/view_change/test_instance_change_msg_checking.py index 558a4bbef0..384258daf0 100644 --- a/plenum/test/view_change/test_instance_change_msg_checking.py +++ b/plenum/test/view_change/test_instance_change_msg_checking.py @@ -1,5 +1,6 @@ -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.common.types import InstanceChange +from plenum.test import waits from plenum.test.test_node import TestNode DISCARD_REASON = 'viewNo has incorrect type' @@ -21,4 +22,5 @@ def chk(): assert isinstance(params['msg'], InstanceChange) assert DISCARD_REASON in params['reason'] - looper.run(eventually(chk, timeout=5)) + timeout = waits.expectedNodeToNodeMessageDeliveryTime() + looper.run(eventually(chk, timeout=timeout)) diff --git a/plenum/test/view_change/test_queueing_req_from_future_view.py b/plenum/test/view_change/test_queueing_req_from_future_view.py index d90786cd7f..896ba94ca5 100644 --- a/plenum/test/view_change/test_queueing_req_from_future_view.py +++ b/plenum/test/view_change/test_queueing_req_from_future_view.py @@ -2,9 +2,10 @@ import pytest -from plenum.common.eventually import eventually -from plenum.common.log import getlogger +from stp_core.loop.eventually import eventually +from stp_core.common.log import getlogger from plenum.common.util import getMaxFailures +from plenum.test import waits from plenum.test.delayers import ppDelay, icDelay from plenum.test.helper import sendRandomRequest, \ sendReqsToNodesAndVerifySuffReplies @@ -27,33 +28,37 @@ def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up, f = getMaxFailures(nodeCount) # Delay processing of instance change on a node + delayIcA = 60 nodeA = nodeSet.Alpha - nodeA.nodeIbStasher.delay(icDelay(60)) + nodeA.nodeIbStasher.delay(icDelay(delayIcA)) nonPrimReps = getNonPrimaryReplicas(nodeSet, 0) # Delay processing of 
PRE-PREPARE from all non primary replicas of master # so master's throughput falls and view changes - ppDelayer = ppDelay(5, 0) + delay = 5 + ppDelayer = ppDelay(delay, 0) for r in nonPrimReps: r.node.nodeIbStasher.delay(ppDelayer) + timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + delay sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4, - timeoutPerReq=5 * nodeCount) + customTimeoutPerReq=timeout) # Every node except Node A should have a view change + timeout = waits.expectedViewChangeTime(len(nodeSet)) for node in nodeSet: - if node.name != nodeA.name: + if node.name == nodeA.name: + # Node A's view should not have changed yet + with pytest.raises(AssertionError): + looper.run(eventually(partial( + checkViewChangeInitiatedForNode, node, 1), + retryWait=1, + timeout=timeout)) + else: looper.run(eventually( partial(checkViewChangeInitiatedForNode, node, 1), retryWait=1, - timeout=20)) - - # Node A's view should not have changed yet - with pytest.raises(AssertionError): - looper.run(eventually(partial( - checkViewChangeInitiatedForNode, nodeA, 1), - retryWait=1, - timeout=20)) + timeout=timeout)) # NodeA should not have any pending 3 phase request for a later view for r in nodeA.replicas: # type: TestReplica @@ -69,7 +74,7 @@ def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up, def checkPending3PhaseReqs(): # Get all replicas that have their primary status decided reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None] - # Atleast one replica should have its primary status decided + # At least one replica should have its primary status decided assert len(reps) > 0 for r in reps: # type: TestReplica logger.debug("primary status for replica {} is {}" @@ -77,4 +82,5 @@ def checkPending3PhaseReqs(): assert len(r.threePhaseMsgsForLaterView) > 0 # NodeA should now have pending 3 phase request for a later view - looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=30)) + timeout = 
waits.expectedViewChangeTime(len(nodeSet)) + delayIcA + looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=timeout)) diff --git a/plenum/test/view_change/test_view_change.py b/plenum/test/view_change/test_view_change.py index 09d61f8ce9..cde42349f1 100644 --- a/plenum/test/view_change/test_view_change.py +++ b/plenum/test/view_change/test_view_change.py @@ -3,11 +3,12 @@ import pytest -from plenum.common.eventually import eventually +from stp_core.loop.eventually import eventually from plenum.server.node import Node from plenum.test.delayers import delayNonPrimaries -from plenum.test.helper import checkViewNoForNodes, \ - sendReqsToNodesAndVerifySuffReplies, getPrimaryReplica +from plenum.test.helper import waitForViewChange, \ + sendReqsToNodesAndVerifySuffReplies +from plenum.test.test_node import getPrimaryReplica nodeCount = 7 @@ -21,8 +22,7 @@ def viewChangeDone(nodeSet, looper, up, wallet1, client1, viewNo): sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) - looper.run(eventually(partial(checkViewNoForNodes, nodeSet, viewNo+1), - retryWait=1, timeout=20)) + waitForViewChange(looper, nodeSet, expectedViewNo=viewNo+1) # noinspection PyIncorrectDocstring @@ -62,8 +62,7 @@ def testViewChangeCase1(nodeSet, looper, up, wallet1, client1, viewNo): sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) # Check that view change happened for all nodes - looper.run(eventually(partial(checkViewNoForNodes, nodeSet, viewNo + 1), - retryWait=1, timeout=20)) + waitForViewChange(looper, nodeSet, expectedViewNo=viewNo+1) # All nodes except the reluctant node should have sent a view change and # thus must have called `sendInstanceChange` diff --git a/plenum/test/view_change/test_view_changes_if_backup_primary_disconnected.py b/plenum/test/view_change/test_view_changes_if_backup_primary_disconnected.py new file mode 100644 index 0000000000..34d9781f0d --- /dev/null +++ 
b/plenum/test/view_change/test_view_changes_if_backup_primary_disconnected.py @@ -0,0 +1,32 @@ +from stp_core.loop.eventually import eventually +from plenum.test.conftest import txnPoolNodeSet, txnPoolNodesLooper +from plenum.test.helper import stopNodes, viewNoForNodes, \ + nodeByName, primaryNodeNameForInstance + + +def testViewChangesIfBackupPrimaryDisconnected(txnPoolNodeSet, + txnPoolNodesLooper): + + # Setup + nodes = set(txnPoolNodeSet) + looper = txnPoolNodesLooper + + viewNoBefore = viewNoForNodes(nodes) + primaryNodeForBackupInstance1Before = nodeByName( + nodes, primaryNodeNameForInstance(nodes, 1)) + + # Exercise + stopNodes([primaryNodeForBackupInstance1Before], looper) + + # Verify + remainingNodes = nodes - {primaryNodeForBackupInstance1Before} + + def assertNewPrimariesElected(): + viewNoAfter = viewNoForNodes(remainingNodes) + primaryNodeForBackupInstance1After = nodeByName( + nodes, primaryNodeNameForInstance(remainingNodes, 1)) + assert viewNoBefore + 1 == viewNoAfter + assert primaryNodeForBackupInstance1Before != \ + primaryNodeForBackupInstance1After + + looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=30)) \ No newline at end of file diff --git a/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py b/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py new file mode 100644 index 0000000000..190b0e7b66 --- /dev/null +++ b/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py @@ -0,0 +1,32 @@ +from stp_core.loop.eventually import eventually +from plenum.test.conftest import txnPoolNodeSet, txnPoolNodesLooper +from plenum.test.helper import stopNodes, viewNoForNodes, nodeByName, \ + primaryNodeNameForInstance + + +def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, + txnPoolNodesLooper): + + # Setup + nodes = set(txnPoolNodeSet) + looper = txnPoolNodesLooper + + viewNoBefore = viewNoForNodes(nodes) + primaryNodeForMasterInstanceBefore = 
nodeByName( + nodes, primaryNodeNameForInstance(nodes, 0)) + + # Exercise + stopNodes([primaryNodeForMasterInstanceBefore], looper) + + # Verify + remainingNodes = nodes - {primaryNodeForMasterInstanceBefore} + + def assertNewPrimariesElected(): + viewNoAfter = viewNoForNodes(remainingNodes) + primaryNodeForMasterInstanceAfter = nodeByName( + nodes, primaryNodeNameForInstance(remainingNodes, 0)) + assert viewNoBefore + 1 == viewNoAfter + assert primaryNodeForMasterInstanceBefore != \ + primaryNodeForMasterInstanceAfter + + looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=30)) diff --git a/plenum/test/view_change/test_view_not_changed.py b/plenum/test/view_change/test_view_not_changed.py index 2f1fa20ee1..69dd5b33dd 100644 --- a/plenum/test/view_change/test_view_not_changed.py +++ b/plenum/test/view_change/test_view_not_changed.py @@ -1,6 +1,6 @@ from typing import Iterable -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper from plenum.common.util import getMaxFailures from plenum.test.helper import checkViewNoForNodes, \ @@ -36,4 +36,4 @@ def testViewNotChanged(looper: Looper, nodeSet: TestNodeSet, up, wallet1, sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) - checkViewNoForNodes(nodeSet, 0) + checkViewNoForNodes(nodeSet, expectedViewNo=0) diff --git a/plenum/test/waits.py b/plenum/test/waits.py index 6258249323..680340c1a4 100644 --- a/plenum/test/waits.py +++ b/plenum/test/waits.py @@ -1,23 +1,124 @@ -import math - -from plenum.common.log import getlogger +from stp_core.common.log import getlogger from plenum.common.config_util import getConfig +from plenum.common.util import totalConnections +from plenum.config import CLIENT_REQACK_TIMEOUT, CLIENT_REPLY_TIMEOUT logger = getlogger() +config = getConfig() + + +######################### +# Pool internal timeouts +######################### + + +def expectedNodeInterconnectionTime(nodeCount): + count = totalConnections(nodeCount) + return count * 
config.ExpectedConnectTime + + +def expectedCatchupTime(nodeCount, customConsistencyProofsTimeout=None): + timeout = customConsistencyProofsTimeout or config.ConsistencyProofsTimeout + return timeout * nodeCount + + +def expectedPoolGetReadyTimeout(nodeCount): + # looks the same with catchup process + return expectedCatchupTime(nodeCount) + + +def expectedPoolLedgerCheck(nodeCount): + """ + Expected time required for checking that 'pool ledger' on nodes and client + is the same + """ + return 5 * nodeCount + + +def expectedNodeStartUpTimeout(): + return 5 + + +def expectedPoolStartUpTimeout(nodeCount): + return nodeCount * expectedNodeStartUpTimeout() + + +def expectedRequestStashingTime(): + return 20 + + +######################### +# Pool election timeouts +######################### + +def expectedNominationTimeout(nodeCount): + return 3 * nodeCount + + +def expectedElectionTimeout(nodeCount): + return expectedNominationTimeout(nodeCount) + 4 * nodeCount + + +def expectedNextPerfCheck(nodes): + return max([n.perfCheckFreq for n in nodes]) + 1 + + +def expectedViewChangeTime(nodeCount): + return int(0.75 * nodeCount) + + +######################### +# Processing timeouts +######################### + +def expectedNodeToNodeMessageDeliveryTime(): + return 5 + + +def expectedPropagateTime(nodeCount): + count = totalConnections(nodeCount) + return expectedNodeToNodeMessageDeliveryTime() * count + + +def expectedPrePrepareTime(nodeCount): + count = totalConnections(nodeCount) + return expectedNodeToNodeMessageDeliveryTime() * count + + +def expectedOrderingTime(numInstances): + return int(2.14 * numInstances) + + +######################### +# Client timeouts +######################### + +def expectedClientConnectionTimeout(fVal): + # TODO calc fVal here, get nodeCount + return 3 * fVal + + +def expectedClientRequestPropagationTime(nodeCount): + return int(2.5 * nodeCount) + + +def expectedTransactionExecutionTime(nodeCount): + return int(CLIENT_REPLY_TIMEOUT * 
nodeCount) + + +def expectedReqAckQuorumTime(): + return CLIENT_REQACK_TIMEOUT -def expectedWaitDirect(count): - conf = getConfig() - return count * conf.ExpectedConnectTime + 1 +def expectedReqNAckQuorumTime(): + return CLIENT_REQACK_TIMEOUT -def expectedWait(nodeCount): - c = totalConnections(nodeCount) - w = expectedWaitDirect(c) - logger.debug("wait time for {} nodes and {} connections is {}".format( - nodeCount, c, w)) - return w +######################### +# Agent timeouts +######################### +def expectedAgentCommunicationTime(): + # TODO: implement if it is needed + raise NotImplementedError() -def totalConnections(nodeCount: int) -> int: - return math.ceil((nodeCount * (nodeCount - 1)) / 2) \ No newline at end of file diff --git a/plenum/test/wallet/test_wallet.py b/plenum/test/wallet/test_wallet.py index 0d0b275f38..03cdc9aefc 100644 --- a/plenum/test/wallet/test_wallet.py +++ b/plenum/test/wallet/test_wallet.py @@ -1,6 +1,7 @@ import pytest from plenum.client.wallet import Wallet -from plenum.common.util import getTimeBasedId, randomSeed +from plenum.common.util import getTimeBasedId +from stp_core.crypto.util import randomSeed def add_and_sign(signersNum = 10): diff --git a/plenum/common/logging/__init__.py b/plenum/test/zstack_tests/__init__.py similarity index 100% rename from plenum/common/logging/__init__.py rename to plenum/test/zstack_tests/__init__.py diff --git a/plenum/test/zstack_tests/test_zstack_reconnection.py b/plenum/test/zstack_tests/test_zstack_reconnection.py new file mode 100644 index 0000000000..a269f6101b --- /dev/null +++ b/plenum/test/zstack_tests/test_zstack_reconnection.py @@ -0,0 +1,62 @@ +import pytest + +from stp_core.loop.eventually import eventually +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, stopNodes +from plenum.test.test_node import TestNode, ensureElectionsDone + + 
+@pytest.fixture(scope="module") +def tconf(conf, tdirWithPoolTxns): + conf.UseZStack = True + return conf + + +def checkNodesSendingCommits(nodeSet): + for node in nodeSet: + for r in node.replicas: + i = r.instId + commitSenders = [_.voters for _ in r.commits.values()] + for otherNode in nodeSet: + if node == otherNode: + continue + otherReplica = otherNode.replicas[i] + for senders in commitSenders: + assert otherReplica.name in senders + + +def testZStackNodeReconnection(tconf, looper, txnPoolNodeSet, client1, wallet1, + tdirWithPoolTxns, client1Connected): + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) + + npr = [n for n in txnPoolNodeSet if not n.hasPrimary] + nodeToCrash = npr[0] + idxToCrash = txnPoolNodeSet.index(nodeToCrash) + otherNodes = [_ for _ in txnPoolNodeSet if _ != nodeToCrash] + + def checkFlakyConnected(conn=True): + for node in otherNodes: + if conn: + assert nodeToCrash.nodestack.name in node.nodestack.connecteds + else: + assert nodeToCrash.nodestack.name not in node.nodestack.connecteds + + checkFlakyConnected(True) + nodeToCrash.stop() + looper.removeProdable(nodeToCrash) + looper.runFor(1) + stopNodes([nodeToCrash], looper) + # TODO Select or create the timeout from 'waits'. Don't use constant. + looper.run(eventually(checkFlakyConnected, False, retryWait=1, timeout=35)) + looper.runFor(1) + node = TestNode(nodeToCrash.name, basedirpath=tdirWithPoolTxns, config=tconf, + ha=nodeToCrash.nodestack.ha, cliha=nodeToCrash.clientstack.ha) + looper.add(node) + txnPoolNodeSet[idxToCrash] = node + # TODO Select or create the timeout from 'waits'. Don't use constant. + looper.run(eventually(checkFlakyConnected, True, retryWait=2, timeout=50)) + # TODO Select or create the timeout from 'waits'. Don't use constant. 
+ ensureElectionsDone(looper, txnPoolNodeSet, retryWait=2, timeout=50) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) + checkNodesSendingCommits(txnPoolNodeSet) diff --git a/runner.py b/runner.py index 4f65534bec..ca61f3fb12 100644 --- a/runner.py +++ b/runner.py @@ -3,8 +3,13 @@ import sys import argparse +import time -def run(pytest, output_file): + +def run(pytest, output_file, repeatUntilFailure): + if repeatUntilFailure: + log("'repeatUntilFailure' is set") + log("Is going to repeat the test suite until failure") log("Preparing test suite with {}".format(pytest)) testListFile = "test_list.txt" os.system('{} --collect-only > {}'.format(pytest, testListFile)) @@ -24,81 +29,97 @@ def run(pytest, output_file): totalFailed = 0 totalSkipped = 0 totalErros = 0 + runsCount = 0 allFailedTests = [] allErrorTests = [] failureData = [] testRep = 'currentTestReport.txt' - passPat = re.compile("==.+ ([0-9]+) passed,?.+===\n") - skipPat = re.compile("==.+ ([0-9]+) skipped,?.+===\n") - failPat = re.compile("==.+ ([0-9]+) failed,?.+===\n") - errPat = re.compile("==.+ ([0-9]+) error,?.+===\n") + passPat = re.compile(".* ([0-9]+) passed.*$") + skipPat = re.compile(".* ([0-9]+) skipped.*$") + failPat = re.compile(".* ([0-9]+) failed.*$") + errPat = re.compile(".* ([0-9]+) error.*$") failedTestPat = re.compile('____ (test.+) ____') errorTestPat = re.compile('____ (ERROR.+) ____') + while True: + for i, test in enumerate(testList): + # testRep = '{}.rep'.format(test.split("/")[-1]) + log("Going to run {}".format(test)) + testStartTime = time.time() + r = os.system('{} -k "{}" > {}'.format(pytest, test, testRep)) + testExecutionTime = time.time() - testStartTime + reportLines = open(testRep).readlines() + output = ''.join(reportLines) + pas = passPat.search(output) + passed = int(pas.groups()[0]) if pas else 0 + skp = skipPat.search(output) + skipped = int(skp.groups()[0]) if skp else 0 + if r: + fai = failPat.search(output) + err = errPat.search(output) + if not 
(fai or err): + log("Non zero return value from {} run but no failures " + "or errors reported".format(test)) + log(output) + return -1 + failed = int(fai.groups()[0]) if fai else 0 + errors = int(err.groups()[0]) if err else 0 + failedNames = [] + errorNames = [] + startedF = None + startedE = None + for line in reportLines: + if '= FAILURES =' in line: + startedF = True + startedE = None + continue + if '= ERRORS =' in line: + startedF = None + startedE = True + continue + if startedF: + failureData.append(line) + m = failedTestPat.search(line) + if m: + failedNames.append(m.groups()[0]) + if startedE: + failureData.append(line) + m = errorTestPat.search(line) + if m: + errorNames.append(m.groups()[0]) + else: + failed = 0 + errors = 0 + log('In {}, {} passed, {} failed, {} errors, {} skipped, {:.1f}s time ' + '({}/{} progress)'. + format(test, passed, errors, failed, skipped, + testExecutionTime, i+1, len(testList))) + if failed: + logError("Failed tests: {}".format(', '.join(failedNames))) + for nm in failedNames: + allFailedTests.append((test, nm)) + if errors: + logError("Error in tests: {}".format(', '.join(errorNames))) + for nm in errorNames: + allErrorTests.append((test, nm)) + retVal += r + totalPassed += passed + totalFailed += failed + totalErros += errors + totalSkipped += skipped + runsCount += 1 + + if repeatUntilFailure: + if totalFailed or totalErros: + break # repeatUntilFailure set and failures happened + else: + logSuccess('Run #{} was successful'.format(runsCount)) + log('\n\n') - for test in testList: - # testRep = '{}.rep'.format(test.split("/")[-1]) - log("Going to run {}".format(test)) - r = os.system('{} -k "{}" > {}'.format(pytest, test, testRep)) - reportLines = open(testRep).readlines() - output = ''.join(reportLines) - pas = passPat.search(output) - passed = int(pas.groups()[0]) if pas else 0 - skp = skipPat.search(output) - skipped = int(skp.groups()[0]) if skp else 0 - if r: - fai = failPat.search(output) - err = 
errPat.search(output) - if not (fai or err): - log("Non zero return value from {} run but no failures " - "or errors reported".format(test)) - log(output) - return -1 - failed = int(fai.groups()[0]) if fai else 0 - errors = int(err.groups()[0]) if err else 0 - failedNames = [] - errorNames = [] - startedF = None - startedE = None - for line in reportLines: - if '= FAILURES =' in line: - startedF = True - startedE = None - continue - if '= ERRORS =' in line: - startedF = None - startedE = True - continue - if startedF: - failureData.append(line) - m = failedTestPat.search(line) - if m: - failedNames.append(m.groups()[0]) - if startedE: - failureData.append(line) - m = errorTestPat.search(line) - if m: - errorNames.append(m.groups()[0]) else: - failed = 0 - errors = 0 - log('In {}, {} passed, {} failed, {} errors, {} skipped'. - format(test, passed, errors, failed, skipped)) - if failed: - log("Failed tests: {}".format(', '.join(failedNames))) - for nm in failedNames: - allFailedTests.append((test, nm)) - if errors: - log("Error in tests: {}".format(', '.join(errorNames))) - for nm in errorNames: - allErrorTests.append((test, nm)) - retVal += r - totalPassed += passed - totalFailed += failed - totalErros += errors - totalSkipped += skipped - - summaryMsg = 'Total {} passed, {} failed, {} errors, {} skipped'.\ - format(totalPassed, totalFailed, totalErros, totalSkipped) + break # just one run + + summaryMsg = 'Total {} runs {} passed, {} failed, {} errors, {} skipped'.\ + format(runsCount, totalPassed, totalFailed, totalErros, totalSkipped) log(summaryMsg) if totalFailed: @@ -128,11 +149,29 @@ def log(msg): return print(msg, flush=True) +def logError(msg): + return print('\x1b[0;30;41m' + msg + '\x1b[0m', flush=True) + + +def logSuccess(msg): + return print('\x1b[6;30;42m' + msg + '\x1b[0m') + + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--pytest', type=str, help='pytest instance', default='pytest') - 
parser.add_argument('--output', type=str, help='result file', default='../Test-Report.txt') - parser.add_argument('--nooutput', help='no result file', action="store_true") + parser.add_argument('--pytest', type=str, + help='pytest instance', default='python -m pytest') + parser.add_argument('--output', type=str, + help='result file', default='../Test-Report.txt') + parser.add_argument('--nooutput', + help='no result file', action="store_true") + parser.add_argument('--repeat', dest='repeatUntilFailure', + action="store_true", + help='repeat the test suite until failure') args = parser.parse_args() - r = run(pytest=args.pytest, output_file=args.output if not args.nooutput else None) + r = run( + pytest=args.pytest, + output_file=args.output if not args.nooutput else None, + repeatUntilFailure=args.repeatUntilFailure, + ) sys.exit(0 if r == 0 else 1) diff --git a/scripts/gen_node b/scripts/gen_node index 2e147dc0d0..38b4ff378d 100755 --- a/scripts/gen_node +++ b/scripts/gen_node @@ -4,10 +4,11 @@ import argparse from sys import argv -from plenum.common.script_helper import getStewardKeyFromName, initKeep, \ +from plenum.common.script_helper import getStewardKeyFromName, \ generateNodeGenesisTxn, buildKeepDirIfNotExists from plenum.common.constants import CLIENT_STACK_SUFFIX -from plenum.common.util import isHexKey +from plenum.common.keygen_utils import initLocalKeys +from stp_core.crypto.util import isHexKey from plenum.common.config_util import getConfig config = getConfig() @@ -41,18 +42,22 @@ if __name__ == "__main__": # Initialize node stack stewardVerkey = None - if isHexKey(args.steward): - stewardVerkey = args.steward - else: - stewardVerkey = getStewardKeyFromName(config.baseDir, args.steward) + try: + if isHexKey(args.steward): + stewardVerkey = args.steward + else: + stewardVerkey = getStewardKeyFromName(config.baseDir, args.steward) + except Exception as ex: + print(ex) + exit(1) pubkey, verkey = None, None # Initialize node stack print("For node stack, 
stack name is", args.name) try: - pubkey, verkey = initKeep(config.baseDir, args.name, args.seed, - args.force) + pubkey, verkey = initLocalKeys(args.name, config.baseDir, args.seed, + args.force, config=config) except Exception as ex: print(ex) exit() @@ -60,8 +65,8 @@ if __name__ == "__main__": # Initialize client stack print("For client stack, stack name is", args.name + CLIENT_STACK_SUFFIX) try: - initKeep(config.baseDir, args.name + CLIENT_STACK_SUFFIX, args.seed, - args.force) + initLocalKeys(args.name + CLIENT_STACK_SUFFIX, config.baseDir, args.seed, + args.force, config=config) except Exception as ex: print(ex) exit() diff --git a/scripts/gen_steward_key b/scripts/gen_steward_key index 01d67fda6a..5b713a368a 100755 --- a/scripts/gen_steward_key +++ b/scripts/gen_steward_key @@ -4,8 +4,9 @@ import argparse from sys import argv -from plenum.common.script_helper import buildKeepDirIfNotExists, initKeep, \ +from plenum.common.script_helper import buildKeepDirIfNotExists, \ generateStewardGenesisTxn +from plenum.common.keygen_utils import initLocalKeys from plenum.common.config_util import getConfig config = getConfig() @@ -28,7 +29,7 @@ if __name__ == "__main__": try: # Initialize node stack - pubkey, verkey = initKeep(config.baseDir, name, args.seed, args.force) + pubkey, verkey = initLocalKeys(name, config.baseDir, args.seed, args.force) # Print genesis transaction commands generateStewardGenesisTxn(config.baseDir, args.print_gen_txn, args.name, verkey) diff --git a/scripts/generate_plenum_pool_transactions b/scripts/generate_plenum_pool_transactions index be53d457c7..3898b98744 100755 --- a/scripts/generate_plenum_pool_transactions +++ b/scripts/generate_plenum_pool_transactions @@ -6,9 +6,11 @@ from plenum.common.config_util import getConfig config = getConfig() portsStart = 9600 +nodeParamsFileName = 'plenum.env' + if __name__ == "__main__": - TestNetworkSetup.bootstrapTestNodes(config, portsStart, + TestNetworkSetup.bootstrapTestNodes(config, portsStart, 
nodeParamsFileName, getTxnOrderedFields()) diff --git a/scripts/get_keys b/scripts/get_keys index 42f3a0b3a7..bed519dc78 100755 --- a/scripts/get_keys +++ b/scripts/get_keys @@ -8,11 +8,11 @@ import os import sys from plenum.common.sys_util import getLoggedInUser -from raet.nacling import Signer, Privateer +from stp_core.crypto.nacl_wrappers import Signer, Privateer # TODO: Abstract this functionality in a method inside plenum/common/raet.py so -# it can be used in sovrin too. Look at how init_plenum_raet_keep is implemented +# it can be used in sovrin too. Look at how init_plenum_keys is implemented def breakIt(message): print(message) diff --git a/scripts/init_plenum_raet_keep b/scripts/init_plenum_keys similarity index 86% rename from scripts/init_plenum_raet_keep rename to scripts/init_plenum_keys index fa21b0d455..c664dce148 100644 --- a/scripts/init_plenum_raet_keep +++ b/scripts/init_plenum_keys @@ -2,8 +2,9 @@ import argparse -from plenum.common.script_helper import initKeep, getOldAddNewGenStewardCommand, getOldAddNewGenNodeCommand, \ +from plenum.common.script_helper import getOldAddNewGenStewardCommand, getOldAddNewGenNodeCommand, \ buildKeepDirIfNotExists +from plenum.common.keygen_utils import initLocalKeys from plenum.common.constants import CLIENT_STACK_SUFFIX from plenum.common.util import randomString from plenum.common.config_util import getConfig @@ -52,7 +53,8 @@ if __name__ == "__main__": args.seed = randomString(32) print("Generating a random seed for the keypair {}".format(args.seed)) try: - verkey = initKeep(config.baseDir, args.name, args.seed, args.force) + verkey = initLocalKeys(args.name, config.baseDir, args.seed, args.force, + config=config) except Exception as ex: print(ex) exit() @@ -60,8 +62,8 @@ if __name__ == "__main__": # Initialize client stack print("For client stack, stack name is", args.name + CLIENT_STACK_SUFFIX) try: - initKeep(config.baseDir, args.name + CLIENT_STACK_SUFFIX, args.seed, - args.force) + 
initLocalKeys(args.name + CLIENT_STACK_SUFFIX, config.baseDir, args.seed, + args.force, config=config) except Exception as ex: print(ex) exit() diff --git a/scripts/plenum b/scripts/plenum index a8f5957f66..8d28c67a91 100755 --- a/scripts/plenum +++ b/scripts/plenum @@ -17,7 +17,7 @@ import logging # NOTE: Loading of plugin should happen as early as possible # So put all other required imports after loadPlugins function call below -from plenum.common.raet import initLocalKeep +from plenum.common.keygen_utils import initLocalKeys from plenum.common.util import randomString from plenum.common.config_util import getConfig from plenum.common.plugin_helper import loadPlugins @@ -34,7 +34,7 @@ loadPlugins(basedirpath) # NOTE: Put all regular imports below (not related to loadplugin) import sys from plenum.cli.cli import Cli -from plenum.common.looper import Looper +from stp_core.loop.looper import Looper def run_cli(): @@ -42,7 +42,7 @@ def run_cli(): cliNodeReg = config.cliNodeReg commands = sys.argv[1:] for name in {**nodeReg, **cliNodeReg}: - initLocalKeep(name, basedirpath, randomString(32), True) + initLocalKeys(name, basedirpath, randomString(32), True) with Looper(debug=False) as looper: cli = Cli(looper=looper, nodeReg=nodeReg, diff --git a/scripts/start_plenum_node b/scripts/start_plenum_node old mode 100644 new mode 100755 index 6246e8fee0..46afa8db04 --- a/scripts/start_plenum_node +++ b/scripts/start_plenum_node @@ -5,11 +5,11 @@ import sys from ioflo.aid.consoling import Console -from plenum.common.looper import Looper -from plenum.common.types import HA +from stp_core.loop.looper import Looper +from stp_core.types import HA from plenum.common.config_util import getConfig from plenum.server.node import Node -from plenum.common.log import Logger, getRAETLogLevelFromConfig, \ +from stp_core.common.log import Logger, getRAETLogLevelFromConfig, \ getRAETLogFilePath config = getConfig() diff --git a/setup.py b/setup.py index a46ef33cd5..6c6a707bcf 100644 --- 
a/setup.py +++ b/setup.py @@ -61,17 +61,17 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['ledger==0.2.2', - 'raet', 'jsonpickle', 'portalocker==0.5.7', + install_requires=['ledger==0.2.13', 'stp==0.1.5', + 'jsonpickle', 'portalocker==0.5.7', 'prompt_toolkit==0.57', 'pyorient', 'pygments', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', - 'sortedcontainers==1.5.7', 'psutil'], + 'sortedcontainers==1.5.7', 'psutil', 'pip'], extras_require={ 'stats': ['python-firebase'] }, setup_requires=['pytest-runner'], tests_require=['pytest', 'pytest-xdist'], - scripts=['scripts/plenum', 'scripts/init_plenum_raet_keep', + scripts=['scripts/plenum', 'scripts/init_plenum_keys', 'scripts/start_plenum_node', 'scripts/generate_plenum_pool_transactions', 'scripts/gen_steward_key', 'scripts/gen_node', diff --git a/tutorial/tutorial.py b/tutorial/tutorial.py index 264cc23e9d..9844db9bbb 100644 --- a/tutorial/tutorial.py +++ b/tutorial/tutorial.py @@ -5,10 +5,11 @@ from ioflo.base.consoling import getConsole from plenum.client.client import Client from plenum.client.wallet import Wallet -from plenum.common.looper import Looper -from plenum.common.script_helper import initKeep +from stp_core.loop.looper import Looper +from plenum.common.keygen_utils import initLocalKeys from plenum.common.temp_file_util import SafeTemporaryDirectory -from plenum.common.types import HA, NodeDetail +from plenum.common.types import NodeDetail +from stp_core.types import HA from plenum.common.util import randomString from plenum.server.node import Node from plenum.test.malicious_behaviors_node import faultyReply, makeNodeFaulty @@ -31,14 +32,14 @@ """ The nodes need to have the their keys initialized """ - initKeep(tmpdir, 'Alpha', randomString(32), override=True) - initKeep(tmpdir, 'AlphaC', randomString(32), override=True) - initKeep(tmpdir, 'Beta', randomString(32), override=True) - initKeep(tmpdir, 'BetaC', randomString(32), override=True) - initKeep(tmpdir, 
'Gamma', randomString(32), override=True) - initKeep(tmpdir, 'GammaC', randomString(32), override=True) - initKeep(tmpdir, 'Delta', randomString(32), override=True) - initKeep(tmpdir, 'DeltaC', randomString(32), override=True) + initLocalKeys('Alpha', tmpdir, randomString(32), override=True) + initLocalKeys('AlphaC', tmpdir, randomString(32), override=True) + initLocalKeys('Beta', tmpdir, randomString(32), override=True) + initLocalKeys('BetaC', tmpdir, randomString(32), override=True) + initLocalKeys('Gamma', tmpdir, randomString(32), override=True) + initLocalKeys('GammaC', tmpdir, randomString(32), override=True) + initLocalKeys('Delta', tmpdir, randomString(32), override=True) + initLocalKeys('DeltaC', tmpdir, randomString(32), override=True) """ A node registry is a dictionary of Node names and their IP addresses From 87adcddd013c7af34cd7254ab1136a596bb93d47 Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Wed, 3 May 2017 15:24:12 +0300 Subject: [PATCH 006/100] Stable release (#155) * proposed abstraction for network interface * initial commit for zstack * test in progress * fixing problem with verify key location * change in remote * test passing in promiscous mode * adding linger time but it does not help * using disconnect on remote and changing from old style to new style coroutines * completing test * comments * removed sleeps * refactoring tests and KITZStack * making basic node tests work * fixing tests in test_node_connection.py * changes for pool transactions * more changes * fixing more tests * fixing more tests * adding whitelist message * adding whitelist message for node catchup tests * added ELEC tag to logs for troubleshooting * fixing a bug where a node with a primary was nominating itself during re-elections * in the middle * patching non-test node and client objects to make script tests work * added dummy placeholders to allow tests to run until abstraction of networking is complete * all tests except one work * changes in scripts * 
changes for endpoint stack * commenting a log message which makes some tests run slow * fixes in scripts * not printing some logs on CLI * creating node params file * digest recalculation preventaion * checking for port in use for both tcp and udp * ensured new log entries didn't show up in CLI * printing exception with the log * beginnig to abolish key sharing * changes for key sharing * fixing a bug with req reply store and some tests * log messages * fixing logs * removing accidentally committed code * add pattern for hidden files to gitignore * adding disconnect detection * fixing bug for 3 phase messages prior to stable checkpoint * 'gc'ing requests only when checkpoint achived on protocol instances * fixing bug * resolving some race conditions around checkpoints * add option for supresing stdout logging * removing linger and handling exception while sending message through listener * closing auth, stopping prodables when logging error results in BlowUp and upgrating tests * Consistency proof request should specify the target seqNo * skipping a test and raising timeout * skipping test * skipping test * skipping test * skipping test * initial refactoring to move secure transport abstraction into a different project * minor changes * moved some modules to stp * moving node and client stacks * fixed some improper imports * more changes * fixed keystroke error * not calling sorted container's method from a newer api, resolving a bug in catchup when transactions are requested after timeout * [SOV-768] Fixed port not available exception getting ignored and tests failing on windows and linux based system * Removed unused import * flexible timeout and catching exception * Fixed error for port not available exception on windows * Removed extra lines at the end of exceptions file * Moved common logic related to transport to stp * using new names from stp * changes for stp * removing redundant close in orientdb socket * adding tests for catchup scenario and 3 phase 
message outside water mark * fixing import in test * Fix 'infinit' looping on receiving messages when their amount is very high (#97) * fix 'infinit' looping on receiving messages when their amount is very high * move quotas to config * make zstack get params from config only once * add test for quota * increase message quotas to 100 * Feature Added: Jenkins (#99) * Moved util methods to NetwrorkInterfaces; fixes in raet stack * fix key generation * Hotfix: Deps (#101) * - Added a custom jsonpickle handler for enums to avoid the issue https://github.com/jsonpickle/jsonpickle/issues/135 in jsonpickle 0.9.2 which is the canonical version for Ubuntu 16.04. (#98) - Corrected Cli.restoreWalletByPath method. * Merge branches 'stp' and 'zmq' of github.com:evernym/plenum into stp * handling infinite loop problem and fixing test delay * fixing import * bugfix in test * raising timeout * merge * change in ordered * move connection related exceptions to stp project * fix imports of moved exceptions * fixed dependencies * temporarily disabling tests on Windows * renamed init_plenum_raet_keep to init_plenum_keys * removing unused code and changing script mode * Introduced failing of the pytets session in case any unexpected warnings have been made (#104) * changes to troubleshoot testing for coro not awaited * added support for warning checking across all tests * fixed scope for warnfilters fixture * - Updated the warnings filter. Made the rules for ResourceWarning: "unclosed file", ResourceWarning: "unclosed.*socket\.socket" and RuntimeWarning: "coroutine.*was never awaited" global since they are made by the garbage collector in unpredictable points. - Sorted the rules in the warnings filter alphabetically. * Specialized the warnings filter rule for RuntimeWarning about a coroutine was never awaited (to "msgAll" coroutine). 
* Added an explicit call of the garbage collector to the tear-down phase of "warncheck" fixture in order to ensure that warnings will be generated and recorded for all the unclosed resources. * Fixed the issue with that a coroutine "msgAll" object was never awaited. Removed the corresponding "ignore" rule from the warnings filter. * - Removed the rules not being actuated currently from the warnings filter. - Replaced use of line numbers with use of message patterns in the warnings filter rules. * Corrected the message pattern for a rule from the warnings filter. * Added an "ignore" rule for ResourceWarning: "unclosed event loop" to the warnings filter. * Returned back the warnings filter rules for DeprecationWarnings in jsonpickle package. Replaced use of line numbers with use of message patterns in these rules. * adding warning * removing looper and checking while closing orientdb * increasing a timeout * replacing warn with warning methods * fixed stp dependency * fixed tests * skip failing test, created SOV-881. 
* changing logs to print alias * setting resource limit for tests * handling exception while raising resource limits * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring (#106) * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring * removed unwanted error code, refactored list keyrings command so that it correctly works for agent cli * removed unused imports * making CLI accept hex seed of size 64 * move logging to stp repo * mitigating key error in pool manager * changing timeouts so the diconnect check happens twice * handling unavailabily of resource module * adding timeout to test utility method * Versioning fix (#113) * [Closes SOV-905] Big Fixed: Orientdb is not installing with the latest build 0.3.16 * moving error codes to stp * upgrading version of stp * Stp (#116) * use common stp interface for changing of node's HA * get rid of explicitly calling zmq and raet; move all transport stuff into stp interfaces. * fix tests; increment stp version * add AuthMode enum instead of auto parameter * fixed testConnectWithoutKeySharingFails test * increased test timeout * Plenum test runner fix (#115) * Fixed a bug in plenum test runner with an incorrect test results summary pattern. Previously it had been expected that the line with the test results summary must begin with equality signs and end with them. But this line is padded with equality signs only if it is shorter than 80 characters. Now the test results summary patterns don't require equality signs. Instead they may match only the last line in the file. * Corrected test results summary patterns in plenum test runner. 
* fix import of OP_FIELD_NAME * up version of stp to 0.1.24 * Agent issuer wallet restoration (#117) * supplied keys parameter as True to wallet encoding and decoding functions so that dictionary keys which are custom objects restore properly, otherwise it used to be restored as string oppossed to actual object it was before persistence * minor changes in list keyrings to show issuer wallet for agent as well * minor change to print base wallet first and then any other related wallets * up stp version to 0.1.26 * skip tests in test_new_node_catchup.py * scheduling primary selection * Skip failing tests (#122) * skeip test testChangeNodeHaForNonPrimary due to SOV-941 * skip test testProtocolInstanceCannotBecomeActiveWithLessThanFourServers due to SOV-940 * parametrize test timeouts (#120) * rename expectedWait to expectedNodeInterconnectionTime * add signature for other time expect functions * use named timeouts in conftest * move totalConnections from waits to utils * use named timeout in ensureElectionsDone * use float instead of int for seconds * use default args of ensureElectionsDone where it is possible * use named argument for timeouts * use named timeout in helper * use names for parameters * rename 'timeoutPerReq' of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies to 'customTimeoutPerRequest' to emphasize the fact that there is a default one * use default timeout instead of custom timeout of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies where it can be used; use named argument for timeouts * add comments for two functions with similar names * rename checkSufficientRepliesRecvd to checkSufficientRepliesReceived * rename checkSufficientRepliesForRequests to waitForSufficientRepliesForRequests * add 'returns' to docstrings * fix passing of args to waits.expectedElectionTimeout * use waitForSufficientRepliesForRequests and it's default timeout instead of checkSufficientRepliesReceived everywhere it is 
possible * update doc lines * create waitForViewChange and use it in ensureView * replace usages of checkViewNoForNodes with eventually by waitForViewChange * add waits.expectedNodeToNodeMessageDeliveryTime and use it in sendMsgAndCheck * rename checkMsg to checkMessageReceived * rename sendMsgAndCheck to sendMessageAndCheckDelivery * add docstring to sendMessageAndCheckDelivery * remove unused helper function * add expectedGetReadyTimeout and use it in checkPoolReady * rename overrideTimeout parameter to customTimeout in checkNodesConnected * use default timeout of checkNodesConnected * create expectedPoolLedgerCheck and expectedClientConnectionTimeout waits and use them * add todo for ensureDisconnectedToNodes * update waits.expectedPoolLedgerCheck * add todo for checkNodesParticipating * add requestIds parameter for waitForSufficientRepliesForRequests * update docstring of waitForSufficientRepliesForRequests * fix waitForSufficientRepliesForRequests * remove unused imports from test_log_filtering * use named timeout in test_status_command * use waits.expectedTransactionExecutionTime in testTransactions * refactor testTransactions * use waitRequestSuccess and waitBalanceChange in test_cli_with_auction_plugin, move them to test.cli.helper * use named timeout in test_basic_client_commands * use named timeout in helper.checkRequest * create waitClientConnected and use it instead of checkClientConnected with eventually * add docstrings * rename checkNodeStarted to waitNodeStarted and use named timeout 'expectedNodeStartUpTimeout' in it * rename expectedGetReadyTimeout to expectedPoolGetReadyTimeout * rename checkAllNodesStarted to waitAllNodesStarted * fix default value of customTimeout of checkPoolReady * create waitAllNodesUp and use it instead of checkAllNodesUp with eventually * create waitReplyCount and use instead of checkReplyCount and eventually * use named timeouts in test_client * use named timeouts in some more tests * add basic implementation for 
waits.expectedCatchupTime and use it * make expectedCatchupTime get custom ConsistencyProofsTimeout as parameter * use named timeout in testNodeDiscardMessageFromUnknownView * use waits.expectedElectionTimeout and rename timeout arg to custonTimeout in checkEveryNodeHasAtMostOnePrimary * rename timeout argument of plenum/test/node_catchup/test_discard_view_no.py to customTimeout and make it used named timeouts from waits as default * update timeouts in testNodeDiscardMessageFromUnknownView * create waits.expectedRequestStashingTime and use it * add TODO to test_catchup_delayed_nodes * create waitNodeLedgersEquality and use it instead of directo usage of checkNodeLedgersEquality * use waits.expectedPoolLedgerCheck in waitNodeLedgersEquality * use named timeout in testOrderingCase2 * add waits.expectedNominationTimeout and use it * use named timeout in some more tests * add missing empty lines * update waits * add 'TODO[slow-factor]' * update timeouts in the tests * fix testTestNodeDelay and missed import * skip testMultipleRequests test * skip testStatusAfterClientAdded test * fix testInstChangeWithLowerRatioThanDelta * fix test_new_node_catchup.py * fix testChangeHaPersistsPostNodesRestart * fix testAdd2NewNodes * increase expectedElectionTimeout timeout * rename logger.warn => logger.warning * tune timeouts in the tests * make sections in waits.py * add --repat for runner.py * increase expectedCatchupTime timeout * improve runner.py * tune the test timeouts * skip some catchup tests * parametrize test timeouts * rm eventually.py * fix testQueueingReqFromFutureView, testNumOfPrePrepareWithFPlusOneFaults, testNumOfPrepareWithFPlusOneFaults * fix testProtocolInstanceCannotBecomeActiveWithLessThanFourServers * tune propagate and preprepare test timeouts * skip testNumOf*WithFPlusOneFaults * fixed plenum for the latest stp (#127) * fixed plenum for the latest stp * increment stp version * archive runner.py output for all agents (#107) * archive runner.py results * 
using env variable NODE_NAME instaed of func param for artifacts * configured archiveArtifacts to allow empty/no archive * Do view change if a primary is disconnected (#128) * check whether function is a partial function in getCallableName * add tests for view change when primary goes down * start view change if primary went offline * use startViewChangeIfPrimaryWentOffline instead of doElectionIfNeeded * Unclosed file warnings (#124) * Removed "ignore" rule for ResourceWarning about an unclosed file from the warnings filter. * Fixed some causes of ResourceWarnings about unclosed files. * - Fixed some causes of ResourceWarnings about unclosed files. - Corrected plenum.common.txn_util.updateGenesisPoolTxnFile function. * - Fixed the rest causes of ResourceWarnings about unclosed files. - Removed TimeAndSizeRotatingFileHandler class which is not used anymore (the class with the same name from stp is now used instead). * Updated stp-dev dependency to the new version. * Reverted update of stp-dev dependency. * Skipped the tests in plenum.test.test_log_rotation module since they require stp-dev dependency of a newer version (0.1.28 or higher). * Agent generalization (#126) * refactored test_network_setup; added some role-base helper classes for members of the network * fixed issue with wrong import and minor refactoring * more work on tests * bugfixes * interim changes during refactoring * adding init_utils * cleaning up before keys * removed type hint causing issues; this issue is fixed in Python 3.5.3 with a new typing.Coroutine * added a check in DidVerifier to check that we have a verkey * fixed unreliable tmpdir test fixture * modifications to tests to make more robust when running from command line * changed spyable to be able to monkey patch a single method * Added a run_till_quiet, waits for quiet prods This function will wait till there is a quiet period from the prodables. Dependent on prodables correctly reporting events. 
Will run forever if there is always activity. * small tweek to log format to keep | alined * changes to fix some issues happening in end-to-end test * using same seed for creating keys * ignore log files * added a small test for verifier * removed some excpetion class that were moved to STP * init Local keys for client in test network * updated to use new API from shared lib * Enabled the tests in plenum.test.test_log_rotation module. (#135) * check for close method before calling it (#136) * fix testNumOf*WithFPlusOneFaults tests * Updated stp dep * added missed __init__.py for common/member * add missing parameter to bootstrap test network (#143) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * fix the catchup tests (#140) * Close SOV-976 (#144) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-976] Unable to create the genesis transaction files * up stp to 0.1.42 (#148) * [Closes SOV-981] Bug Fixed: Unable to create pool transaction file on the client machine without passing it a node number (#149) * fix generation of trustee txns * fix generation of trustee txns (#151) * Deps updated Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- plenum/cli/cli.py | 5 ++ plenum/client/wallet.py | 4 +- plenum/common/has_file_storage.py | 27 ++++++- plenum/common/init_util.py | 22 ++++++ 
plenum/common/member/__init__.py | 0 plenum/common/member/member.py | 24 ++++++ plenum/common/member/steward.py | 75 ++++++++++++++++++ plenum/common/member/trustee.py | 12 +++ plenum/common/verifier.py | 2 + plenum/config.py | 2 +- plenum/test/cli/helper.py | 4 +- plenum/test/common/test_verifier.py | 10 +++ plenum/test/test_ledger_manager.py | 4 +- plenum/test/test_log_rotation.py | 1 - plenum/test/test_testable.py | 20 ++--- plenum/test/testable.py | 116 ++++++++++++++-------------- setup.py | 2 +- 17 files changed, 254 insertions(+), 76 deletions(-) create mode 100644 plenum/common/init_util.py create mode 100644 plenum/common/member/__init__.py create mode 100644 plenum/common/member/member.py create mode 100644 plenum/common/member/steward.py create mode 100644 plenum/common/member/trustee.py create mode 100644 plenum/test/common/test_verifier.py diff --git a/plenum/cli/cli.py b/plenum/cli/cli.py index 41a5569bc9..eb24d9446b 100644 --- a/plenum/cli/cli.py +++ b/plenum/cli/cli.py @@ -655,6 +655,11 @@ def cmdHandlerToCmdMappings(self): mappings['helpAction'] = helpCmd mappings['statusAction'] = statusCmd mappings['changePrompt'] = changePromptCmd + mappings['newNodeAction'] = newNodeCmd + mappings['newClientAction'] = newClientCmd + mappings['statusNodeAction'] = statusNodeCmd + mappings['statusClientAction'] = statusClientCmd + # mappings['keyShareAction'] = keyShareCmd mappings['loadPluginDirAction'] = loadPluginsCmd mappings['newKeyring'] = newKeyringCmd diff --git a/plenum/client/wallet.py b/plenum/client/wallet.py index 9528d53689..fdf36dc9d1 100644 --- a/plenum/client/wallet.py +++ b/plenum/client/wallet.py @@ -34,9 +34,9 @@ def decrypt(self, key) -> 'Wallet': class Wallet: def __init__(self, - name: str, + name: str=None, supportedDidMethods: DidMethods=None): - self._name = name + self._name = name or 'wallet' + str(id(self)) self.ids = {} # type: Dict[Identifier, IdData] self.idsToSigners = {} # type: Dict[Identifier, Signer] self.aliasesToIds = {} # 
type: Dict[Alias, Identifier] diff --git a/plenum/common/has_file_storage.py b/plenum/common/has_file_storage.py index f07d375d11..5110afb5dd 100644 --- a/plenum/common/has_file_storage.py +++ b/plenum/common/has_file_storage.py @@ -1,5 +1,11 @@ import os +import shutil + +from stp_core.common.log import getlogger + +logger = getlogger() + class HasFileStorage: @@ -13,7 +19,26 @@ def __init__(self, name, baseDir, dataDir=None): @property def dataLocation(self): - return os.path.join(self.basePath, self.dataDir, self.name) + return self.getDataLocation(self.name, self.basePath, self.dataDir) + + @staticmethod + def getDataLocation(name, basePath, dataDir=""): + return os.path.join(basePath, dataDir, name) def hasFile(self, fileName): return os.path.isfile(os.path.join(self.dataLocation, fileName)) + + def wipe(self): + """ + IMPORTANT: calling this method will destroy local data + :return: + """ + self.wipeDataLocation(self.dataLocation) + + @staticmethod + def wipeDataLocation(dataLocation): + try: + shutil.rmtree(dataLocation) + except Exception as ex: + logger.debug("Error while removing temporary directory {}".format( + ex)) diff --git a/plenum/common/init_util.py b/plenum/common/init_util.py new file mode 100644 index 0000000000..8529925e9d --- /dev/null +++ b/plenum/common/init_util.py @@ -0,0 +1,22 @@ +from plenum.common.has_file_storage import HasFileStorage +from plenum.common.keygen_utils import initLocalKeys, initNodeKeysForBothStacks + + +def cleanup_environment(name, base_dir): + dataLocation = HasFileStorage.getDataLocation(name, base_dir) + HasFileStorage.wipeDataLocation(dataLocation) + + +def initialize_node_environment(name, base_dir, sigseed=None, + override_keep=False): + """ + transport-agnostic method; in the future when the transport protocol is + abstracted a bit more, this function and the one below will be the same + and likely a method of an interface + """ + cleanup_environment(name, base_dir) + + _, vk = 
initNodeKeysForBothStacks(name=name, baseDir=base_dir, sigseed=sigseed, + override=override_keep) + + return vk diff --git a/plenum/common/member/__init__.py b/plenum/common/member/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/common/member/member.py b/plenum/common/member/member.py new file mode 100644 index 0000000000..745aa762b5 --- /dev/null +++ b/plenum/common/member/member.py @@ -0,0 +1,24 @@ +from hashlib import sha256 + +from plenum.common.constants import TXN_TYPE, NYM, TARGET_NYM, TXN_ID, ROLE, VERKEY +from plenum.common.types import f + + +class Member: + """ + Base class for different network member contexts. + """ + @staticmethod + def nym_txn(nym, name, verkey=None, role=None, creator=None): + txn = { + TXN_TYPE: NYM, + TARGET_NYM: nym, + TXN_ID: sha256(name.encode()).hexdigest() + } + if verkey is not None: + txn[VERKEY] = verkey + if creator is not None: + txn[f.IDENTIFIER.nm] = creator + if role is not None: + txn[ROLE] = role + return txn diff --git a/plenum/common/member/steward.py b/plenum/common/member/steward.py new file mode 100644 index 0000000000..9ed638f5d9 --- /dev/null +++ b/plenum/common/member/steward.py @@ -0,0 +1,75 @@ +from hashlib import sha256 + +from plenum.common.exceptions import WalletNotSet, WalletNotInitialized +from plenum.common.member.member import Member +from plenum.common.constants import STEWARD, TARGET_NYM, TXN_TYPE, NODE, DATA, \ + CLIENT_IP, ALIAS, CLIENT_PORT, NODE_IP, NODE_PORT, SERVICES, VALIDATOR, \ + TXN_ID +from plenum.common.types import f +from plenum.common.util import hexToFriendly + + +class Steward(Member): + """ + Provides a context for Steward operations. 
+ """ + def __init__(self, name=None, wallet=None): + self.name = name or 'Steward' + str(id(self)) + self._wallet = wallet + self.node = None + + @property + def wallet(self): + if not self._wallet: + raise WalletNotSet + return self._wallet + + @property + def nym(self): + if not self.wallet.defaultId: + raise WalletNotInitialized + return self.wallet.defaultId + + def set_node(self, node, **kwargs): + self.node = node.copy() + if kwargs: + self.node.update(kwargs) + + def generate_genesis_txns(self): + nym_txn = self._nym_txn() + node_txn = self._node_txn() + return [nym_txn, node_txn] + + def _nym_txn(self, creator=None): + return self.nym_txn(self.nym, self.name, + verkey=self.wallet.getVerkey(self.nym), + role=STEWARD, creator=creator) + + def _node_txn(self): + node_nym = hexToFriendly(self.node.verkey) + return self.node_txn(steward_nym=self.nym, + node_name=self.node.name, + nym=node_nym, + ip=self.node.ha[0], + node_port=self.node.ha[1], + client_ip=self.node.cliha[0], + client_port=self.node.cliha[1]) + + @staticmethod + def node_txn(steward_nym, node_name, nym, ip, node_port, client_port, + client_ip=None): + txn = { + TARGET_NYM: nym, + TXN_TYPE: NODE, + f.IDENTIFIER.nm: steward_nym, + DATA: { + CLIENT_IP: client_ip or ip, + ALIAS: node_name, + CLIENT_PORT: client_port, + NODE_IP: ip, + NODE_PORT: node_port, + SERVICES: [VALIDATOR] + }, + TXN_ID: sha256(node_name.encode()).hexdigest() + } + return txn diff --git a/plenum/common/member/trustee.py b/plenum/common/member/trustee.py new file mode 100644 index 0000000000..ffec6ce217 --- /dev/null +++ b/plenum/common/member/trustee.py @@ -0,0 +1,12 @@ +from plenum.common.member.member import Member + + +class Trustee(Member): + """ + Provides a context for Trustee operations. 
+ """ + def __init__(self, name=None, wallet=None): + self.name = name or 'Trustee' + str(id(self)) + self._wallet = wallet + self.node = None + diff --git a/plenum/common/verifier.py b/plenum/common/verifier.py index fabed255c3..3989f25006 100644 --- a/plenum/common/verifier.py +++ b/plenum/common/verifier.py @@ -25,6 +25,8 @@ def __init__(self, verkey, identifier=None): rawIdr = b58decode(identifier) if len(rawIdr) == 32 and not verkey: # assume cryptonym verkey = identifier + + assert verkey, 'verkey must be provided' if verkey[0] == '~': # abbreviated verkey = b58encode(b58decode(identifier) + b58decode(verkey[1:])) diff --git a/plenum/config.py b/plenum/config.py index 7e7482c416..d66a99227e 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -113,7 +113,7 @@ logRotationInterval = 1 logRotationBackupCount = 10 logRotationMaxBytes = 100 * 1024 * 1024 -logFormat = '{asctime:s} | {levelname:8s} | {filename:20s} ({lineno:d}) | {funcName:s} | {message:s}' +logFormat = '{asctime:s} | {levelname:8s} | {filename:20s} ({lineno: >4}) | {funcName:s} | {message:s}' logFormatStyle='{' logLevel=logging.INFO enableStdOutLogging=True diff --git a/plenum/test/cli/helper.py b/plenum/test/cli/helper.py index 03404ff56f..31ad2b6671 100644 --- a/plenum/test/cli/helper.py +++ b/plenum/test/cli/helper.py @@ -22,7 +22,7 @@ from plenum.test.spy_helpers import getAllArgs from plenum.test.test_client import TestClient from plenum.test.test_node import TestNode, checkPoolReady -from plenum.test.testable import Spyable +from plenum.test.testable import spyable from pygments.token import Token from functools import partial from plenum.test import waits @@ -135,7 +135,7 @@ def lastMsg(self): return self.lastPrintArgs['msg'] -@Spyable(methods=[cli.Cli.print, cli.Cli.printTokens]) +@spyable(methods=[cli.Cli.print, cli.Cli.printTokens]) class TestCli(cli.Cli, TestCliCore): # def __init__(self, *args, **kwargs): # super().__init__(*args, **kwargs) diff --git 
a/plenum/test/common/test_verifier.py b/plenum/test/common/test_verifier.py new file mode 100644 index 0000000000..8bf21f35ce --- /dev/null +++ b/plenum/test/common/test_verifier.py @@ -0,0 +1,10 @@ +from plenum.common.verifier import DidVerifier + +SAMPLE_ABBR_VERKEY = '~8zH9ZSyZTFPGJ4ZPL5Rvxx' +SAMPLE_IDENTIFIER = '99BgFBg35BehzfSADV5nM4' +EXPECTED_VERKEY = '5SMfqc4NGeQM21NMx3cB9sqop6KCFFC1TqoGKGptdock' + + +def test_create_verifier(): + verifier = DidVerifier(SAMPLE_ABBR_VERKEY, identifier=SAMPLE_IDENTIFIER) + assert verifier.verkey == EXPECTED_VERKEY diff --git a/plenum/test/test_ledger_manager.py b/plenum/test/test_ledger_manager.py index ce90c0d7b8..51eac321c2 100644 --- a/plenum/test/test_ledger_manager.py +++ b/plenum/test/test_ledger_manager.py @@ -1,8 +1,8 @@ from plenum.common.ledger_manager import LedgerManager -from plenum.test.testable import Spyable +from plenum.test.testable import spyable -@Spyable(methods=[LedgerManager.startCatchUpProcess, +@spyable(methods=[LedgerManager.startCatchUpProcess, LedgerManager.catchupCompleted, LedgerManager.processConsistencyProofReq]) class TestLedgerManager(LedgerManager): diff --git a/plenum/test/test_log_rotation.py b/plenum/test/test_log_rotation.py index 5020a7fc76..5b4bb47502 100644 --- a/plenum/test/test_log_rotation.py +++ b/plenum/test/test_log_rotation.py @@ -1,4 +1,3 @@ -import pytest import os import logging import shutil diff --git a/plenum/test/test_testable.py b/plenum/test/test_testable.py index 3024c8fd44..f70d782252 100644 --- a/plenum/test/test_testable.py +++ b/plenum/test/test_testable.py @@ -5,7 +5,7 @@ from stp_core.common.log import getlogger from plenum.server.node import Node -from plenum.test.testable import Spyable +from plenum.test.testable import spyable pr = slice(3, 5) # params and result @@ -78,26 +78,26 @@ def runThroughAssertions(self, z, ovrdCornResult: str = None): class NewTestableTests(TestHelpers): def testNew(self): - X = Spyable(Node) + X = spyable(Node) print(X) def 
testSpyableBaseClass(self): - SpyBaseClass = Spyable()(NewBaseClass) + SpyBaseClass = spyable()(NewBaseClass) z = SpyBaseClass('x', 'y') self.runThroughAssertions(z) def testSpyableSubClass(self): - SpySubClass = Spyable()(SubClass) + SpySubClass = spyable()(SubClass) z = SpySubClass('x', 'y') self.runThroughAssertions(z) def testSpyableSubClassWithOverride(self): - SpySubClassOvrd = Spyable()(SubClassWithOverride) + SpySubClassOvrd = spyable()(SubClassWithOverride) z = SpySubClassOvrd('x', 'y') self.runThroughAssertions(z, 'hooray!') def testEveryObjectGetsItsOwnSpyLog(self): - SpySubClass = Spyable()(SubClass) + SpySubClass = spyable()(SubClass) y = SpySubClass('a', 'b') z = SpySubClass('x', 'y') self.checkOneInit(y, {'s': 'a', 'p': 'b'}) @@ -105,7 +105,7 @@ def testEveryObjectGetsItsOwnSpyLog(self): def testSpyOnSubsetOfMethods(self): def go(methods, ec: "expected counts"): - SpySubClass = Spyable(methods=methods)(SubClass) + SpySubClass = spyable(methods=methods)(SubClass) z = SpySubClass('a', 'b') self.assertEquals(len(z.spylog), ec[0], "expected certain number of entries in the spy log") @@ -132,7 +132,7 @@ def go(methods, ec: "expected counts"): go([SubClass.eatCorn, SubClass.mymethod], [0, 2, 4]) def testSpyOnOverriddenClassMethod(self): - SpySubClass = Spyable( + SpySubClass = spyable( methods=[SubClassWithOverride.eatCorn, "mymethod"])( SubClassWithOverride) z = SpySubClass('a', 'b') @@ -144,7 +144,7 @@ def testSpyOnOverriddenClassMethod(self): ({'inp': 'hi'}, None)) def testSpyOnOverriddenBaseClassMethod(self): - SpySubClass = Spyable(methods=[NewBaseClass.eatCorn, "mymethod"])( + SpySubClass = spyable(methods=[NewBaseClass.eatCorn, "mymethod"])( SubClassWithOverride) z = SpySubClass('a', 'b') z.mymethod("hi") @@ -156,7 +156,7 @@ def testSpyOnOverriddenBaseClassMethod(self): def testSpyOnCertainClass(self): # known limitation... when super() is called, we are not spy-wrapping # base base class methods. 
- SpySubClass = Spyable(methods=[NewBaseClass.eatCorn, "mymethod"])( + SpySubClass = spyable(methods=[NewBaseClass.eatCorn, "mymethod"])( SubClassWithOverrideAndSuperCall) z = SpySubClass('a', 'b') z.mymethod("hi") diff --git a/plenum/test/testable.py b/plenum/test/testable.py index c90bab79e5..721a575c7a 100644 --- a/plenum/test/testable.py +++ b/plenum/test/testable.py @@ -1,5 +1,4 @@ import inspect -import logging import time from functools import wraps from typing import Any, List, NamedTuple, Tuple, Optional, Iterable, Union, \ @@ -53,62 +52,67 @@ def count(self, method: SpyableMethod) -> int: return sum(1 for x in self if x.method == method) -def Spyable(name: str = None, methods: SpyableMethods = None, deepLevel: int = None): - def spy(func, isInit, shouldSpy): - - sig = inspect.signature(func) - paramNames = [k for k in sig.parameters] - # TODO Find a better way - if paramNames and paramNames[0] == "self": - paramNames = paramNames[1:] - - # sets up spylog, but doesn't spy on init - def initOnly(self, *args, **kwargs): - self.spylog = SpyLog() - return func(self, *args, **kwargs) - - initOnly.__name__ = func.__name__ - - # sets up spylog, and also spys on init - def initWrap(self, *args, **kwargs): - self.spylog = SpyLog() - return wrap(self, *args, **kwargs) - - initWrap.__name__ = func.__name__ - - # wraps a function call - @wraps(func) - def wrap(self, *args, **kwargs): - start = time.perf_counter() - r = None - try: - r = func(self, *args, **kwargs) - except Exception as ex: - r = ex - raise - finally: - params = {} - if kwargs: - for k, v in kwargs.items(): - params[k] = v - if args: - for i, nm in enumerate(paramNames[:len(args)]): - params[nm] = args[i] - - self.spylog.append(Entry(start, - time.perf_counter(), - func.__name__, - params, - r)) - return r - - return wrap if not isInit else initWrap if shouldSpy else initOnly +def spy(func, is_init, should_spy, spy_log=None): + sig = inspect.signature(func) + paramNames = [k for k in sig.parameters] + 
# TODO Find a better way + if paramNames and paramNames[0] == "self": + paramNames = paramNames[1:] + # sets up spylog, but doesn't spy on init + def init_only(self, *args, **kwargs): + self.spylog = SpyLog() + return func(self, *args, **kwargs) + + init_only.__name__ = func.__name__ + + # sets up spylog, and also spys on init + def init_wrap(self, *args, **kwargs): + self.spylog = SpyLog() + return wrap(self, *args, **kwargs) + + init_wrap.__name__ = func.__name__ + + # wraps a function call + @wraps(func) + def wrap(self, *args, **kwargs): + start = time.perf_counter() + r = None + try: + r = func(self, *args, **kwargs) + except Exception as ex: + r = ex + raise + finally: + params = {} + if kwargs: + for k, v in kwargs.items(): + params[k] = v + if args: + for i, nm in enumerate(paramNames[:len(args)]): + params[nm] = args[i] + + used_log = spy_log + + if hasattr(self, 'spylog'): + used_log = self.spylog + + used_log.append(Entry(start, + time.perf_counter(), + func.__name__, + params, + r)) + return r + + return wrap if not is_init else init_wrap if should_spy else init_only + + +def spyable(name: str = None, methods: SpyableMethods = None, deep_level: int = None): def decorator(clas): nonlocal name name = name if name else "Spyable" + clas.__name__ - spyable = type(name, (clas,), {}) + spyable_type = type(name, (clas,), {}) morphed = {} # type: Dict[Callable, Callable] matches = [] for nm, func in [(method, getattr(clas, method)) @@ -125,9 +129,9 @@ def decorator(clas): if shouldSpy or isInit: newFunc = spy(func, isInit, shouldSpy) morphed[func] = newFunc - setattr(spyable, nm, newFunc) + setattr(spyable_type, nm, newFunc) logger.debug("in {} added spy on {}". 
- format(spyable.__name__, nm)) + format(spyable_type.__name__, nm)) matches.append(matched) if methods: @@ -137,7 +141,7 @@ def decorator(clas): "method {} not found, so no spy added".format(m), extra={"cli": False}) - objSearchReplace(spyable, morphed, logMsg="Applying spy remapping", deepLevel=deepLevel) - return spyable + objSearchReplace(spyable_type, morphed, logMsg="Applying spy remapping", deepLevel=deep_level) + return spyable_type return decorator diff --git a/setup.py b/setup.py index 6c6a707bcf..84786ef9c3 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['ledger==0.2.13', 'stp==0.1.5', + install_requires=['ledger==0.2.13', 'stp==0.1.8', 'jsonpickle', 'portalocker==0.5.7', 'prompt_toolkit==0.57', 'pyorient', 'pygments', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', From 5fd64c6f1d310e9835f1fec3da7f34e1bc4c9daf Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Wed, 3 May 2017 16:33:00 +0300 Subject: [PATCH 007/100] Stable release (#156) * proposed abstraction for network interface * initial commit for zstack * test in progress * fixing problem with verify key location * change in remote * test passing in promiscous mode * adding linger time but it does not help * using disconnect on remote and changing from old style to new style coroutines * completing test * comments * removed sleeps * refactoring tests and KITZStack * making basic node tests work * fixing tests in test_node_connection.py * changes for pool transactions * more changes * fixing more tests * fixing more tests * adding whitelist message * adding whitelist message for node catchup tests * added ELEC tag to logs for troubleshooting * fixing a bug where a node with a primary was nominating itself during re-elections * in the middle * patching non-test node and client objects to make script tests work * added dummy placeholders to allow tests to run until abstraction of networking is complete * 
all tests except one work * changes in scripts * changes for endpoint stack * commenting a log message which makes some tests run slow * fixes in scripts * not printing some logs on CLI * creating node params file * digest recalculation prevention * checking for port in use for both tcp and udp * ensured new log entries didn't show up in CLI * printing exception with the log * beginning to abolish key sharing * changes for key sharing * fixing a bug with req reply store and some tests * log messages * fixing logs * removing accidentally committed code * add pattern for hidden files to gitignore * adding disconnect detection * fixing bug for 3 phase messages prior to stable checkpoint * 'gc'ing requests only when checkpoint achieved on protocol instances * fixing bug * resolving some race conditions around checkpoints * add option for suppressing stdout logging * removing linger and handling exception while sending message through listener * closing auth, stopping prodables when logging error results in BlowUp and upgrading tests * Consistency proof request should specify the target seqNo * skipping a test and raising timeout * skipping test * skipping test * skipping test * skipping test * initial refactoring to move secure transport abstraction into a different project * minor changes * moved some modules to stp * moving node and client stacks * fixed some improper imports * more changes * fixed keystroke error * not calling sorted container's method from a newer api, resolving a bug in catchup when transactions are requested after timeout * [SOV-768] Fixed port not available exception getting ignored and tests failing on windows and linux based system * Removed unused import * flexible timeout and catching exception * Fixed error for port not available exception on windows * Removed extra lines at the end of exceptions file * Moved common logic related to transport to stp * using new names from stp * changes for stp * removing redundant close in orientdb socket *
adding tests for catchup scenario and 3 phase message outside water mark * fixing import in test * Fix 'infinit' looping on receiving messages when their amount is very high (#97) * fix 'infinit' looping on receiving messages when their amount is very high * move quotas to config * make zstack get params from config only once * add test for quota * increase message quotas to 100 * Feature Added: Jenkins (#99) * Moved util methods to NetwrorkInterfaces; fixes in raet stack * fix key generation * Hotfix: Deps (#101) * - Added a custom jsonpickle handler for enums to avoid the issue https://github.com/jsonpickle/jsonpickle/issues/135 in jsonpickle 0.9.2 which is the canonical version for Ubuntu 16.04. (#98) - Corrected Cli.restoreWalletByPath method. * Merge branches 'stp' and 'zmq' of github.com:evernym/plenum into stp * handling infinite loop problem and fixing test delay * fixing import * bugfix in test * raising timeout * merge * change in ordered * move connection related exceptions to stp project * fix imports of moved exceptions * fixed dependencies * temporarily disabling tests on Windows * renamed init_plenum_raet_keep to init_plenum_keys * removing unused code and changing script mode * Introduced failing of the pytets session in case any unexpected warnings have been made (#104) * changes to troubleshoot testing for coro not awaited * added support for warning checking across all tests * fixed scope for warnfilters fixture * - Updated the warnings filter. Made the rules for ResourceWarning: "unclosed file", ResourceWarning: "unclosed.*socket\.socket" and RuntimeWarning: "coroutine.*was never awaited" global since they are made by the garbage collector in unpredictable points. - Sorted the rules in the warnings filter alphabetically. * Specialized the warnings filter rule for RuntimeWarning about a coroutine was never awaited (to "msgAll" coroutine). 
* Added an explicit call of the garbage collector to the tear-down phase of "warncheck" fixture in order to ensure that warnings will be generated and recorded for all the unclosed resources. * Fixed the issue with that a coroutine "msgAll" object was never awaited. Removed the corresponding "ignore" rule from the warnings filter. * - Removed the rules not being actuated currently from the warnings filter. - Replaced use of line numbers with use of message patterns in the warnings filter rules. * Corrected the message pattern for a rule from the warnings filter. * Added an "ignore" rule for ResourceWarning: "unclosed event loop" to the warnings filter. * Returned back the warnings filter rules for DeprecationWarnings in jsonpickle package. Replaced use of line numbers with use of message patterns in these rules. * adding warning * removing looper and checking while closing orientdb * increasing a timeout * replacing warn with warning methods * fixed stp dependency * fixed tests * skip failing test, created SOV-881. 
* changing logs to print alias * setting resource limit for tests * handling exception while raising resource limits * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring (#106) * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring * removed unwanted error code, refactored list keyrings command so that it correctly works for agent cli * removed unused imports * making CLI accept hex seed of size 64 * move logging to stp repo * mitigating key error in pool manager * changing timeouts so the disconnect check happens twice * handling unavailability of resource module * adding timeout to test utility method * Versioning fix (#113) * [Closes SOV-905] Bug Fixed: Orientdb is not installing with the latest build 0.3.16 * moving error codes to stp * upgrading version of stp * Stp (#116) * use common stp interface for changing of node's HA * get rid of explicitly calling zmq and raet; move all transport stuff into stp interfaces. * fix tests; increment stp version * add AuthMode enum instead of auto parameter * fixed testConnectWithoutKeySharingFails test * increased test timeout * Plenum test runner fix (#115) * Fixed a bug in plenum test runner with an incorrect test results summary pattern. Previously it had been expected that the line with the test results summary must begin with equality signs and end with them. But this line is padded with equality signs only if it is shorter than 80 characters. Now the test results summary patterns don't require equality signs. Instead they may match only the last line in the file. * Corrected test results summary patterns in plenum test runner.
* fix import of OP_FIELD_NAME * up version of stp to 0.1.24 * Agent issuer wallet restoration (#117) * supplied keys parameter as True to wallet encoding and decoding functions so that dictionary keys which are custom objects restore properly, otherwise it used to be restored as a string as opposed to the actual object it was before persistence * minor changes in list keyrings to show issuer wallet for agent as well * minor change to print base wallet first and then any other related wallets * up stp version to 0.1.26 * skip tests in test_new_node_catchup.py * scheduling primary selection * Skip failing tests (#122) * skip test testChangeNodeHaForNonPrimary due to SOV-941 * skip test testProtocolInstanceCannotBecomeActiveWithLessThanFourServers due to SOV-940 * parametrize test timeouts (#120) * rename expectedWait to expectedNodeInterconnectionTime * add signature for other time expect functions * use named timeouts in conftest * move totalConnections from waits to utils * use named timeout in ensureElectionsDone * use float instead of int for seconds * use default args of ensureElectionsDone where it is possible * use named argument for timeouts * use named timeout in helper * use names for parameters * rename 'timeoutPerReq' of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies to 'customTimeoutPerRequest' to emphasize the fact that there is a default one * use default timeout instead of custom timeout of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies where it can be used; use named argument for timeouts * add comments for two functions with similar names * rename checkSufficientRepliesRecvd to checkSufficientRepliesReceived * rename checkSufficientRepliesForRequests to waitForSufficientRepliesForRequests * add 'returns' to docstrings * fix passing of args to waits.expectedElectionTimeout * use waitForSufficientRepliesForRequests and its default timeout instead of checkSufficientRepliesReceived everywhere it is
possible * update doc lines * create waitForViewChange and use it in ensureView * replace usages of checkViewNoForNodes with eventually by waitForViewChange * add waits.expectedNodeToNodeMessageDeliveryTime and use it in sendMsgAndCheck * rename checkMsg to checkMessageReceived * rename sendMsgAndCheck to sendMessageAndCheckDelivery * add docstring to sendMessageAndCheckDelivery * remove unused helper function * add expectedGetReadyTimeout and use it in checkPoolReady * rename overrideTimeout parameter to customTimeout in checkNodesConnected * use default timeout of checkNodesConnected * create expectedPoolLedgerCheck and expectedClientConnectionTimeout waits and use them * add todo for ensureDisconnectedToNodes * update waits.expectedPoolLedgerCheck * add todo for checkNodesParticipating * add requestIds parameter for waitForSufficientRepliesForRequests * update docstring of waitForSufficientRepliesForRequests * fix waitForSufficientRepliesForRequests * remove unused imports from test_log_filtering * use named timeout in test_status_command * use waits.expectedTransactionExecutionTime in testTransactions * refactor testTransactions * use waitRequestSuccess and waitBalanceChange in test_cli_with_auction_plugin, move them to test.cli.helper * use named timeout in test_basic_client_commands * use named timeout in helper.checkRequest * create waitClientConnected and use it instead of checkClientConnected with eventually * add docstrings * rename checkNodeStarted to waitNodeStarted and use named timeout 'expectedNodeStartUpTimeout' in it * rename expectedGetReadyTimeout to expectedPoolGetReadyTimeout * rename checkAllNodesStarted to waitAllNodesStarted * fix default value of customTimeout of checkPoolReady * create waitAllNodesUp and use it instead of checkAllNodesUp with eventually * create waitReplyCount and use instead of checkReplyCount and eventually * use named timeouts in test_client * use named timeouts in some more tests * add basic implementation for 
waits.expectedCatchupTime and use it * make expectedCatchupTime get custom ConsistencyProofsTimeout as parameter * use named timeout in testNodeDiscardMessageFromUnknownView * use waits.expectedElectionTimeout and rename timeout arg to custonTimeout in checkEveryNodeHasAtMostOnePrimary * rename timeout argument of plenum/test/node_catchup/test_discard_view_no.py to customTimeout and make it used named timeouts from waits as default * update timeouts in testNodeDiscardMessageFromUnknownView * create waits.expectedRequestStashingTime and use it * add TODO to test_catchup_delayed_nodes * create waitNodeLedgersEquality and use it instead of directo usage of checkNodeLedgersEquality * use waits.expectedPoolLedgerCheck in waitNodeLedgersEquality * use named timeout in testOrderingCase2 * add waits.expectedNominationTimeout and use it * use named timeout in some more tests * add missing empty lines * update waits * add 'TODO[slow-factor]' * update timeouts in the tests * fix testTestNodeDelay and missed import * skip testMultipleRequests test * skip testStatusAfterClientAdded test * fix testInstChangeWithLowerRatioThanDelta * fix test_new_node_catchup.py * fix testChangeHaPersistsPostNodesRestart * fix testAdd2NewNodes * increase expectedElectionTimeout timeout * rename logger.warn => logger.warning * tune timeouts in the tests * make sections in waits.py * add --repat for runner.py * increase expectedCatchupTime timeout * improve runner.py * tune the test timeouts * skip some catchup tests * parametrize test timeouts * rm eventually.py * fix testQueueingReqFromFutureView, testNumOfPrePrepareWithFPlusOneFaults, testNumOfPrepareWithFPlusOneFaults * fix testProtocolInstanceCannotBecomeActiveWithLessThanFourServers * tune propagate and preprepare test timeouts * skip testNumOf*WithFPlusOneFaults * fixed plenum for the latest stp (#127) * fixed plenum for the latest stp * increment stp version * archive runner.py output for all agents (#107) * archive runner.py results * 
using env variable NODE_NAME instead of func param for artifacts * configured archiveArtifacts to allow empty/no archive * Do view change if a primary is disconnected (#128) * check whether function is a partial function in getCallableName * add tests for view change when primary goes down * start view change if primary went offline * use startViewChangeIfPrimaryWentOffline instead of doElectionIfNeeded * Unclosed file warnings (#124) * Removed "ignore" rule for ResourceWarning about an unclosed file from the warnings filter. * Fixed some causes of ResourceWarnings about unclosed files. * - Fixed some causes of ResourceWarnings about unclosed files. - Corrected plenum.common.txn_util.updateGenesisPoolTxnFile function. * - Fixed the remaining causes of ResourceWarnings about unclosed files. - Removed TimeAndSizeRotatingFileHandler class which is not used anymore (the class with the same name from stp is now used instead). * Updated stp-dev dependency to the new version. * Reverted update of stp-dev dependency. * Skipped the tests in plenum.test.test_log_rotation module since they require stp-dev dependency of a newer version (0.1.28 or higher). * Agent generalization (#126) * refactored test_network_setup; added some role-based helper classes for members of the network * fixed issue with wrong import and minor refactoring * more work on tests * bugfixes * interim changes during refactoring * adding init_utils * cleaning up before keys * removed type hint causing issues; this issue is fixed in Python 3.5.3 with a new typing.Coroutine * added a check in DidVerifier to check that we have a verkey * fixed unreliable tmpdir test fixture * modifications to tests to make more robust when running from command line * changed spyable to be able to monkey patch a single method * Added a run_till_quiet, waits for quiet prods This function will wait till there is a quiet period from the prodables. Dependent on prodables correctly reporting events.
Will run forever if there is always activity. * small tweak to log format to keep | aligned * changes to fix some issues happening in end-to-end test * using same seed for creating keys * ignore log files * added a small test for verifier * removed some exception classes that were moved to STP * init Local keys for client in test network * updated to use new API from shared lib * Enabled the tests in plenum.test.test_log_rotation module. (#135) * check for close method before calling it (#136) * fix testNumOf*WithFPlusOneFaults tests * Updated stp dep * added missed __init__.py for common/member * add missing parameter to bootstrap test network (#143) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * fix the catchup tests (#140) * Close SOV-976 (#144) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-976] Unable to create the genesis transaction files * up stp to 0.1.42 (#148) * [Closes SOV-981] Bug Fixed: Unable to create pool transaction file on the client machine without passing it a node number (#149) * fix generation of trustee txns * fix generation of trustee txns (#151) * fix Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- .gitignore | 3 + Jenkinsfile | 20 +- plenum/client/client.py | 3 +- plenum/common/exceptions.py | 37 +++ plenum/common/keygen_utils.py | 12 +-
plenum/common/test_network_setup.py | 297 ++++++++++-------- plenum/common/util.py | 8 +- plenum/persistence/orientdb_store.py | 25 +- plenum/server/node.py | 3 +- plenum/test/conftest.py | 4 +- ..._node_reject_invalid_txn_during_catchup.py | 1 - .../test_node_request_consistency_proof.py | 1 - ...m_of_pre_prepare_with_f_plus_one_faults.py | 1 - ...t_num_of_prepare_with_f_plus_one_faults.py | 1 - .../test/script/test_bootstrap_test_node.py | 13 +- plenum/test/test_client.py | 4 +- plenum/test/test_log_rotation.py | 2 + plenum/test/test_node.py | 10 +- plenum/test/waits.py | 4 + 19 files changed, 282 insertions(+), 167 deletions(-) diff --git a/.gitignore b/.gitignore index 8f7c08978f..81c1cf8e8a 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,6 @@ docs/source/api_docs/ # hidden files .* + +# log files +*.log diff --git a/Jenkinsfile b/Jenkinsfile index 7bac441e32..d72f2688c7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -16,16 +16,10 @@ def testUbuntu = { testEnv.inside('--network host') { echo 'Ubuntu Test: Install dependencies' - testHelpers.installDeps() + testHelpers.install() echo 'Ubuntu Test: Test' - def resFile = "test-result.${NODE_NAME}.txt" - try { - sh "python runner.py --pytest \"python -m pytest\" --output \"$resFile\"" - } - finally { - archiveArtifacts allowEmptyArchive: true, artifacts: "$resFile" - } + testHelpers.testRunner(resFile: "test-result.${NODE_NAME}.txt") } } finally { @@ -71,16 +65,10 @@ def testWindowsNoDocker = { testHelpers.createVirtualEnvAndExecute({ python, pip -> echo 'Windows No Docker Test: Install dependencies' - testHelpers.installDepsBat(python, pip) + testHelpers.install(python: python, pip: pip, isVEnv: true) echo 'Windows No Docker Test: Test' - def resFile = "test-result.${NODE_NAME}.txt" - try { - bat "${python} runner.py --pytest \"${python} -m pytest\" --output \"$resFile\"" - } - finally { - archiveArtifacts allowEmptyArchive: true, artifacts: "$resFile" - } + testHelpers.testRunner(resFile: 
"test-result.${NODE_NAME}.txt", python: python) }) } finally { diff --git a/plenum/client/client.py b/plenum/client/client.py index c936b74c6a..eec7d4fd7e 100644 --- a/plenum/client/client.py +++ b/plenum/client/client.py @@ -76,7 +76,7 @@ def __init__(self, # TODO: Have a way for a client to have a user friendly name. Does it # matter now, it used to matter in some CLI exampples in the past. # self.name = name - self.name = self.stackName + self.name = self.stackName or 'Client~' + str(id(self)) cha = None # If client information already exists is RAET then use that @@ -127,6 +127,7 @@ def __init__(self, self.created = time.perf_counter() # noinspection PyCallingNonCallable + # TODO I think this is a bug here, sighex is getting passed in the seed parameter self.nodestack = self.nodeStackClass(stackargs, self.handleOneNodeMsg, self.nodeReg, diff --git a/plenum/common/exceptions.py b/plenum/common/exceptions.py index 74b67cec5d..36324b4de8 100644 --- a/plenum/common/exceptions.py +++ b/plenum/common/exceptions.py @@ -1,3 +1,4 @@ +from plenum.server.suspicion_codes import Suspicion from re import compile from plenum.server.suspicion_codes import Suspicion @@ -9,6 +10,23 @@ def __init__(self, identifier=None, reqId=None): self.reqId = reqId +class NodeError(Exception): + pass + + +class PortNotAvailableForNodeWebServer(NodeError): + pass + + +class RemoteError(NodeError): + def __init__(self, remote): + self.remote = remote + + +class RemoteNotFound(RemoteError): + pass + + class BaseExc(Exception): # def __init__(self, code: int=None, reason: str=None): # self.code = code @@ -212,6 +230,25 @@ class OrientDBNotRunning(GraphStorageNotAvailable): pass +class WalletError(Exception): + pass + + +class WalletNotSet(WalletError): + pass + + +class WalletNotInitialized(WalletError): + pass + + +class PortNotAvailable(OSError): + def __init__(self, port): + self.port = port + super().__init__("port not available: {}".format(port)) + + class OperationError(Exception): def 
__init__(self, error): super().__init__("error occurred during operation: {}".format(error)) + diff --git a/plenum/common/keygen_utils.py b/plenum/common/keygen_utils.py index 85a1dc8e34..be307ffda2 100644 --- a/plenum/common/keygen_utils.py +++ b/plenum/common/keygen_utils.py @@ -1,6 +1,7 @@ import os from plenum.common.stacks import nodeStackClass +from stp_core.crypto.util import randomSeed from stp_zmq.util import createCertsFromKeys from plenum.common.constants import CLIENT_STACK_SUFFIX @@ -19,12 +20,15 @@ def initRemoteKeys(name, baseDir, sigseed, verkey, override=False, config=None): override=override) - def initNodeKeysForBothStacks(name, baseDir, sigseed, override=False, config=None): - nodeStackClass.initLocalKeys(name, baseDir, sigseed, override=override) - nodeStackClass.initLocalKeys(name + CLIENT_STACK_SUFFIX, baseDir, sigseed, - override=override) + # `sigseed` is initailsed to keep the seed same for both stacks. + # Both node and client stacks need to have same keys + sigseed = sigseed or randomSeed() + nodeStackClass.initLocalKeys(name + CLIENT_STACK_SUFFIX, baseDir, sigseed, + override=override) + return nodeStackClass.initLocalKeys(name, baseDir, sigseed, + override=override) def areKeysSetup(name, baseDir, config=None): diff --git a/plenum/common/test_network_setup.py b/plenum/common/test_network_setup.py index dee8d86ad9..9ddb86e938 100644 --- a/plenum/common/test_network_setup.py +++ b/plenum/common/test_network_setup.py @@ -1,19 +1,19 @@ import argparse import os -from hashlib import sha256 +from collections import namedtuple + +from ledger.ledger import Ledger from ledger.serializers.compact_serializer import CompactSerializer from stp_core.crypto.nacl_wrappers import Signer from ledger.compact_merkle_tree import CompactMerkleTree -from ledger.ledger import Ledger +from plenum.common.member.member import Member +from plenum.common.member.steward import Steward from plenum.common.keygen_utils import initLocalKeys -from 
plenum.common.constants import TARGET_NYM, TXN_TYPE, DATA, ALIAS, \ - TXN_ID, NODE, CLIENT_IP, CLIENT_PORT, NODE_IP, NODE_PORT, CLIENT_STACK_SUFFIX, NYM, \ - STEWARD, ROLE, SERVICES, VALIDATOR, TRUSTEE -from plenum.common.types import f -from plenum.common.util import hexToFriendly +from plenum.common.constants import STEWARD, CLIENT_STACK_SUFFIX, TRUSTEE +from plenum.common.util import hexToFriendly, adict class TestNetworkSetup: @@ -46,142 +46,115 @@ def writeNodeParamsFile(filePath, name, nPort, cPort): with open(filePath, 'w') as f: f.writelines(os.linesep.join(contents)) - @staticmethod - def bootstrapTestNodesCore(config, envName, appendToLedgers, - domainTxnFieldOrder, - ips, nodeCount, clientCount, - nodeNum, startingPort, nodeParamsFileName): - baseDir = config.baseDir - if not os.path.exists(baseDir): - os.makedirs(baseDir, exist_ok=True) + @classmethod + def bootstrapTestNodesCore(cls, config, envName, appendToLedgers, + domainTxnFieldOrder, trustee_def, steward_defs, + node_defs, client_defs, localNodes, nodeParamsFileName): - localNodes = not ips + if not localNodes: + localNodes = {} + try: + if isinstance(localNodes, int): + _localNodes = {localNodes} + else: + _localNodes = {int(_) for _ in localNodes} + except BaseException as exc: + raise RuntimeError('nodeNum must be an int or set of ints') from exc - if localNodes: - ips = ['127.0.0.1'] * nodeCount - else: - ips = ips.split(",") - if len(ips) != nodeCount: - if len(ips) > nodeCount: - ips = ips[:nodeCount] - else: - ips += ['127.0.0.1'] * (nodeCount - len(ips)) + baseDir = cls.setup_base_dir(config) - if hasattr(config, "ENVS") and envName: - poolTxnFile = config.ENVS[envName].poolLedger - domainTxnFile = config.ENVS[envName].domainLedger - else: - poolTxnFile = config.poolTransactionsFile - domainTxnFile = config.domainTransactionsFile + poolLedger = cls.init_pool_ledger(appendToLedgers, baseDir, config, + envName) - poolLedger = Ledger(CompactMerkleTree(), - dataDir=baseDir, - 
fileName=poolTxnFile) + domainLedger = cls.init_domain_ledger(appendToLedgers, baseDir, config, + envName, domainTxnFieldOrder) - domainLedger = Ledger(CompactMerkleTree(), - serializer=CompactSerializer(fields= - domainTxnFieldOrder), - dataDir=baseDir, - fileName=domainTxnFile) + trustee_txn = Member.nym_txn(trustee_def.nym, trustee_def.name, role=TRUSTEE) + domainLedger.add(trustee_txn) - if not appendToLedgers: - poolLedger.reset() - domainLedger.reset() - - trusteeName = "Trustee1" - sigseed = TestNetworkSetup.getSigningSeed(trusteeName) - verkey = Signer(sigseed).verhex - trusteeNym = TestNetworkSetup.getNymFromVerkey(verkey) - txn = { - TARGET_NYM: trusteeNym, - TXN_TYPE: NYM, - # TODO: Trustees dont exist in Plenum, but only in Sovrin. - # This should be moved to Sovrin - ROLE: TRUSTEE, - ALIAS: trusteeName, - TXN_ID: sha256(trusteeName.encode()).hexdigest() - } - domainLedger.add(txn) - - steward1Nym = None - for num in range(1, nodeCount + 1): - stewardName = "Steward" + str(num) - sigseed = TestNetworkSetup.getSigningSeed(stewardName) - verkey = Signer(sigseed).verhex - stewardNym = TestNetworkSetup.getNymFromVerkey(verkey) - txn = { - TARGET_NYM: stewardNym, - TXN_TYPE: NYM, - ROLE: STEWARD, - ALIAS: stewardName, - TXN_ID: sha256(stewardName.encode()).hexdigest() - } - if num == 1: - steward1Nym = stewardNym - else: - # The first steward adds every steward - txn[f.IDENTIFIER.nm] = steward1Nym - domainLedger.add(txn) + for sd in steward_defs: + nym_txn = Member.nym_txn(sd.nym, sd.name, role=STEWARD, + creator=trustee_def.nym) + domainLedger.add(nym_txn) - nodeName = "Node" + str(num) - nodePort, clientPort = startingPort + (num * 2 - 1), startingPort \ - + (num * 2) - ip = ips[num - 1] - sigseed = TestNetworkSetup.getSigningSeed(nodeName) - if nodeNum == num: - _, verkey = initLocalKeys(nodeName, baseDir, sigseed, True, - config=config) - _, verkey = initLocalKeys(nodeName+CLIENT_STACK_SUFFIX, baseDir, - sigseed, True, config=config) + for nd in 
node_defs: + + if nd.idx in _localNodes: + _, verkey = initLocalKeys(nd.name, baseDir, + nd.sigseed, True, config=config) + _, verkey = initLocalKeys(nd.name+CLIENT_STACK_SUFFIX, baseDir, + nd.sigseed, True, config=config) verkey = verkey.encode() - print("This node with name {} will use ports {} and {} for " - "nodestack and clientstack respectively" - .format(nodeName, nodePort, clientPort)) + assert verkey == nd.verkey - if not localNodes: + if nd.ip != '127.0.0.1': paramsFilePath = os.path.join(baseDir, nodeParamsFileName) print('Nodes will not run locally, so writing ' '{}'.format(paramsFilePath)) TestNetworkSetup.writeNodeParamsFile( - paramsFilePath, nodeName, nodePort, clientPort) + paramsFilePath, nd.name, nd.port, nd.client_port) + print("This node with name {} will use ports {} and {} for " + "nodestack and clientstack respectively" + .format(nd.name, nd.port, nd.client_port)) else: - verkey = Signer(sigseed).verhex - txn = { - TARGET_NYM: TestNetworkSetup.getNymFromVerkey(verkey), - TXN_TYPE: NODE, - f.IDENTIFIER.nm: stewardNym, - DATA: { - CLIENT_IP: ip, - ALIAS: nodeName, - CLIENT_PORT: clientPort, - NODE_IP: ip, - NODE_PORT: nodePort, - SERVICES: [VALIDATOR] - }, - TXN_ID: sha256(nodeName.encode()).hexdigest() - } - poolLedger.add(txn) - - for num in range(1, clientCount + 1): - clientName = "Client" + str(num) - sigseed = TestNetworkSetup.getSigningSeed(clientName) - verkey = Signer(sigseed).verhex - txn = { - f.IDENTIFIER.nm: steward1Nym, - TARGET_NYM: TestNetworkSetup.getNymFromVerkey(verkey), - TXN_TYPE: NYM, - ALIAS: clientName, - TXN_ID: sha256(clientName.encode()).hexdigest() - } + verkey = nd.verkey + node_nym = cls.getNymFromVerkey(verkey) + + node_txn = Steward.node_txn(nd.steward_nym, nd.name, node_nym, + nd.ip, nd.port, nd.client_port) + poolLedger.add(node_txn) + + for cd in client_defs: + txn = Member.nym_txn(cd.nym, cd.name, creator=trustee_def.nym) domainLedger.add(txn) poolLedger.stop() domainLedger.stop() - @staticmethod - def 
bootstrapTestNodes(config, startingPort, nodeParamsFileName, + @classmethod + def init_pool_ledger(cls, appendToLedgers, baseDir, config, envName): + poolTxnFile = cls.pool_ledger_file_name(config, envName) + pool_ledger = Ledger(CompactMerkleTree(), dataDir=baseDir, + fileName=poolTxnFile) + if not appendToLedgers: + pool_ledger.reset() + return pool_ledger + + @classmethod + def init_domain_ledger(cls, appendToLedgers, baseDir, config, envName, domainTxnFieldOrder): + domainTxnFile = cls.domain_ledger_file_name(config, envName) + ser = CompactSerializer(fields=domainTxnFieldOrder) + domain_ledger = Ledger(CompactMerkleTree(), serializer=ser, + dataDir=baseDir, fileName=domainTxnFile) + if not appendToLedgers: + domain_ledger.reset() + return domain_ledger + + @classmethod + def pool_ledger_file_name(cls, config, envName): + if hasattr(config, "ENVS") and envName: + return config.ENVS[envName].poolLedger + else: + return config.poolTransactionsFile + + @classmethod + def domain_ledger_file_name(cls, config, envName): + if hasattr(config, "ENVS") and envName: + return config.ENVS[envName].domainLedger + else: + return config.domainTransactionsFile + + @classmethod + def setup_base_dir(cls, config): + baseDir = config.baseDir + if not os.path.exists(baseDir): + os.makedirs(baseDir, exist_ok=True) + return baseDir + + @classmethod + def bootstrapTestNodes(cls, config, startingPort, nodeParamsFileName, domainTxnFieldOrder): parser = argparse.ArgumentParser( description="Generate pool transactions for testing") @@ -231,8 +204,78 @@ def bootstrapTestNodes(config, startingPort, nodeParamsFileName, assert nodeNum <= nodeCount, "nodeNum should be less than equal " \ "to nodeCount" - TestNetworkSetup.bootstrapTestNodesCore(config, envName, appendToLedgers, - domainTxnFieldOrder, - ips, nodeCount, clientCount, - nodeNum, startingPort, - nodeParamsFileName) + steward_defs, node_defs = cls.gen_defs(ips, nodeCount, startingPort) + client_defs = cls.gen_client_defs(clientCount) 
+ trustee_def = cls.gen_trustee_def(1) + cls.bootstrapTestNodesCore(config, envName, appendToLedgers, + domainTxnFieldOrder, trustee_def, + steward_defs, node_defs, client_defs, + nodeNum, nodeParamsFileName) + + @classmethod + def gen_defs(cls, ips, nodeCount, starting_port): + """ + Generates some default steward and node definitions for tests + :param ips: array of ip addresses + :param nodeCount: number of stewards/nodes + :param starting_port: ports are assigned incremental starting with this + :return: duple of steward and node definitions + """ + if not ips: + ips = ['127.0.0.1'] * nodeCount + else: + ips = ips.split(",") + if len(ips) != nodeCount: + if len(ips) > nodeCount: + ips = ips[:nodeCount] + else: + ips += ['127.0.0.1'] * (nodeCount - len(ips)) + + steward_defs = [] + node_defs = [] + for i in range(1, nodeCount + 1): + d = adict() + d.name = "Steward" + str(i) + s_sigseed = cls.getSigningSeed(d.name) + s_verkey = Signer(s_sigseed).verhex + d.nym = cls.getNymFromVerkey(s_verkey) + steward_defs.append(d) + + name = "Node" + str(i) + sigseed = cls.getSigningSeed(name) + node_defs.append(NodeDef( + name=name, + ip=ips[i - 1], + port=starting_port + (i * 2) - 1, + client_port=starting_port + (i * 2), + idx=i, + sigseed=sigseed, + verkey=Signer(sigseed).verhex, + steward_nym=d.nym)) + return steward_defs, node_defs + + @classmethod + def gen_client_def(cls, idx): + d = adict() + d.name = "Client" + str(idx) + d.sigseed = cls.getSigningSeed(d.name) + d.verkey = Signer(d.sigseed).verhex + d.nym = cls.getNymFromVerkey(d.verkey) + return d + + @classmethod + def gen_client_defs(cls, clientCount): + return [cls.gen_client_def(idx) for idx in range(1, clientCount + 1)] + + @classmethod + def gen_trustee_def(cls, idx): + d = adict() + d.name = 'Trustee' + str(idx) + d.sigseed = cls.getSigningSeed(d.name) + d.verkey = Signer(d.sigseed).verhex + d.nym = cls.getNymFromVerkey(d.verkey) + return d + + +NodeDef = namedtuple('NodeDef', 'name, ip, port, client_port, ' 
+ 'idx, sigseed, verkey, steward_nym') diff --git a/plenum/common/util.py b/plenum/common/util.py index f4ea48bbe8..906d9eaa74 100644 --- a/plenum/common/util.py +++ b/plenum/common/util.py @@ -240,6 +240,12 @@ def __getitem__(self, key): super(adict, self).__setitem__(key, found) return found + def copy(self): + return self.__copy__() + + def __copy__(self): + return adict(**self) + __setattr__ = __setitem__ __getattr__ = __getitem__ @@ -520,4 +526,4 @@ def getLastModifiedTime(file): filePattern = "*.{}".format(WALLET_FILE_EXTENSION) newest = max(glob.iglob('{}/{}'.format(dir, filePattern)), key=getLastModifiedTime) - return basename(newest) \ No newline at end of file + return basename(newest) diff --git a/plenum/persistence/orientdb_store.py b/plenum/persistence/orientdb_store.py index b9010d9968..a2ced3525e 100644 --- a/plenum/persistence/orientdb_store.py +++ b/plenum/persistence/orientdb_store.py @@ -14,8 +14,7 @@ def __init__(self, user, password, dbName, host="localhost", port=2424, storageType=pyorient.STORAGE_TYPE_MEMORY): self.dbType = dbType try: - self.client = pyorient.OrientDB(host=host, port=port) - self.session_id = self.client.connect(user, password) + self.client = self.new_orientdb_client(host, port, user, password) except pyorient.exceptions.PyOrientConnectionException: raise OrientDBNotRunning("OrientDB connection failed. 
Check if DB is running " "on port {}".format(port)) @@ -83,6 +82,28 @@ def getPlaceHolderQueryStringFromDict(args: Dict, joiner=", "): items.append(("{} = " + valPlaceHolder).format(key, val)) return joiner.join(items) + @staticmethod + def new_orientdb_client(host, port, user, password): + client = pyorient.OrientDB(host=host, port=port) + session_id = client.connect(user, password) + assert session_id, 'Problem with connecting to OrientDB' + return client + + def wipe(self): + """ + IMPORTANT: this is destructive; use at your own risk + """ + assert self.client._connection, 'Client must be connected to the db' + self.wipe_db(self.client, self.client._connection.db_opened) + + @staticmethod + def wipe_db(client, dbName): + try: + client.db_drop(dbName) + logger.debug("Dropped db {}".format(dbName)) + except Exception as ex: + logger.debug("Error while dropping db {}: {}".format(dbName, ex)) + def close(self): if self.client._connection.connected: self.client.db_close(self.client._connection.db_opened) diff --git a/plenum/server/node.py b/plenum/server/node.py index 44f8c27dc9..dca87a06dd 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -559,7 +559,8 @@ def onStopping(self): 'primary storage: {}'.format(self, ex)) try: - self.secondaryStorage.close() + if callable(getattr(self.secondaryStorage, 'close', None)): + self.secondaryStorage.close() except Exception as ex: logger.warning('{} got exception while closing ' 'secondary storage: {}'.format(self, ex)) diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index 2477c216d6..2fbc854e58 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -16,6 +16,7 @@ import pip import pytest from plenum.common.keygen_utils import initNodeKeysForBothStacks +from stp_core.common.logging.handlers import TestingHandler from stp_core.crypto.util import randomSeed from stp_core.network.port_dispenser import genHa from stp_core.types import HA @@ -28,7 +29,6 @@ from stp_core.loop.eventually 
import eventually, eventuallyAll from plenum.common.exceptions import BlowUp from stp_core.common.log import getlogger -from stp_core.common.logging.handlers import TestingHandler from stp_core.loop.looper import Looper, Prodable from plenum.common.constants import TXN_TYPE, DATA, NODE, ALIAS, CLIENT_PORT, \ CLIENT_IP, NODE_PORT, NYM, CLIENT_STACK_SUFFIX, PLUGIN_BASE_DIR_PATH @@ -350,7 +350,7 @@ def reqAcked1(looper, nodeSet, client1, sent1, faultyNodes): numerOfNodes = len(nodeSet) # Wait until request received by all nodes - propTimeout = waits.expectedClientRequestPropagationTime(numerOfNodes) + propTimeout = waits.expectedClientToNodeMessageDeliveryTime(numerOfNodes) coros = [partial(checkLastClientReqForNode, node, sent1) for node in nodeSet] looper.run(eventuallyAll(*coros, diff --git a/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py b/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py index 61c1ababda..7a8db89dc4 100644 --- a/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py +++ b/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py @@ -21,7 +21,6 @@ txnCount = 10 -@pytest.mark.skip(reason='fails, https://evernym.atlassian.net/browse/SOV-928') def testNodeRejectingInvalidTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): """ A newly joined node is catching up and sends catchup requests to other diff --git a/plenum/test/node_catchup/test_node_request_consistency_proof.py b/plenum/test/node_catchup/test_node_request_consistency_proof.py index b77904709d..67209dd14c 100644 --- a/plenum/test/node_catchup/test_node_request_consistency_proof.py +++ b/plenum/test/node_catchup/test_node_request_consistency_proof.py @@ -20,7 +20,6 @@ logger = getlogger() -@pytest.mark.skip(reason='fails, https://evernym.atlassian.net/browse/SOV-928') def testNodeRequestingConsProof(txnPoolNodeSet, nodeCreatedAfterSomeTxns): """ All of the 4 old nodes delay the processing of LEDGER_STATUS from the newly 
diff --git a/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py b/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py index 44565233ca..e070a3b546 100644 --- a/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py @@ -55,7 +55,6 @@ def preprepared1WithDelay(looper, nodeSet, propagated1, faultyNodes): timeout=timeouts) -@pytest.mark.skip(reason='SOV-944') def testNumOfPrePrepareWithFPlusOneFaults( afterElection, noRetryReq, diff --git a/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py b/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py index d98a1f535a..ae5b467409 100644 --- a/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py @@ -48,6 +48,5 @@ def preprepared1WithDelay(looper, nodeSet, propagated1, faultyNodes): timeout=timeouts) -@pytest.mark.skip(reason='SOV-944') def testNumOfPrepareWithFPlusOneFaults(afterElection, noRetryReq, preprepared1WithDelay): pass diff --git a/plenum/test/script/test_bootstrap_test_node.py b/plenum/test/script/test_bootstrap_test_node.py index 644818703b..7d9e598510 100644 --- a/plenum/test/script/test_bootstrap_test_node.py +++ b/plenum/test/script/test_bootstrap_test_node.py @@ -1,13 +1,22 @@ from plenum.common.test_network_setup import TestNetworkSetup from plenum.common.txn_util import getTxnOrderedFields +from plenum.common.util import randomString portsStart = 9600 def testBootstrapTestNode(tconf): # TODO: Need to add some asserts + steward_defs, node_defs = TestNetworkSetup.gen_defs( + ips=None, nodeCount=4, starting_port=portsStart) + + client_defs = TestNetworkSetup.gen_client_defs(clientCount=1) + trustee_def = 
TestNetworkSetup.gen_trustee_def(1) + nodeParamsFile = randomString() + TestNetworkSetup.bootstrapTestNodesCore( config=tconf, envName="test", appendToLedgers=False, domainTxnFieldOrder=getTxnOrderedFields(), - ips=None, nodeCount=4, clientCount=1, - nodeNum=1, startingPort=portsStart, nodeParamsFileName='plenum.env') + trustee_def=trustee_def, steward_defs=steward_defs, + node_defs=node_defs, client_defs=client_defs, localNodes=1, + nodeParamsFileName=nodeParamsFile) diff --git a/plenum/test/test_client.py b/plenum/test/test_client.py index 4d50fc45e7..89f2e76fda 100644 --- a/plenum/test/test_client.py +++ b/plenum/test/test_client.py @@ -14,14 +14,14 @@ from plenum.common.types import f from plenum.common.util import bootstrapClientKeys from plenum.test.test_stack import StackedTester, getTestableStack -from plenum.test.testable import Spyable +from plenum.test.testable import spyable from plenum.common.constants import OP_FIELD_NAME logger = getlogger() -@Spyable(methods=[Client.handleOneNodeMsg, Client.resendRequests]) +@spyable(methods=[Client.handleOneNodeMsg, Client.resendRequests]) class TestClient(Client, StackedTester): def __init__(self, *args, **kwargs): self.NodeStackClass = nodeStackClass diff --git a/plenum/test/test_log_rotation.py b/plenum/test/test_log_rotation.py index 5b4bb47502..fb5deefd16 100644 --- a/plenum/test/test_log_rotation.py +++ b/plenum/test/test_log_rotation.py @@ -2,10 +2,12 @@ import logging import shutil import time +import pytest from stp_core.common.logging.TimeAndSizeRotatingFileHandler \ import TimeAndSizeRotatingFileHandler + def cleanFolder(path): if os.path.exists(path): shutil.rmtree(path, ignore_errors=True) diff --git a/plenum/test/test_node.py b/plenum/test/test_node.py index ed6e67c808..d4e318d057 100644 --- a/plenum/test/test_node.py +++ b/plenum/test/test_node.py @@ -39,7 +39,7 @@ from plenum.test.test_ledger_manager import TestLedgerManager from plenum.test.test_stack import StackedTester, getTestableStack, 
CONNECTED, \ checkRemoteExists, RemoteState, checkState -from plenum.test.testable import Spyable +from plenum.test.testable import spyable from plenum.test import waits logger = getlogger() @@ -185,7 +185,7 @@ def ensureKeysAreSetup(self): pass -@Spyable(methods=[Node.handleOneNodeMsg, +@spyable(methods=[Node.handleOneNodeMsg, Node.handleInvalidClientMsg, Node.processRequest, Node.processOrdered, @@ -248,7 +248,7 @@ def _serviceActions(self): return super()._serviceActions() -@Spyable(methods=[replica.Replica.doPrePrepare, +@spyable(methods=[replica.Replica.doPrePrepare, replica.Replica.canProcessPrePrepare, replica.Replica.canSendPrepare, replica.Replica.isValidPrepare, @@ -395,7 +395,7 @@ def getAllMsgReceived(self, node: NodeRef, method: str = None) -> Tuple: return getAllMsgReceivedForNode(self.getNode(node), method) -@Spyable(methods=[Monitor.isMasterThroughputTooLow, +@spyable(methods=[Monitor.isMasterThroughputTooLow, Monitor.isMasterReqLatencyTooHigh, Monitor.sendThroughput, Monitor.requestOrdered, @@ -740,4 +740,4 @@ def getNonPrimaryReplicas(nodes: Iterable[TestNode], instId: int = 0) -> \ def getAllReplicas(nodes: Iterable[TestNode], instId: int = 0) -> \ Sequence[TestReplica]: - return [node.replicas[instId] for node in nodes] \ No newline at end of file + return [node.replicas[instId] for node in nodes] diff --git a/plenum/test/waits.py b/plenum/test/waits.py index 680340c1a4..f0301f9ca4 100644 --- a/plenum/test/waits.py +++ b/plenum/test/waits.py @@ -98,6 +98,10 @@ def expectedClientConnectionTimeout(fVal): return 3 * fVal +def expectedClientToNodeMessageDeliveryTime(nodeCount): + return 1 * nodeCount + + def expectedClientRequestPropagationTime(nodeCount): return int(2.5 * nodeCount) From 91d90f0abe3167bec88789344c85c31656355ea8 Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Thu, 1 Jun 2017 14:00:26 +0300 Subject: [PATCH 008/100] Stable release (#189) * changes for stp * removing redundant close in orientdb socket * adding tests for catchup 
scenario and 3 phase message outside water mark * fixing import in test * Fix 'infinite' looping on receiving messages when their amount is very high (#97) * fix 'infinite' looping on receiving messages when their amount is very high * move quotas to config * make zstack get params from config only once * add test for quota * increase message quotas to 100 * Feature Added: Jenkins (#99) * Moved util methods to NetworkInterfaces; fixes in raet stack * fix key generation * Hotfix: Deps (#101) * - Added a custom jsonpickle handler for enums to avoid the issue https://github.com/jsonpickle/jsonpickle/issues/135 in jsonpickle 0.9.2 which is the canonical version for Ubuntu 16.04. (#98) - Corrected Cli.restoreWalletByPath method. * Merge branches 'stp' and 'zmq' of github.com:evernym/plenum into stp * handling infinite loop problem and fixing test delay * fixing import * bugfix in test * raising timeout * merge * change in ordered * move connection related exceptions to stp project * fix imports of moved exceptions * fixed dependencies * fixing name * temporarily disabling tests on Windows * renamed init_plenum_raet_keep to init_plenum_keys * removing unused code and changing script mode * Introduced failing of the pytest session in case any unexpected warnings have been made (#104) * changes to troubleshoot testing for coro not awaited * added support for warning checking across all tests * fixed scope for warnfilters fixture * - Updated the warnings filter. Made the rules for ResourceWarning: "unclosed file", ResourceWarning: "unclosed.*socket\.socket" and RuntimeWarning: "coroutine.*was never awaited" global since they are made by the garbage collector in unpredictable points. - Sorted the rules in the warnings filter alphabetically. * Specialized the warnings filter rule for RuntimeWarning about a coroutine was never awaited (to "msgAll" coroutine).
* Added an explicit call of the garbage collector to the tear-down phase of "warncheck" fixture in order to ensure that warnings will be generated and recorded for all the unclosed resources. * Fixed the issue with that a coroutine "msgAll" object was never awaited. Removed the corresponding "ignore" rule from the warnings filter. * - Removed the rules not being actuated currently from the warnings filter. - Replaced use of line numbers with use of message patterns in the warnings filter rules. * Corrected the message pattern for a rule from the warnings filter. * Added an "ignore" rule for ResourceWarning: "unclosed event loop" to the warnings filter. * Returned back the warnings filter rules for DeprecationWarnings in jsonpickle package. Replaced use of line numbers with use of message patterns in these rules. * adding warning * removing looper and checking while closing orientdb * increasing a timeout * replacing warn with warning methods * fixed stp dependency * fixed tests * skip failing test, created SOV-881. 
* adding load script * updating setup * changing logs to print alias * setting resource limit for tests * handling exception while raising resource limits * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring (#106) * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring * removed unwanted error code, refactored list keyrings command so that it correctly works for agent cli * removed unused imports * making CLI accept hex seed of size 64 * move logging to stp repo * mitigating key error in pool manager * changing timeouts so the disconnect check happens twice * handling unavailability of resource module * some corrections to gc * setting the lastOrdered sequence number of the replica after election * fixing obsolete imports * adding temporary info logs to show up during load testing, problem identified for PREPREPARE getting rejects, state tree has different root at beginning for different nodes * reverting config changes * using ordered json encoder * add declarations of common request handling methods to ReqHandler * refactor method commit * add docstring for ReqHandler, rearrange methods * rename *ReqHandlers to *RequestHandlers * add TODOs about fixing hierarchy of State and PruningState * rename validateReq to validate * add config=None argument to validate method to make sub and super signatures match * rename applyReq to apply * add super initialization, update usage of super methods * update docstring of commit * remove commitReq since it does the same as commit * adding timeout to test utility method * changes for load testing * sanitising log messages * Versioning fix (#113) * changes for request handlers * [Closes SOV-905] Bug Fixed: Orientdb is not installing with the latest build 0.3.16 * changing to rlp encoding * moving error codes to stp * upgrading version of stp *
req_handler changes * fixing import * temp commit * Stp (#116) * use common stp interface for changing of node's HA * get rid of explicitly calling zmq and raet; move all transport stuff into stp interfaces. * fix tests; increment stp version * add AuthMode enum instead of auto parameter * fixed testConnectWithoutKeySharingFails test * increased test timeout * minor changes to serialization * Plenum test runner fix (#115) * Fixed a bug in plenum test runner with an incorrect test results summary pattern. Previously it had been expected that the line with the test results summary must begin with equality signs and end with them. But this line is padded with equality signs only if it is shorter than 80 characters. Now the test results summary patterns don't require equality signs. Instead they may match only the last line in the file. * Corrected test results summary patterns in plenum test runner. * updating apply of request handler * fix import of OP_FIELD_NAME * up version of stp to 0.1.24 * Agent issuer wallet restoration (#117) * supplied keys parameter as True to wallet encoding and decoding functions so that dictionary keys which are custom objects restore properly, otherwise it used to be restored as string oppossed to actual object it was before persistence * minor changes in list keyrings to show issuer wallet for agent as well * minor change to print base wallet first and then any other related wallets * up stp version to 0.1.26 * skip tests in test_new_node_catchup.py * scheduling primary selection * Skip failing tests (#122) * skeip test testChangeNodeHaForNonPrimary due to SOV-941 * skip test testProtocolInstanceCannotBecomeActiveWithLessThanFourServers due to SOV-940 * leveldb HashStore * parametrize test timeouts (#120) * rename expectedWait to expectedNodeInterconnectionTime * add signature for other time expect functions * use named timeouts in conftest * move totalConnections from waits to utils * use named timeout in ensureElectionsDone * use 
float instead of int for seconds * use default args of ensureElectionsDone where it is possible * use named argument for timeouts * use named timeout in helper * use names for parameters * rename 'timeoutPerReq' of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies to 'customTimeoutPerRequest' to emphasize the fact that there is a default one * use default timeout instead of custom timeout of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies where it can be used; use named argument for timeouts * add comments for two functions with similar names * rename checkSufficientRepliesRecvd to checkSufficientRepliesReceived * rename checkSufficientRepliesForRequests to waitForSufficientRepliesForRequests * add 'returns' to docstrings * fix passing of args to waits.expectedElectionTimeout * use waitForSufficientRepliesForRequests and it's default timeout instead of checkSufficientRepliesReceived everywhere it is possible * update doc lines * create waitForViewChange and use it in ensureView * replace usages of checkViewNoForNodes with eventually by waitForViewChange * add waits.expectedNodeToNodeMessageDeliveryTime and use it in sendMsgAndCheck * rename checkMsg to checkMessageReceived * rename sendMsgAndCheck to sendMessageAndCheckDelivery * add docstring to sendMessageAndCheckDelivery * remove unused helper function * add expectedGetReadyTimeout and use it in checkPoolReady * rename overrideTimeout parameter to customTimeout in checkNodesConnected * use default timeout of checkNodesConnected * create expectedPoolLedgerCheck and expectedClientConnectionTimeout waits and use them * add todo for ensureDisconnectedToNodes * update waits.expectedPoolLedgerCheck * add todo for checkNodesParticipating * add requestIds parameter for waitForSufficientRepliesForRequests * update docstring of waitForSufficientRepliesForRequests * fix waitForSufficientRepliesForRequests * remove unused imports from test_log_filtering * use named timeout 
in test_status_command * use waits.expectedTransactionExecutionTime in testTransactions * refactor testTransactions * use waitRequestSuccess and waitBalanceChange in test_cli_with_auction_plugin, move them to test.cli.helper * use named timeout in test_basic_client_commands * use named timeout in helper.checkRequest * create waitClientConnected and use it instead of checkClientConnected with eventually * add docstrings * rename checkNodeStarted to waitNodeStarted and use named timeout 'expectedNodeStartUpTimeout' in it * rename expectedGetReadyTimeout to expectedPoolGetReadyTimeout * rename checkAllNodesStarted to waitAllNodesStarted * fix default value of customTimeout of checkPoolReady * create waitAllNodesUp and use it instead of checkAllNodesUp with eventually * create waitReplyCount and use instead of checkReplyCount and eventually * use named timeouts in test_client * use named timeouts in some more tests * add basic implementation for waits.expectedCatchupTime and use it * make expectedCatchupTime get custom ConsistencyProofsTimeout as parameter * use named timeout in testNodeDiscardMessageFromUnknownView * use waits.expectedElectionTimeout and rename timeout arg to custonTimeout in checkEveryNodeHasAtMostOnePrimary * rename timeout argument of plenum/test/node_catchup/test_discard_view_no.py to customTimeout and make it used named timeouts from waits as default * update timeouts in testNodeDiscardMessageFromUnknownView * create waits.expectedRequestStashingTime and use it * add TODO to test_catchup_delayed_nodes * create waitNodeLedgersEquality and use it instead of directo usage of checkNodeLedgersEquality * use waits.expectedPoolLedgerCheck in waitNodeLedgersEquality * use named timeout in testOrderingCase2 * add waits.expectedNominationTimeout and use it * use named timeout in some more tests * add missing empty lines * update waits * add 'TODO[slow-factor]' * update timeouts in the tests * fix testTestNodeDelay and missed import * skip 
testMultipleRequests test * skip testStatusAfterClientAdded test * fix testInstChangeWithLowerRatioThanDelta * fix test_new_node_catchup.py * fix testChangeHaPersistsPostNodesRestart * fix testAdd2NewNodes * increase expectedElectionTimeout timeout * rename logger.warn => logger.warning * tune timeouts in the tests * make sections in waits.py * add --repat for runner.py * increase expectedCatchupTime timeout * improve runner.py * tune the test timeouts * skip some catchup tests * parametrize test timeouts * rm eventually.py * fix testQueueingReqFromFutureView, testNumOfPrePrepareWithFPlusOneFaults, testNumOfPrepareWithFPlusOneFaults * fix testProtocolInstanceCannotBecomeActiveWithLessThanFourServers * tune propagate and preprepare test timeouts * skip testNumOf*WithFPlusOneFaults * removing commented code and fixing bugs in pool request handler * lowering Delta in pool_transactions conftest * fix jenkins build (#123) * fixed deps in setup.py * added pypi publishing * updated deps * updated deps stp-dev -> stp-perf-imp * removed unnecessary sources * updated setup.py to hold correct deps names and self name for publishing * fixed plenum for the latest stp (#127) * fixed plenum for the latest stp * increment stp version * archive runner.py output for all agents (#107) * archive runner.py results * using env variable NODE_NAME instaed of func param for artifacts * configured archiveArtifacts to allow empty/no archive * sending last pre-prepare sequence number in consistency proof so already processed request can be ignored, re-enabling tests and re-adding proper timeouts * Feature Added: Deb build * Feature Added: Deb build * Do view change if a primary is disconnected (#128) * check whether function is a partial function in getCallableName * add tests for view change when primary goes down * start view change if primary went offline * use startViewChangeIfPrimaryWentOffline instead of doElectionIfNeeded * processing stashed ordered requests after all ledgers have 
caught up and applying any request if mode changed while processing and removing obsolete code from pool manager * Unclosed file warnings (#124) * Removed "ignore" rule for ResourceWarning about an unclosed file from the warnings filter. * Fixed some causes of ResourceWarnings about unclosed files. * - Fixed some causes of ResourceWarnings about unclosed files. - Corrected plenum.common.txn_util.updateGenesisPoolTxnFile function. * - Fixed the rest causes of ResourceWarnings about unclosed files. - Removed TimeAndSizeRotatingFileHandler class which is not used anymore (the class with the same name from stp is now used instead). * Updated stp-dev dependency to the new version. * Reverted update of stp-dev dependency. * Skipped the tests in plenum.test.test_log_rotation module since they require stp-dev dependency of a newer version (0.1.28 or higher). * refactoring, documentation and updating setup to get rid of pyorient * refactoring * Updated ledger dep * deprecating orientdb * check state equality too while checking ledger equality in tests * removing obsolete test helper method * Agent generalization (#126) * refactored test_network_setup; added some role-base helper classes for members of the network * fixed issue with wrong import and minor refactoring * more work on tests * bugfixes * interim changes during refactoring * adding init_utils * cleaning up before keys * removed type hint causing issues; this issue is fixed in Python 3.5.3 with a new typing.Coroutine * added a check in DidVerifier to check that we have a verkey * fixed unreliable tmpdir test fixture * modifications to tests to make more robust when running from command line * changed spyable to be able to monkey patch a single method * Added a run_till_quiet, waits for quiet prods This function will wait till there is a quiet period from the prodables. Dependent on prodables correctly reporting events. Will run forever if there is always activity. 
* small tweak to log format to keep | aligned * changes to fix some issues happening in end-to-end test * using same seed for creating keys * ignore log files * added a small test for verifier * removed some exception classes that were moved to STP * init Local keys for client in test network * Add default values for role and verkey in state and fix test timeouts * refactor * import logger * updated to use new API from shared lib * updated to use new API from shared lib * handle pyorient import * handle pyorient import * increment stp version * Enabled the tests in plenum.test.test_log_rotation module. (#135) * increment stp version * close files in tests * check for close method before calling it (#136) * minor fix in batch creation logic and caching primary replica no in node to avoid recomputation * fix testNumOf*WithFPlusOneFaults tests * Updated stp dep * Merge branches '3pc-batch' and 'master' of github.com:evernym/plenum into 3pc-batch # Conflicts: # plenum/common/test_network_setup.py # plenum/server/node.py # plenum/test/test_node.py # setup.py * add missing __init__ for member package * added missed __init__.py for common/member * skip temporarily a failing test * fix primary disconnection and new primary not being same as old for master instance * add more tests for primary disconnection, same primary not elected in the next view and refactor * add new test for minor network glitch with primary * add missing parameter to bootstrap test network (#143) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * minor change in a test and removing sip statements * fix the catchup tests (#140) * make tests a little robust * fix test, check for disconnections
after processing sent and received messages and little documentation * Close SOV-976 (#144) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-976] Unable to create the genesis transaction files * Merge branches '3pc-batch' and 'master' of github.com:evernym/plenum into 3pc-batch # Conflicts: # plenum/test/node_catchup/test_new_node_catchup.py # setup.py * up stp to 0.1.42 (#148) * refactor, enable test, adding timeout for view change and create remotes as soon as nodestack starts * fix broken test * [Closes SOV-981] Bug Fixed: Unable to create pool transaction file on the client machine without passing it a node number (#149) * increment stp * increment ledger * up stp to 0.1.14 * move state and kv storage to 'state' repo * get rid of orientdb code * fix tests * fix generation of trustee txns * fix genesis txns for trustee * fix generation of trustee txns (#151) * remove obsolete code * client to attempt establishing connections with nodes on start * refactor monitor.isMasterThroughputTooLow * update method for forcing view change cna checking it; intoduce wait method for it * use provoke_and_wait_for_view_change instead of eventually and check * remove unused dependency and skip tests - removed unused dependencies - minor fixes - skip tests that fail in master too (we have separate tickets for this) * increment ledger * increment ledger * enable debug logging in tests * using reverted changes in state interface * increment state version * Removed redundant copying of the file "pool_transactions_sandbox" to the directory "~/.plenum". (It is already done using "data_files" argument of "setup" function call.) 
(#152) * speed up tests (#147) * speed up test_nodes_with_pool_txns.py tests * reduce the timeout in checkNodesConnected * speed up the pool_transactions test * fix the timeout in test_delay.py * fix the timeout in test_primary_election_case5.py * speed up monitoring tests * revert changes in test_instance_change_with_Delta.py * each test's running time is limited by TestRunningTimeLimitSec * increase pool interconnection timeout on zstack retry timeout * fix timeout in test_delay.py * fix tests, make them faster * speedup the tests, tidy test timeouts * add a timeout into waits * bump stp-dev==0.1.43 * Unskip testProtocolInstanceCannotBecomeActiveWithLessThanFourServers since it works well (#153) * unskip testProtocolInstanceCannotBecomeActiveWithLessThanFourServers since it works well * decrease number of nodes in test_instance_cannot_become_active_with_less_than_four_servers to 13 * fix change back HA, SOV-908 (#154) * fix * Fix testChangeNodeHaForNonPrimary (#157) * unskip test_change_non_primary_node_ha * fix waits name - it was renamed * verify genesis ledgers only if such option set in config * use primaryName instead of isPrimary to check that election is done * add numOfReelections parameter to waits.expectedPoolElectionTimeout * refactor test_node.py * set custom election timeout (numOfReelections=3) in changeNodeHa * remove debug prints * check VerKey is base58 for NODE txn, SOV-988 (#158) * client using a method which makes the code more testable, update to an old test with more checks and removing async from a method where it was not needed * Fix test testChangeNodeHaForPrimary (#160) * unskip testChangeNodeHaForPrimary * simplify conditional in changeNodeHa * node which is going down should not participate in a view change * change formating in testChangeNodeHaForPrimary * refactor to make method overridable and adding an argument to a test function * updated pypi package name for state repo and its version (#159) * Squashed commit of the following: 
create LedgerInfo class and replace collections of LedgerManager by fields * rename LedgerManager.ledgers to ledgerRegistry * fix usages of LedgerManager.ledgerRegistry * use correct attribute and renamed method * bring change from master and unskip a test * use correct attribute and set state to be committed when received from catchup * formatting * up ledger-3pc-batch version to 0.2.16 * up stp-3pc-batch version to 0.1.15 * improve the check of the arguments in "generate_sovrin_pool_transactions" (#162) * check client input for the NODE txn (#161) * fix testInstChangeWithLowerRatioThanDelta - decrease number of messages in a batch * decrease timeout to fix provoke_and_wait_for_view_change * fixing test * Fix post 3pc-batch merge (#163) * remove development options from Jenkinsfile * skip some broken tests * Update versions and skip windows build steps (#164) * switch off windows related build steps * update versions of ledger and stp * fix name * up ledger and stp version * skip test testNodeRequestingTxns * using correct timeout in tests * move some enums to correct location, using ledger's string encoding methods and test for stashing requests * bump dependency version * fix timeout in tests * make order of ledger sync customizable, add tranformer for transactions before adding to ledger, update seqno map db for transactions from catchup and update tests to check this * temporarily change config variable * fix regression where node starts catchup process if sent a consistency proff * bumping dependency version * bumping dependency version * bumping dependency version * Fix redundant reconnections and refactor (#165) * refactor retryForExpected * use maintainConnections in retryForExpected instead of connect * refactor resendRequests * up stp version to 0.1.49 * replace CLIENT_REQACK_TIMEOUT by CLIENT_REPLY_TIMEOUT when calling _filterExpected for REPLYs * up ledger to 0.2.19 * Add new helper-methods for conversion base58 to hex (#166) * increment state-trie and 
ledger versions (#168) * Make changes to support ChunkedFileStore (#167) * update _defaultStore of Ledger to make it correspond to original one * remove initialization of domain ledger - now it is done by ledger and file store it uses * replace manual copying of transaction files by usage of defaultFile * increase timeout for testNodeRequestingTxns * skip test testInstChangeWithLowerRatioThanDelta because it fails intermittently * up ledger version to 0.2.28 * Bump dependency version * [Closes SOV-980] Bug Fixed: A node should start catchup process if it realises that it has lagged behind the other node (#172) * [Closes SOV-980] Bug Fixed: A node should start catchup process if it realises that it has lagged behind the other node * Fixed failing tests * Fixed unsed imports * Update conftest.py * Increased test time global limit * Reverted timeouts * Added logs * Fixed filtering CPs * Fixed filtering CPs * Fixed filtering CPs * Input validation (#170) * Input sanitization: Add base logic and test cases * Input sanitization: add NODE and NYM txn, fixes * Input sanitization: implement node and client messages * roll away node to node validation * role field is option for NYM txn * fix tests * fixes for sovrin-node * implement validation for merkle root hash * uncomment new ConsistencyProof message implementation * add "nullable" property for FieldBase * fix usage of alphabet * add JsonField * add validation on message level * use hash size range instead of one value; use base58.alphabet instead of own list * fix usage hasSizes; made error comment more verbose * uncomment new implementation LedgerStatus message * uncomment new implementation for Prepare message and fix fields types * roll away node to node checkes * check each node connected * improve log messages * fix testAdd2NewNodes * Improvement of validation rules (#173) * fix Node txn, INDY-9 * add tests for the Node operation * - Provided all the disabled tests with the ticket references. 
(#176) - Enabled the tests testChangeNodeHaForPrimary and testChangeNodeHaForNonPrimary on non-Windows platforms. - Commented out parametrizers at the disabled tests in plenum.test.input_validation.test_common_checks module. * Make SERVICES field required for add NODE txn * Added another test scenario for catchup (#178) * Added another test scenario for catchup fix * stable release Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- .venv/lib64 | 1 - Jenkinsfile | 10 +- examples/orientdb.py | 160 --- examples/simple_node.py | 2 +- plenum/cli/cli.py | 38 +- plenum/client/client.py | 175 +-- plenum/client/id_data.py | 26 - plenum/client/pool_manager.py | 8 - plenum/common/batched.py | 30 +- plenum/common/constants.py | 24 +- plenum/common/exceptions.py | 11 +- plenum/common/init_util.py | 9 +- plenum/common/ledger.py | 108 ++ plenum/common/ledger_info.py | 62 + plenum/common/ledger_manager.py | 1024 ++++++++-------- plenum/common/looper.py | 0 plenum/common/member/member.py | 2 +- plenum/common/message_processor.py | 2 - plenum/common/messages/__init__.py | 0 plenum/common/messages/client_request.py | 68 ++ plenum/common/messages/fields.py | 261 ++++ plenum/common/messages/message_base.py | 108 ++ plenum/common/request.py | 12 +- plenum/common/script_helper.py | 9 +- plenum/common/signer_did.py | 4 +- plenum/common/signer_simple.py | 3 +- plenum/common/signing.py | 22 +- plenum/common/stack_manager.py | 42 +- plenum/common/stacks.py | 5 +- plenum/common/startable.py | 7 - plenum/common/test_network_setup.py | 73 +- plenum/common/txn_util.py | 66 +- plenum/common/types.py | 253 +++- plenum/common/util.py | 38 +- plenum/config.py | 107 +- plenum/persistence/client_document_store.py | 157 --- plenum/persistence/client_req_rep_store.py | 13 +- .../persistence/client_req_rep_store_file.py | 30 +- plenum/persistence/graph_store.py | 47 - plenum/persistence/leveldb_hash_store.py | 93 ++ plenum/persistence/orientdb_graph_store.py | 94 -- 
plenum/persistence/orientdb_hash_store.py | 138 --- plenum/persistence/orientdb_store.py | 138 --- plenum/persistence/req_id_to_txn.py | 43 + plenum/persistence/secondary_storage.py | 45 - plenum/persistence/storage.py | 28 +- plenum/persistence/util.py | 26 + plenum/server/client_authn.py | 35 +- plenum/server/domain_req_handler.py | 143 +++ plenum/server/models.py | 19 +- plenum/server/monitor.py | 107 +- plenum/server/node.py | 1047 ++++++++++------- plenum/server/pool_manager.py | 235 ++-- plenum/server/pool_req_handler.py | 196 +++ plenum/server/primary_elector.py | 157 +-- plenum/server/propagator.py | 62 +- plenum/server/replica.py | 765 +++++++----- plenum/server/req_handler.py | 68 ++ plenum/server/router.py | 4 +- plenum/server/suspicion_codes.py | 25 +- plenum/test/batching_3pc/__init__.py | 0 plenum/test/batching_3pc/conftest.py | 24 + plenum/test/batching_3pc/helper.py | 37 + .../test/batching_3pc/test_basic_batching.py | 104 ++ .../test/batching_3pc/test_batch_rejection.py | 89 ++ .../batching_3pc/test_batching_scenarios.py | 54 + .../test/batching_3pc/test_client_requests.py | 6 + plenum/test/batching_3pc/test_state_proof.py | 0 .../test/blacklist/test_blacklist_client.py | 2 +- ..._blacklist_node_on_multiple_nominations.py | 2 +- ...t_node_on_multiple_primary_declarations.py | 2 +- .../test_message_outside_watermark.py | 8 +- .../test_message_outside_watermark1.py | 7 +- .../checkpoints/test_stable_checkpoint.py | 12 +- plenum/test/cli/helper.py | 15 +- plenum/test/cli/test_basic_client_commands.py | 2 +- .../cli/test_cli_with_auction_req_plugin.py | 66 +- .../test/cli/test_cli_with_bank_req_plugin.py | 41 +- plenum/test/cli/test_status_command.py | 4 +- plenum/test/client/test_client.py | 8 +- plenum/test/client/test_client_authn.py | 4 +- plenum/test/client/test_client_retry.py | 50 +- .../test_client_sends_to_f_plus_one_nodes.py | 39 +- plenum/test/conftest.py | 38 +- plenum/test/delayers.py | 15 +- plenum/test/helper.py | 133 ++- 
plenum/test/input_validation/__init__.py | 0 plenum/test/input_validation/conftest.py | 0 plenum/test/input_validation/fields.py | 147 +++ plenum/test/input_validation/helper.py | 319 +++++ plenum/test/input_validation/messages.py | 189 +++ .../input_validation/test_client_node_op.py | 47 + .../input_validation/test_common_checks.py | 56 + .../test_handle_one_node_message.py | 20 + plenum/test/instances/helper.py | 4 + plenum/test/instances/test_commit_digest.py | 55 - ...come_active_with_less_than_four_servers.py | 14 +- .../test_multiple_instance_change_msgs.py | 6 +- plenum/test/malicious_behaviors_client.py | 3 +- plenum/test/malicious_behaviors_node.py | 79 +- plenum/test/monitoring/conftest.py | 18 + .../test_instance_change_with_Delta.py | 39 +- .../test_instance_change_with_req_Lambda.py | 25 +- .../monitoring/test_post_monitoring_stats.py | 12 +- plenum/test/node_catchup/conftest.py | 5 +- plenum/test/node_catchup/helper.py | 69 +- .../test_catchup_delayed_nodes.py | 8 +- .../node_catchup/test_catchup_scenarios.py | 6 +- ...test_catchup_while_new_request_incoming.py | 15 +- .../test/node_catchup/test_discard_view_no.py | 11 +- .../node_catchup/test_new_node_catchup.py | 99 +- .../test_node_catchup_after_disconnect.py | 35 + ...test_node_catchup_after_lost_connection.py | 35 + ..._node_reject_invalid_txn_during_catchup.py | 36 +- .../test_node_request_consistency_proof.py | 15 +- .../test_node_request_missing_transactions.py | 36 +- .../test/node_request/node_request_helper.py | 47 +- .../test_no_forwarding_without_election.py | 50 + ..._ordering_when_pre_prepare_not_received.py | 12 +- .../test_order/test_request_ordering_1.py | 2 +- .../test_order/test_request_ordering_2.py | 9 +- .../test_non_primary_sends_a_pre_prepare.py | 31 +- .../stats_consumer/plugin_stats_consumer.py | 7 +- plenum/test/pool_transactions/conftest.py | 22 +- plenum/test/pool_transactions/helper.py | 152 ++- .../pool_transactions/test_adding_stewards.py | 22 +- 
...t_change_ha_persists_post_nodes_restart.py | 18 +- .../test_client_with_pool_txns.py | 5 +- .../test_multiple_clients.py | 41 - .../test_nodes_ha_change_back.py | 56 + .../test_nodes_with_pool_txns.py | 225 +++- plenum/test/primary_election/helpers.py | 17 +- .../test_primary_election_case1.py | 18 +- .../test_primary_election_case2.py | 12 +- .../test_primary_election_case4.py | 18 +- .../test_primary_election_case5.py | 21 +- .../test_primary_election_contested.py | 2 +- ...test_primary_election_with_clear_winner.py | 4 +- .../test_primary_election_with_tie.py | 2 +- .../test_primary_selection.py | 2 +- .../test_propagate_recvd_before_request.py | 6 +- ...y_marked_suspicious_for_sending_prepare.py | 6 +- .../test_replica_reject_same_pre_prepare.py | 21 +- plenum/test/script/helper.py | 47 +- .../script/test_change_non_primary_node_ha.py | 13 +- .../script/test_change_primary_node_ha.py | 13 +- plenum/test/storage/helper.py | 2 +- .../test/storage/test_leveldb_hash_store.py | 56 + .../test/storage/test_orientdb_hash_store.py | 75 -- plenum/test/storage/test_orientdb_version.py | 13 - plenum/test/test_delay.py | 5 +- plenum/test/test_ledger_manager.py | 3 +- plenum/test/test_log_rotation.py | 2 +- plenum/test/test_node.py | 157 ++- plenum/test/test_node_basic.py | 6 - plenum/test/test_node_connection.py | 28 +- plenum/test/test_node_request.py | 4 +- .../test_round_trip_with_one_faulty_node.py | 5 +- plenum/test/test_stack.py | 25 +- plenum/test/test_verif_merkle_proof.py | 2 + plenum/test/view_change/conftest.py | 2 + plenum/test/view_change/helper.py | 68 ++ ...st_discard_inst_chng_msg_from_past_view.py | 13 +- .../test_elections_after_view_change.py | 6 +- .../test_instance_change_msg_checking.py | 4 +- ..._master_primary_different_from_previous.py | 93 ++ .../test_queueing_req_from_future_view.py | 141 ++- plenum/test/view_change/test_view_change.py | 14 +- .../test_view_change_happens_post_timeout.py | 4 + .../test_view_change_not_gamable.py | 5 + 
..._changes_if_backup_primary_disconnected.py | 32 - ..._changes_if_master_primary_disconnected.py | 55 +- ..._changed_if_backup_primary_disconnected.py | 37 + ...mary_disconnected_from_less_than_quorum.py | 79 ++ ...ew_not_changed_when_short_disconnection.py | 76 ++ plenum/test/waits.py | 255 +++- .../zstack_tests/test_zstack_reconnection.py | 7 +- scripts/load.py | 42 + scripts/start_plenum_node | 1 + setup.py | 17 +- tutorial/tutorial.py | 2 +- 181 files changed, 7263 insertions(+), 3624 deletions(-) delete mode 120000 .venv/lib64 delete mode 100644 examples/orientdb.py delete mode 100644 plenum/client/id_data.py create mode 100644 plenum/common/ledger.py create mode 100644 plenum/common/ledger_info.py create mode 100644 plenum/common/looper.py create mode 100644 plenum/common/messages/__init__.py create mode 100644 plenum/common/messages/client_request.py create mode 100644 plenum/common/messages/fields.py create mode 100644 plenum/common/messages/message_base.py delete mode 100644 plenum/persistence/client_document_store.py delete mode 100644 plenum/persistence/graph_store.py create mode 100644 plenum/persistence/leveldb_hash_store.py delete mode 100644 plenum/persistence/orientdb_graph_store.py delete mode 100644 plenum/persistence/orientdb_hash_store.py delete mode 100644 plenum/persistence/orientdb_store.py create mode 100644 plenum/persistence/req_id_to_txn.py delete mode 100644 plenum/persistence/secondary_storage.py create mode 100644 plenum/persistence/util.py create mode 100644 plenum/server/domain_req_handler.py create mode 100644 plenum/server/pool_req_handler.py create mode 100644 plenum/server/req_handler.py create mode 100644 plenum/test/batching_3pc/__init__.py create mode 100644 plenum/test/batching_3pc/conftest.py create mode 100644 plenum/test/batching_3pc/helper.py create mode 100644 plenum/test/batching_3pc/test_basic_batching.py create mode 100644 plenum/test/batching_3pc/test_batch_rejection.py create mode 100644 
plenum/test/batching_3pc/test_batching_scenarios.py create mode 100644 plenum/test/batching_3pc/test_client_requests.py create mode 100644 plenum/test/batching_3pc/test_state_proof.py create mode 100644 plenum/test/input_validation/__init__.py create mode 100644 plenum/test/input_validation/conftest.py create mode 100644 plenum/test/input_validation/fields.py create mode 100644 plenum/test/input_validation/helper.py create mode 100644 plenum/test/input_validation/messages.py create mode 100644 plenum/test/input_validation/test_client_node_op.py create mode 100644 plenum/test/input_validation/test_common_checks.py create mode 100644 plenum/test/input_validation/test_handle_one_node_message.py delete mode 100644 plenum/test/instances/test_commit_digest.py create mode 100644 plenum/test/node_catchup/test_node_catchup_after_disconnect.py create mode 100644 plenum/test/node_catchup/test_node_catchup_after_lost_connection.py create mode 100644 plenum/test/node_request/test_no_forwarding_without_election.py delete mode 100644 plenum/test/pool_transactions/test_multiple_clients.py create mode 100644 plenum/test/pool_transactions/test_nodes_ha_change_back.py create mode 100644 plenum/test/storage/test_leveldb_hash_store.py delete mode 100644 plenum/test/storage/test_orientdb_hash_store.py delete mode 100644 plenum/test/storage/test_orientdb_version.py create mode 100644 plenum/test/view_change/helper.py create mode 100644 plenum/test/view_change/test_master_primary_different_from_previous.py create mode 100644 plenum/test/view_change/test_view_change_happens_post_timeout.py create mode 100644 plenum/test/view_change/test_view_change_not_gamable.py delete mode 100644 plenum/test/view_change/test_view_changes_if_backup_primary_disconnected.py create mode 100644 plenum/test/view_change/test_view_not_changed_if_backup_primary_disconnected.py create mode 100644 plenum/test/view_change/test_view_not_changed_when_primary_disconnected_from_less_than_quorum.py create mode 100644 
plenum/test/view_change/test_view_not_changed_when_short_disconnection.py create mode 100644 scripts/load.py diff --git a/.venv/lib64 b/.venv/lib64 deleted file mode 120000 index 7951405f85..0000000000 --- a/.venv/lib64 +++ /dev/null @@ -1 +0,0 @@ -lib \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index d72f2688c7..2a47d9cc9a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,8 +10,6 @@ def testUbuntu = { checkout scm echo 'Ubuntu Test: Build docker image' - orientdb.start() - def testEnv = dockerHelpers.build(name) testEnv.inside('--network host') { @@ -24,7 +22,6 @@ def testUbuntu = { } finally { echo 'Ubuntu Test: Cleanup' - orientdb.stop() step([$class: 'WsCleanup']) } } @@ -60,9 +57,6 @@ def testWindowsNoDocker = { echo 'Windows No Docker Test: Checkout csm' checkout scm - echo 'Windows No Docker Test: drop orientdb databases' - orientdb.cleanupWindows() - testHelpers.createVirtualEnvAndExecute({ python, pip -> echo 'Windows No Docker Test: Install dependencies' testHelpers.install(python: python, pip: pip, isVEnv: true) @@ -77,7 +71,5 @@ def testWindowsNoDocker = { } } - - -//testAndPublish(name, [ubuntu: testUbuntu, windows: testWindowsNoDocker, windowsNoDocker: testWindowsNoDocker]) testAndPublish(name, [ubuntu: testUbuntu]) + diff --git a/examples/orientdb.py b/examples/orientdb.py deleted file mode 100644 index c58d6d4fbf..0000000000 --- a/examples/orientdb.py +++ /dev/null @@ -1,160 +0,0 @@ -import pyorient - -client = pyorient.OrientDB("localhost", 2424) -dbName = "test" -user = "root" -password = "password" -session_id = client.connect(user, password) -try: - client.db_drop(dbName, pyorient.STORAGE_TYPE_MEMORY) -except pyorient.exceptions.PyOrientException: - pass - -client.db_create(dbName, pyorient.DB_TYPE_GRAPH, pyorient.STORAGE_TYPE_MEMORY) -# client.db_create(dbName, pyorient.DB_TYPE_GRAPH, pyorient.STORAGE_TYPE_PLOCAL) -client.db_exists(dbName, pyorient.STORAGE_TYPE_MEMORY) -client.db_list() -# Need to open the db for doing 
read/writes on it -client.db_open(dbName, user, password) - -cmd1 = client.command("create class Animal extends V") - -cmd2 = client.command("create vertex Animal set name = 'rat', specie = 'rodent'") - -cmd3 = client.query("select * from Animal") - -### Create the vertex and insert the food values - -cmd4 = client.command('create class Food extends V') -cmd5 = client.command("create vertex Food set name = 'pea', color = 'green'") - -### Create the edge for the Eat action -cmd6 = client.command('create class Eat extends E') - -### Lets the rat likes to eat pea -eat_edges = client.command( - "create edge Eat from (" - "select from Animal where name = 'rat'" - ") to (" - "select from Food where name = 'pea'" - ")" -) - -# Create edges using record id -eat_edges_1 = client.command("CREATE EDGE Eat FROM {} to {}".format(cmd3[0]._rid, cmd5[0]._rid)) - -### Who eats the peas? -pea_eaters = client.command("select expand( in( Eat )) from Food where name = 'pea'") -for animal in pea_eaters: - print(animal.name, animal.specie) -'rat rodent' - -### What each animal eats? 
-animal_foods = client.command("select expand( out( Eat )) from Animal") -for food in animal_foods: - animal = client.query( - "select name from ( select expand( in('Eat') ) from Food where name = 'pea' )" - )[0] - print(food.name, food.color, animal.name) -'pea green rat' - -client.command("CREATE CLASS Car EXTENDS V") -client.command("CREATE CLASS Owns EXTENDS E") -client.command("CREATE CLASS Person EXTENDS V") - -client.command("CREATE VERTEX Person SET name = 'Luca'") -client.command("CREATE VERTEX Person SET name = 'Luca1'") -client.command("CREATE VERTEX Person SET name = 'Luca2'") - -client.command("CREATE VERTEX Car SET name = 'Ferrari Modena'") -client.command("CREATE VERTEX Car SET name = 'Ferrari Modena1'") -client.command("CREATE VERTEX Car SET name = 'Ferrari Modena2'") - -cmd7 = client.command("select * from Person where name = 'Luca1' limit 1") - -cmd8 = client.command("select * from Person where name = 'Luca1'") - -cmd9 = client.command("CREATE EDGE Owns FROM ( SELECT FROM Person where name = 'Luca1') TO ( SELECT FROM Car where name='Ferrari Modena2')") - -client.command("create class auto extends V") -client.command("create property auto.name string") -client.command("create index auto.name unique") - -client.command("create class rides extends E") -client.command("create property rides.out link Person") -client.command("create property rides.in link auto") - -client.command("create class bike extends auto") -client.command("create class cycle extends auto") - -client.command("create vertex bike SET name = 'bik1'") -client.command("create vertex bike SET name = 'bike2'") -client.command("create vertex cycle SET name = 'cycle1'") -client.command("create vertex cycle SET name = 'cycle2'") - -b= client.command("select * from auto where name='bike2'")[0] -print(b._class) - -client.command("create class cycler extends E") -client.command("create property cycler.out link Person") -client.command("create property cycler.in link cycle") 
-client.command("create edge cycler from ( SELECT FROM Person where name = 'Luca') TO ( SELECT FROM cycle where name='cycle1')") - -# client.command("create edge cycler from ( SELECT FROM Person where name = 'Luca1') TO ( SELECT FROM Car where name='Ferrari Modena2')") - -client.command("select expand (in('cycler')) from cycle where name = 'cycle1'") -client.command("select expand (in('cycler')) from cycle where name = 'cycle2'") - -client.command("create edge rides from ( SELECT FROM Person where name = 'Luca1') TO ( SELECT FROM cycle where name='cycle1')") -client.command("create edge rides from ( SELECT FROM Person where name = 'Luca2') TO ( SELECT FROM bike where name='bike2')") - -client.command("create class profile") -client.command("create property profile.id string") -client.command("create index profile.id unique") -client.command("insert into profile set name = 'Luca', age = 21, id = '1'") -client.command("update profile set name = 'Luca1', role = None upsert where id = '1'") -client.command("update profile set name = 'Luca2' upsert where id = '2'") - -client.command("create class Version") -client.command("create property Version.v string") -client.command("create property Version.release string") -client.command("create class Package") -client.command("create property Package.versions embeddedlist Version") - -r = client.command('insert into Package set versions = [{"v":"1.0.1", "release":"monday"}]') -client.command('update %s add versions = [{"v":"1.0.2", "release":"tuesday"}]' % r[0]._rid) - -cmd = ( - # "begin;" - "create class Address;" - "create property Address.street String;" - "create property Address.city String;" - "create class Client;" - "create property Client.name String;" - "create property Client.phones embeddedSet String;" - "create property Client.addresses embeddedList Address;" - "insert into client set name = 'James Bond', phones = ['1234', '34567'], addresses = [{'city':'Shanghai', 'zip':'3999'}, {'city':'New York', 
'street':'57th Ave'}];" - "update client add addresses = [{'city':'London', 'zip':'67373'}];" - # "commit;" -) - -client.batch(cmd) -client.command("update Client add addresses = [{'city':'Delhi', 'zip':'2'}]") -client.command("select from Client")[0].oRecordData -client.command("update Client add addresses = {'city':'Mumbai', 'zip':'3'}") -client.command("select from Client")[0].oRecordData - -cmd = ( - "create class report;" - "create property report.id integer;" - "create property report.marks embeddedmap string;" - "insert into report set id = 1, marks={'p': 100, 'm': 'A', 'c': 2.4};" -) -client.batch(cmd) - -client.command("select from report where marks[p] = 100")[0].oRecordData -client.command("select from report where marks.p = 100")[0].oRecordData - -client.command("update report set marks.m = 'B' where id = 1") -client.command("update report set marks.b = 21 where id = 1") -client.command("update report set date = 1460128589.825324 return after $this.marks where id = 1") \ No newline at end of file diff --git a/examples/simple_node.py b/examples/simple_node.py index 339241b7c3..54bdaeabba 100755 --- a/examples/simple_node.py +++ b/examples/simple_node.py @@ -46,7 +46,7 @@ def run_node(): # see simple_client.py joe_verkey = b'cffbb88a142be2f62d1b408818e21a2f' \ b'887c4442ae035a260d4cc2ec28ae24d6' - node.clientAuthNr.addClient("Joe", joe_verkey) + node.clientAuthNr.addIdr("Joe", joe_verkey) looper.add(node) node.startKeySharing() diff --git a/plenum/cli/cli.py b/plenum/cli/cli.py index eb24d9446b..c8f482aec3 100644 --- a/plenum/cli/cli.py +++ b/plenum/cli/cli.py @@ -6,7 +6,6 @@ from os.path import basename, dirname from typing import Dict, Iterable -import pyorient from jsonpickle import json from ledger.compact_merkle_tree import CompactMerkleTree @@ -28,16 +27,16 @@ getAllGrams from plenum.cli.phrase_word_completer import PhraseWordCompleter from plenum.client.wallet import Wallet -from plenum.common.exceptions import NameAlreadyExists, 
GraphStorageNotAvailable, \ - KeysNotFoundException +from plenum.common.exceptions import NameAlreadyExists, KeysNotFoundException from plenum.common.keygen_utils import learnKeysFromOthers, tellKeysToOthers, areKeysSetup from plenum.common.plugin_helper import loadPlugins from stp_core.crypto.util import cleanSeed, seedFromHex from stp_raet.util import getLocalEstateData from plenum.common.signer_simple import SimpleSigner from plenum.common.stack_manager import TxnStackManager -from plenum.common.constants import TXN_TYPE, TARGET_NYM, TXN_ID, DATA, IDENTIFIER, \ - NODE, ALIAS, NODE_IP, NODE_PORT, CLIENT_PORT, CLIENT_IP, VERKEY, BY, CLIENT_STACK_SUFFIX +from plenum.common.constants import TXN_TYPE, TARGET_NYM, DATA, IDENTIFIER, \ + NODE, ALIAS, NODE_IP, NODE_PORT, CLIENT_PORT, CLIENT_IP, VERKEY, BY, \ + CLIENT_STACK_SUFFIX from plenum.common.transactions import PlenumTransactions from prompt_toolkit.utils import is_windows, is_conemu_ansi from stp_core.network.port_dispenser import genHa @@ -523,7 +522,6 @@ def _addOldGenesisCommand(self, matchedVars): txn = { TXN_TYPE: typ, TARGET_NYM: destId, - TXN_ID: sha256(randomString(6).encode()).hexdigest(), } if matchedVars.get(IDENTIFIER): txn[IDENTIFIER] = getFriendlyIdentifier(matchedVars.get(IDENTIFIER)) @@ -915,7 +913,7 @@ def newNode(self, nodeName: str): basedirpath=self.basedirpath, pluginPaths=self.pluginPaths, config=self.config) - except (GraphStorageNotAvailable, KeysNotFoundException) as e: + except KeysNotFoundException as e: self.print(str(e), Token.BoldOrange) return self.nodes[name] = node @@ -928,7 +926,7 @@ def newNode(self, nodeName: str): self.bootstrapKey(self.activeWallet, node) for identifier, verkey in self.externalClientKeys.items(): - node.clientAuthNr.addClient(identifier, verkey) + node.clientAuthNr.addIdr(identifier, verkey) nodes.append(node) return nodes @@ -1051,7 +1049,7 @@ def newClient(self, clientName, def bootstrapKey(wallet, node, identifier=None): identifier = identifier or 
wallet.defaultId assert identifier, "Client has no identifier" - node.clientAuthNr.addClient(identifier, wallet.getVerkey(identifier)) + node.clientAuthNr.addIdr(identifier, wallet.getVerkey(identifier)) def clientExists(self, clientName): return clientName in self.clients @@ -1278,7 +1276,7 @@ def _addKeyAction(self, matchedVars): return self.externalClientKeys[identifier] = verkey for n in self.nodes.values(): - n.clientAuthNr.addClient(identifier, verkey) + n.clientAuthNr.addIdr(identifier, verkey) return True def _addSignerToGivenWallet(self, signer, wallet: Wallet=None, @@ -1974,26 +1972,6 @@ def cleanUp(self): except FileNotFoundError: pass - client = pyorient.OrientDB(self.config.OrientDB["host"], - self.config.OrientDB["port"]) - user = self.config.OrientDB["user"] - password = self.config.OrientDB["password"] - client.connect(user, password) - - def dropdbs(): - i = 0 - names = [n for n in - client.db_list().oRecordData['databases'].keys()] - for nm in names: - try: - client.db_drop(nm) - i += 1 - except: - continue - return i - - dropdbs() - def __hash__(self): return hash((self.name, self.unique_name, self.basedirpath)) diff --git a/plenum/client/client.py b/plenum/client/client.py index eec7d4fd7e..dc5040d6ae 100644 --- a/plenum/client/client.py +++ b/plenum/client/client.py @@ -12,6 +12,7 @@ from typing import List, Union, Dict, Optional, Tuple, Set, Any, \ Iterable +from plenum.common.ledger import Ledger from plenum.common.stacks import nodeStackClass from stp_core.crypto.nacl_wrappers import Signer from stp_core.network.auth_mode import AuthMode @@ -31,9 +32,10 @@ from plenum.common.motor import Motor from plenum.common.plugin_helper import loadPlugins from plenum.common.request import Request -from plenum.common.startable import Status, LedgerState, Mode +from plenum.common.startable import Status, Mode from plenum.common.constants import REPLY, POOL_LEDGER_TXNS, \ - LEDGER_STATUS, CONSISTENCY_PROOF, CATCHUP_REP, REQACK, REQNACK, OP_FIELD_NAME + 
LEDGER_STATUS, CONSISTENCY_PROOF, CATCHUP_REP, REQACK, REQNACK, REJECT, OP_FIELD_NAME, \ + POOL_LEDGER_ID, TXN_TIME, LedgerState from plenum.common.txn_util import getTxnOrderedFields from plenum.common.types import Reply, f, LedgerStatus, TaggedTuples from plenum.common.util import getMaxFailures, checkIfMoreThanFSameItems, rawToFriendly @@ -105,7 +107,7 @@ def __init__(self, self.mode = None HasPoolManager.__init__(self) self.ledgerManager = LedgerManager(self, ownedByNode=False) - self.ledgerManager.addLedger(0, self.ledger, + self.ledgerManager.addLedger(POOL_LEDGER_ID, self.ledger, postCatchupCompleteClbk=self.postPoolLedgerCaughtUp, postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) else: @@ -220,7 +222,7 @@ def start(self, loop): else: super().start(loop) self.nodestack.start() - self.nodestack.maintainConnections() + self.nodestack.maintainConnections(force=True) if self._ledger: self.ledgerManager.setLedgerCanSync(0, True) self.mode = Mode.starting @@ -249,7 +251,7 @@ def submitReqs(self, *reqs: Request) -> List[Request]: for request in reqs: if self.mode == Mode.discovered and self.hasSufficientConnections: logger.debug('Client {} sending request {}'.format(self, request)) - self.nodestack.send(request) + self.send(request) self.expectingFor(request) else: logger.debug("{} pending request since in mode {} and " @@ -273,7 +275,7 @@ def handleOneNodeMsg(self, wrappedMsg, excludeFromCli=None) -> None: CATCHUP_REP) printOnCli = not excludeFromCli and msg.get(OP_FIELD_NAME) not \ in ledgerTxnTypes - logger.debug("Client {} got msg from node {}: {}". + logger.info("Client {} got msg from node {}: {}". 
format(self.name, frm, msg), extra={"cli": printOnCli}) if OP_FIELD_NAME in msg: @@ -300,6 +302,9 @@ def handleOneNodeMsg(self, wrappedMsg, excludeFromCli=None) -> None: elif msg[OP_FIELD_NAME] == REQNACK: self.reqRepStore.addNack(msg, frm) self.gotExpected(msg, frm) + elif msg[OP_FIELD_NAME] == REJECT: + self.reqRepStore.addReject(msg, frm) + self.gotExpected(msg, frm) elif msg[OP_FIELD_NAME] == REPLY: result = msg[f.RESULT.nm] identifier = msg[f.RESULT.nm][f.IDENTIFIER.nm] @@ -321,21 +326,17 @@ def _statusChanged(self, old, new): # do nothing for now pass - def stop(self, *args, **kwargs): - super().stop(*args, **kwargs) - self.txnLog.close() - if self._ledger is not None: - self._ledger.stop() - if hasattr(self, 'hashStore') and self.hashStore is not None: - self.hashStore.close() - def onStopping(self, *args, **kwargs): logger.debug('Stopping client {}'.format(self)) self.nodestack.nextCheck = 0 self.nodestack.stop() if self._ledger: - self.ledgerManager.setLedgerState(0, LedgerState.not_synced) + self.ledgerManager.setLedgerState(POOL_LEDGER_ID, LedgerState.not_synced) self.mode = None + self._ledger.stop() + if self.hashStore and not self.hashStore.closed: + self.hashStore.close() + self.txnLog.close() def getReply(self, identifier: str, reqId: int) -> Optional[Reply]: """ @@ -470,7 +471,7 @@ def flushMsgsPendingConnection(self): .format(queueSize)) while self.reqsPendingConnection: req, signer = self.reqsPendingConnection.popleft() - self.nodestack.send(req, signer=signer) + self.send(req, signer=signer) def expectingFor(self, request: Request, nodes: Optional[Set[str]]=None): nodes = nodes or {r.name for r in self.nodestack.remotes.values() @@ -491,7 +492,7 @@ def gotExpected(self, msg, frm): # would fetch the reply or the client might just lose REQACK and not # REPLY so when REPLY received, request does not need to be resent colls = (self.expectingAcksFor, self.expectingRepliesFor) - elif msg[OP_FIELD_NAME] == REQNACK: + elif msg[OP_FIELD_NAME] in 
(REQNACK, REJECT): container = msg colls = (self.expectingAcksFor, self.expectingRepliesFor) else: @@ -513,41 +514,51 @@ def gotExpected(self, msg, frm): def stopRetrying(self): self.stopRepeating(self.retryForExpected, strict=False) + def _filterExpected(self, now, queue, retryTimeout, maxRetry): + deadRequests = [] + aliveRequests = {} + notAnsweredNodes = set() + for requestKey, (expectedFrom, lastTried, retries) in queue.items(): + if now < lastTried + retryTimeout: + continue + if retries >= maxRetry: + deadRequests.append(requestKey) + continue + if requestKey not in aliveRequests: + aliveRequests[requestKey] = set() + aliveRequests[requestKey].update(expectedFrom) + notAnsweredNodes.update(expectedFrom) + return deadRequests, aliveRequests, notAnsweredNodes + def retryForExpected(self): now = time.perf_counter() - keys = {} - nodesNotSendingAck = set() - - # Collect nodes which did not send REQACK - clearKeys = [] - for reqKey, (expectedFrom, lastTried, retries) in \ - self.expectingAcksFor.items(): - if now > (lastTried + self.config.CLIENT_REQACK_TIMEOUT): - if retries < self.config.CLIENT_MAX_RETRY_ACK: - if reqKey not in keys: - keys[reqKey] = set() - keys[reqKey].update(expectedFrom) - nodesNotSendingAck.update(expectedFrom) - else: - clearKeys.append(reqKey) - for k in clearKeys: - self.expectingAcksFor.pop(k) - - # Collect nodes which did not send REPLY - clearKeys = [] - for reqKey, (expectedFrom, lastTried, retries) in \ - self.expectingRepliesFor.items(): - if now > (lastTried + self.config.CLIENT_REPLY_TIMEOUT): - if retries < self.config.CLIENT_MAX_RETRY_REPLY: - if reqKey not in keys: - keys[reqKey] = set() - keys[reqKey].update(expectedFrom) - else: - clearKeys.append(reqKey) - for k in clearKeys: - self.expectingRepliesFor.pop(k) - - for nm in nodesNotSendingAck: + + requestsWithNoAck, aliveRequests, notAckedNodes = \ + self._filterExpected(now, + self.expectingAcksFor, + self.config.CLIENT_REQACK_TIMEOUT, + self.config.CLIENT_MAX_RETRY_ACK) + 
+ requestsWithNoReply, aliveRequests, notRepliedNodes = \ + self._filterExpected(now, + self.expectingRepliesFor, + self.config.CLIENT_REPLY_TIMEOUT, + self.config.CLIENT_MAX_RETRY_REPLY) + + for requestKey in requestsWithNoAck: + logger.debug('{} have got no ACKs for {} and will not try again' + .format(self, requestKey)) + self.expectingAcksFor.pop(requestKey) + + for requestKey in requestsWithNoReply: + logger.debug('{} have got no REPLYs for {} and will not try again' + .format(self, requestKey)) + self.expectingRepliesFor.pop(requestKey) + + if notAckedNodes: + logger.debug('{} going to retry for {}' + .format(self, self.expectingAcksFor.keys())) + for nm in notAckedNodes: try: remote = self.nodestack.getRemote(nm) except RemoteNotFound: @@ -555,46 +566,52 @@ def retryForExpected(self): continue logger.debug('Remote {} of {} being joined since REQACK for not ' 'received for request'.format(remote, self)) - self.nodestack.connect(name=remote.name) - if keys: + + # This makes client to reconnect + # even if pool is just busy and cannot answer quickly, + # that's why using maintainConnections instead + # self.nodestack.connect(name=remote.name) + self.nodestack.maintainConnections() + + if aliveRequests: # Need a delay in case connection has to be established with some # nodes, a better way is not to assume the delay value but only # send requests once the connection is established. 
Also it is # assumed that connection is not established if a node not sending - # REQACK/REQNACK/REPLY, but a little better way is to compare the - # value in stats of the stack and look for changes in count of + # REQACK/REQNACK/REJECT/REPLY, but a little better way is to compare + # the value in stats of the stack and look for changes in count of # `message_reject_rx` but that is not very helpful either since # it does not record which node rejected - delay = 3 if nodesNotSendingAck else 0 - self._schedule(partial(self.resendRequests, keys), delay) + delay = 3 if notAckedNodes else 0 + self._schedule(partial(self.resendRequests, aliveRequests), delay) def resendRequests(self, keys): for key, nodes in keys.items(): - if nodes: - request = self.reqRepStore.getRequest(*key) - logger.debug('{} resending request {} to {}'. - format(self, request, nodes)) - self.sendToNodes(request, nodes) - now = time.perf_counter() - if key in self.expectingAcksFor: - _, _, c = self.expectingAcksFor[key] - self.expectingAcksFor[key] = (nodes, now, c + 1) - if key in self.expectingRepliesFor: - _, _, c = self.expectingRepliesFor[key] - self.expectingRepliesFor[key] = (nodes, now, c + 1) + if not nodes: + continue + request = self.reqRepStore.getRequest(*key) + logger.debug('{} resending request {} to {}'. 
+ format(self, request, nodes)) + self.sendToNodes(request, nodes) + now = time.perf_counter() + for queue in [self.expectingAcksFor, self.expectingRepliesFor]: + if key in queue: + _, _, retries = queue[key] + queue[key] = (nodes, now, retries + 1) def sendLedgerStatus(self, nodeName: str): - ledgerStatus = LedgerStatus(0, self.ledger.size, self.ledger.root_hash) + ledgerStatus = LedgerStatus(POOL_LEDGER_ID, self.ledger.size, + self.ledger.root_hash) rid = self.nodestack.getRemote(nodeName).uid - self.nodestack.send(ledgerStatus, rid) + self.send(ledgerStatus, rid) def send(self, msg: Any, *rids: Iterable[int], signer: Signer = None): self.nodestack.send(msg, *rids, signer=signer) def sendToNodes(self, msg: Any, names: Iterable[str]): rids = [rid for rid, r in self.nodestack.remotes.items() if r.name in names] - self.nodestack.send(msg, *rids) + self.send(msg, *rids) @staticmethod def verifyMerkleProof(*replies: Tuple[Reply]) -> bool: @@ -611,16 +628,16 @@ def verifyMerkleProof(*replies: Tuple[Reply]) -> bool: verifier = MerkleVerifier() fields = getTxnOrderedFields() serializer = CompactSerializer(fields=fields) + ignored = {F.auditPath.name, F.seqNo.name, F.rootHash.name, TXN_TIME} for r in replies: seqNo = r[f.RESULT.nm][F.seqNo.name] - rootHash = base64.b64decode( - r[f.RESULT.nm][F.rootHash.name].encode()) - auditPath = [base64.b64decode( - a.encode()) for a in r[f.RESULT.nm][F.auditPath.name]] - filtered = ((k, v) for (k, v) in r[f.RESULT.nm].items() - if k not in - [F.auditPath.name, F.seqNo.name, F.rootHash.name]) - result = serializer.serialize(dict(filtered)) + rootHash = Ledger.strToHash( + r[f.RESULT.nm][F.rootHash.name]) + auditPath = [Ledger.strToHash(a) for a in + r[f.RESULT.nm][F.auditPath.name]] + filtered = dict((k, v) for (k, v) in r[f.RESULT.nm].items() + if k not in ignored) + result = serializer.serialize(filtered) verifier.verify_leaf_inclusion(result, seqNo - 1, auditPath, STH(tree_size=seqNo, diff --git a/plenum/client/id_data.py 
b/plenum/client/id_data.py deleted file mode 100644 index 9297403821..0000000000 --- a/plenum/client/id_data.py +++ /dev/null @@ -1,26 +0,0 @@ -# DEPR -# Deprecated in favour of passing request id store for each specific client -# class IdData: -# -# def __init__(self, -# signer: Signer=None, -# lastReqId: int=0): -# self.signer = signer -# self._lastReqId = lastReqId -# -# def __getstate__(self): -# return { -# 'key': self.signer.seedHex.decode(), -# 'lastReqId': self.lastReqId -# } -# -# def __setstate__(self, obj): -# self.signer = SimpleSigner(seed=unhexlify(obj['key'].encode())) -# self._lastReqId = obj['lastReqId'] -# -# @property -# def lastReqId(self): -# return self._lastReqId -# -# def refresh(self): -# self._lastReqId += 1 \ No newline at end of file diff --git a/plenum/client/pool_manager.py b/plenum/client/pool_manager.py index 7139760f86..46be8db260 100644 --- a/plenum/client/pool_manager.py +++ b/plenum/client/pool_manager.py @@ -10,7 +10,6 @@ NODE_PORT, CLIENT_IP, CLIENT_PORT, VERKEY, SERVICES, VALIDATOR, CLIENT_STACK_SUFFIX from plenum.common.types import PoolLedgerTxns, f, HA from plenum.common.util import getMaxFailures -from plenum.common.txn_util import updateGenesisPoolTxnFile from stp_core.common.log import getlogger logger = getlogger() @@ -54,13 +53,6 @@ def poolTxnReceived(self, msg: PoolLedgerTxns, frm): if len(txns) > 0: txn = json.loads(txns[0]) self.addToLedger(txn) - if self.config.UpdateGenesisPoolTxnFile: - # Adding sequence number field since needed for safely - # updating genesis file - txn[F.seqNo.name] = len(self.ledger) - updateGenesisPoolTxnFile(self.config.baseDir, - self.config.poolTransactionsFile, - txn) self.tempNodeTxns.pop(seqNo) else: logger.error("{} has not got enough similar node " diff --git a/plenum/common/batched.py b/plenum/common/batched.py index ce83273ac3..2f821e6740 100644 --- a/plenum/common/batched.py +++ b/plenum/common/batched.py @@ -14,6 +14,7 @@ class Batched(MessageProcessor): """ A mixin to allow 
batching of requests to be send to remotes. + Assumes a Stack (ZStack or RStack) is mixed """ def __init__(self): @@ -29,10 +30,9 @@ def _enqueue(self, msg: Any, rid: int, signer: Signer) -> None: :param msg: the message to enqueue :param rid: the id of the remote node """ - payload = self.prepForSending(msg, signer) if rid not in self.outBoxes: self.outBoxes[rid] = deque() - self.outBoxes[rid].append(payload) + self.outBoxes[rid].append(msg) def _enqueueIntoAllRemotes(self, msg: Any, signer: Signer) -> None: """ @@ -52,11 +52,16 @@ def send(self, msg: Any, *rids: Iterable[int], signer: Signer = None) -> None: :param rids: ids of the remotes to whose outBoxes this message must be enqueued """ + # Signing (if required) and serializing before enqueueing otherwise + # each call to `_enqueue` will have to sign it and `transmit` will try + # to serialize it which is waste of resources + serializedPayload = self.signAndSerialize(msg, signer) + if rids: for r in rids: - self._enqueue(msg, r, signer) + self._enqueue(serializedPayload, r, signer) else: - self._enqueueIntoAllRemotes(msg, signer) + self._enqueueIntoAllRemotes(serializedPayload, signer) def flushOutBoxes(self) -> None: """ @@ -73,7 +78,8 @@ def flushOutBoxes(self) -> None: if len(msgs) == 1: msg = msgs.popleft() # Setting timeout to never expire - self.transmit(msg, rid, timeout=self.messageTimeout) + self.transmit(msg, rid, timeout=self.messageTimeout, + serialized=True) logger.trace( "{} sending msg {} to {}".format(self, msg, dest)) else: @@ -81,17 +87,17 @@ def flushOutBoxes(self) -> None: "{} batching {} msgs to {} into one transmission". 
format(self, len(msgs), dest)) logger.trace(" messages: {}".format(msgs)) - batch = Batch([], None) - while msgs: - batch.messages.append(msgs.popleft()) + batch = Batch(list(msgs), None) + msgs.clear() # don't need to sign the batch, when the composed msgs are # signed - payload = self.prepForSending(batch) + payload = self.signAndSerialize(batch) logger.trace("{} sending payload to {}: {}".format(self, dest, payload)) # Setting timeout to never expire - self.transmit(payload, rid, timeout=self.messageTimeout) + self.transmit(payload, rid, timeout=self.messageTimeout, + serialized=True) for rid in removedRemotes: logger.warning("{} rid {} has been removed".format(self, rid), extra={"cli": False}) @@ -115,3 +121,7 @@ def doProcessReceived(self, msg, frm, ident): return None msg[f.MSGS.nm] = relevantMsgs return msg + + def signAndSerialize(self, msg, signer=None): + payload = self.prepForSending(msg, signer) + return self.serializeMsg(payload) diff --git a/plenum/common/constants.py b/plenum/common/constants.py index 2968305141..05b3f430ec 100644 --- a/plenum/common/constants.py +++ b/plenum/common/constants.py @@ -1,5 +1,5 @@ # inter-node communication -from enum import IntEnum +from enum import IntEnum, unique from plenum.common.roles import Roles from plenum.common.transactions import PlenumTransactions @@ -12,8 +12,8 @@ BATCH = "BATCH" REQACK = "REQACK" - REQNACK = "REQNACK" +REJECT = "REJECT" POOL_LEDGER_TXNS = "POOL_LEDGER_TXNS" @@ -100,7 +100,18 @@ class ClientBootStrategy(IntEnum): class StorageType(IntEnum): File = 1 Ledger = 2 - OrientDB = 3 + + +class KeyValueStorageType(IntEnum): + Leveldb = 1 + Memory = 2 + + +@unique +class LedgerState(IntEnum): + not_synced = 1 # Still gathering consistency proofs + syncing = 2 # Got sufficient consistency proofs, will be sending catchup requests and waiting for their replies + synced = 3 # Got replies for all catchup requests, indicating catchup complete for the ledger OP_FIELD_NAME = "op" @@ -110,12 +121,13 @@ class 
StorageType(IntEnum): NODE_BLACKLISTER_SUFFIX = "BLN" NODE_PRIMARY_STORAGE_SUFFIX = "PS" -NODE_SECONDARY_STORAGE_SUFFIX = "SS" NODE_TXN_STORE_SUFFIX = "TS" NODE_HASH_STORE_SUFFIX = "HS" HS_FILE = "file" -HS_ORIENT_DB = "orientdb" HS_MEMORY = "memory" +HS_LEVELDB = 'leveldb' -PLUGIN_BASE_DIR_PATH = "PluginBaseDirPath" \ No newline at end of file +PLUGIN_BASE_DIR_PATH = "PluginBaseDirPath" +POOL_LEDGER_ID = 0 +DOMAIN_LEDGER_ID = 1 diff --git a/plenum/common/exceptions.py b/plenum/common/exceptions.py index 36324b4de8..4aeb677bd1 100644 --- a/plenum/common/exceptions.py +++ b/plenum/common/exceptions.py @@ -115,6 +115,7 @@ class KeysNotFoundException(Exception): class SuspiciousNode(BaseExc): def __init__(self, node: str, suspicion: Suspicion, offendingMsg): + node = node.decode() if isinstance(node, bytes) else node self.code = suspicion.code if suspicion else None self.reason = suspicion.reason if suspicion else None p = compile(r'(\b\w+)(:(\d+))?') @@ -189,6 +190,8 @@ class DataDirectoryNotFound(StorageException): class DBConfigNotFound(StorageException): pass +class KeyValueStorageConfigNotFound(StorageException): + pass class UnsupportedOperation(Exception): pass @@ -222,14 +225,6 @@ class NameAlreadyExists(Exception): pass -class GraphStorageNotAvailable(Exception): - pass - - -class OrientDBNotRunning(GraphStorageNotAvailable): - pass - - class WalletError(Exception): pass diff --git a/plenum/common/init_util.py b/plenum/common/init_util.py index 8529925e9d..1329fa71ac 100644 --- a/plenum/common/init_util.py +++ b/plenum/common/init_util.py @@ -9,14 +9,9 @@ def cleanup_environment(name, base_dir): def initialize_node_environment(name, base_dir, sigseed=None, override_keep=False): - """ - transport-agnostic method; in the future when the transport protocol is - abstracted a bit more, this function and the one below will be the same - and likely a method of an interface - """ cleanup_environment(name, base_dir) - _, vk = initNodeKeysForBothStacks(name=name, 
baseDir=base_dir, sigseed=sigseed, - override=override_keep) + _, vk = initNodeKeysForBothStacks(name=name, baseDir=base_dir, + sigseed=sigseed, override=override_keep) return vk diff --git a/plenum/common/ledger.py b/plenum/common/ledger.py new file mode 100644 index 0000000000..fdfc882fb4 --- /dev/null +++ b/plenum/common/ledger.py @@ -0,0 +1,108 @@ +from copy import copy +from typing import List, Tuple + +import base58 + +from ledger.stores.chunked_file_store import ChunkedFileStore +from ledger.stores.file_store import FileStore + +from ledger.ledger import Ledger as _Ledger + + +class Ledger(_Ledger): + @staticmethod + def _defaultStore(dataDir, + logName, + ensureDurability, + defaultFile=None) -> FileStore: + return ChunkedFileStore(dataDir, + logName, + isLineNoKey=True, + storeContentHash=False, + ensureDurability=ensureDurability, + defaultFile=defaultFile) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Merkle tree of containing transactions that have not yet been + # committed but optimistically applied. 
+ self.uncommittedTxns = [] + self.uncommittedRootHash = None + self.uncommittedTree = None + + def appendTxns(self, txns: List): + # These transactions are not yet committed so they do not go to + # the ledger + uncommittedSize = self.size + len(self.uncommittedTxns) + self.uncommittedTree = self.treeWithAppliedTxns(txns, + self.uncommittedTree) + self.uncommittedRootHash = self.uncommittedTree.root_hash + self.uncommittedTxns.extend(txns) + if txns: + return (uncommittedSize+1, uncommittedSize+len(txns)), txns + else: + return (uncommittedSize, uncommittedSize), txns + + def commitTxns(self, count: int) -> Tuple[Tuple[int, int], List]: + """ + The number of txns from the beginning of `uncommittedTxns` to commit + :param count: + :return: a tuple of 2 seqNos indicating the start and end of sequence + numbers of the committed txns + """ + committedSize = self.size + for txn in self.uncommittedTxns[:count]: + self.append(txn) + committedTxns = self.uncommittedTxns[:count] + self.uncommittedTxns = self.uncommittedTxns[count:] + if not self.uncommittedTxns: + self.uncommittedTree = None + self.uncommittedRootHash = None + # Do not change `uncommittedTree` or `uncommittedRootHash` + # if there are any `uncommittedTxns` since the ledger still has a + # valid uncommittedTree and a valid root hash which are + # different from the committed ones + return (committedSize + 1, committedSize + count), committedTxns + + def appendCommittedTxns(self, txns: List): + # Called while receiving committed txns from other nodes + for txn in txns: + self.append(txn) + + def discardTxns(self, count: int): + """ + The number of txns in `uncommittedTxns` which have to be + discarded + :param count: + :return: + """ + self.uncommittedTxns = self.uncommittedTxns[:-count] + if not self.uncommittedTxns: + self.uncommittedTree = None + self.uncommittedRootHash = None + else: + self.uncommittedTree = self.treeWithAppliedTxns(self.uncommittedTxns) + self.uncommittedRootHash = 
self.uncommittedTree.root_hash + + def treeWithAppliedTxns(self, txns: List, currentTree=None): + """ + Return a copy of merkle tree after applying the txns + :param txns: + :return: + """ + currentTree = currentTree or self.tree + # Copying the tree is not a problem since its a Compact Merkle Tree + # so the size of the tree would be 32*(lg n) bytes where n is the + # number of leaves (no. of txns) + tempTree = copy(currentTree) + for txn in txns: + tempTree.append(self.serializeLeaf(txn)) + return tempTree + + @staticmethod + def hashToStr(h): + return base58.b58encode(h) + + @staticmethod + def strToHash(s): + return base58.b58decode(s) diff --git a/plenum/common/ledger_info.py b/plenum/common/ledger_info.py new file mode 100644 index 0000000000..0fc7c60e84 --- /dev/null +++ b/plenum/common/ledger_info.py @@ -0,0 +1,62 @@ +from collections import deque + +from plenum.common.constants import LedgerState +from plenum.common.ledger import Ledger + + +class LedgerInfo: + def __init__(self, + ledger: Ledger, + state: LedgerState, + canSync, + preCatchupStartClbk, + postCatchupStartClbk, + preCatchupCompleteClbk, + postCatchupCompleteClbk, + postTxnAddedToLedgerClbk, + verifier): + + self.ledger = ledger + + self.state = state + self.canSync = canSync + self.preCatchupStartClbk = preCatchupStartClbk + self.postCatchupStartClbk = postCatchupStartClbk + self.preCatchupCompleteClbk = preCatchupCompleteClbk + self.postCatchupCompleteClbk = postCatchupCompleteClbk + self.postTxnAddedToLedgerClbk = postTxnAddedToLedgerClbk + self.verifier = verifier + + # Ledger statuses received while the ledger was not ready to be synced + # (`canSync` was set to False) + self.stashedLedgerStatuses = deque() + + # Tracks which nodes claim that this node's ledger status is ok + # If a quorum of nodes (2f+1) say its up to date then mark the catchup + # process as completed + self.ledgerStatusOk = set() + + # Dictionary of consistency proofs received for the ledger + # in process of catching 
up + # Key is the node name and value is a consistency proof + self.recvdConsistencyProofs = {} + + self.catchUpTill = None + + # Catchup replies that need to be applied to the ledger + self.receivedCatchUpReplies = [] + + # Keep track of received replies from different senders + self.recvdCatchupRepliesFrm = {} + + # Tracks the beginning of consistency proof timer. Timer starts when the + # node gets f+1 consistency proofs. If the node is not able to begin + # the catchup process even after the timer expires then it requests + # consistency proofs. + self.consistencyProofsTimer = None + + # Tracks the beginning of catchup reply timer. Timer starts after the + # node sends catchup requests. If the node is not able to finish the + # the catchup process even after the timer expires then it requests + # missing transactions. + self.catchupReplyTimer = None diff --git a/plenum/common/ledger_manager.py b/plenum/common/ledger_manager.py index abe37fa76c..eb9e6d54cd 100644 --- a/plenum/common/ledger_manager.py +++ b/plenum/common/ledger_manager.py @@ -1,87 +1,54 @@ import heapq import operator -from base64 import b64encode, b64decode from collections import Callable -from collections import deque -from copy import copy from functools import partial from random import shuffle -from typing import Any, List, Dict, Set, Tuple +from typing import Any, List, Dict, Tuple import math from typing import Optional import time -from ledger.ledger import Ledger +from plenum.common.ledger import Ledger from ledger.merkle_verifier import MerkleVerifier from ledger.util import F -from plenum.common.startable import LedgerState -from plenum.common.types import LedgerStatus, CatchupRep, ConsistencyProof, f, \ - CatchupReq, ConsProofRequest +from plenum.common.types import LedgerStatus, CatchupRep, \ + ConsistencyProof, f, CatchupReq, ConsProofRequest +from plenum.common.constants import POOL_LEDGER_ID, LedgerState, DOMAIN_LEDGER_ID from plenum.common.util import getMaxFailures from 
plenum.common.config_util import getConfig from stp_core.common.log import getlogger from plenum.server.has_action_queue import HasActionQueue +from plenum.common.ledger_info import LedgerInfo logger = getlogger() class LedgerManager(HasActionQueue): - def __init__(self, owner, ownedByNode: bool=True): + + def __init__(self, + owner, + ownedByNode: bool=True, + postAllLedgersCaughtUp: + Optional[Callable]=None): + self.owner = owner self.ownedByNode = ownedByNode + self.postAllLedgersCaughtUp = postAllLedgersCaughtUp self.config = getConfig() # Needs to schedule actions. The owner of the manager has the - # responsibility of calling its `_serviceActions` method periodically. + # responsibility of calling its `_serviceActions` method periodically HasActionQueue.__init__(self) - # Holds ledgers of different types with their info like the ledger - # object, various callbacks, state (can be synced, is already synced, - # etc). - self.ledgers = {} # type: Dict[int, Dict[str, Any]] - - # Ledger statuses received while the ledger was not ready to be synced - # (`canSync` was set to False) - self.stashedLedgerStatuses = {} # type: Dict[int, deque] - - # Dict of sets with each set corresponding to a ledger - # Each set tracks which nodes claim that this node's ledger status is ok - # , if a quorum of nodes (2f+1) say its up to date then mark the catchup - # process as completed - self.ledgerStatusOk = {} # type: Dict[int, Set] - - # Consistency proofs received in process of catching up. - # Each element of the dict is the dictionary of consistency proofs - # received for the ledger. For each dictionary key is the node name and - # value is a consistency proof. - self.recvdConsistencyProofs = {} # type: Dict[int, Dict[str, - # ConsistencyProof]] - - self.catchUpTill = {} - - # Catchup replies that need to be applied to the ledger. 
First element - # of the list is a list of transactions that need to be applied to the - # pool transaction ledger and the second element is the list of - # transactions that need to be applied to the domain transaction ledger - self.receivedCatchUpReplies = {} # type: Dict[int, List] - - # Keep track of received replies from different senders - self.recvdCatchupRepliesFrm = {} - # type: Dict[int, Dict[str, List[CatchupRep]]] - - # Tracks the beginning of consistency proof timer. Timer starts when the - # node gets f+1 consistency proofs. If the node is not able to begin - # the catchup process even after the timer expires then it requests - # consistency proofs. - self.consistencyProofsTimers = {} - # type: Dict[int, Optional[float]] - - # Tracks the beginning of catchup reply timer. Timer starts after the - # node sends catchup requests. If the node is not able to finish the - # the catchup process even after the timer expires then it requests - # missing transactions. - self.catchupReplyTimers = {} - # type: Dict[int, Optional[float]] + # Holds ledgers of different types with + # their info like callbacks, state, etc + self.ledgerRegistry = {} # type: Dict[int, LedgerInfo] + + # Largest Pre-Prepare sequence number received during catchup. 
+ # This field is needed to discard any stashed 3PC messages or + # ordered messages since the transactions part of those messages + # will be applied when they are received through the catchup process + self.lastCaughtUpPpSeqNo = -1 def __repr__(self): return self.owner.name @@ -89,164 +56,168 @@ def __repr__(self): def service(self): return self._serviceActions() - def addLedger(self, typ: int, ledger: Ledger, + def addLedger(self, iD: int, ledger: Ledger, preCatchupStartClbk: Callable=None, postCatchupStartClbk: Callable=None, preCatchupCompleteClbk: Callable=None, postCatchupCompleteClbk: Callable=None, postTxnAddedToLedgerClbk: Callable=None): - if typ in self.ledgers: - logger.error("{} already present in ledgers so cannot replace that " - "ledger".format(typ)) + + if iD in self.ledgerRegistry: + logger.error("{} already present in ledgers " + "so cannot replace that ledger".format(iD)) return - self.ledgers[typ] = { - "ledger": ledger, - "state": LedgerState.not_synced, - "canSync": False, - "preCatchupStartClbk": preCatchupStartClbk, - "postCatchupStartClbk": postCatchupStartClbk, - "preCatchupCompleteClbk": preCatchupCompleteClbk, - "postCatchupCompleteClbk": postCatchupCompleteClbk, - "postTxnAddedToLedgerClbk": postTxnAddedToLedgerClbk, - "verifier": MerkleVerifier(ledger.hasher) - } - self.stashedLedgerStatuses[typ] = deque() - self.ledgerStatusOk[typ] = set() - self.recvdConsistencyProofs[typ] = {} - self.catchUpTill[typ] = None - self.receivedCatchUpReplies[typ] = [] - self.recvdCatchupRepliesFrm[typ] = {} - self.consistencyProofsTimers[typ] = None - self.catchupReplyTimers[typ] = None - - def checkIfCPsNeeded(self, ledgerType): - if self.consistencyProofsTimers[ledgerType] is not None: - logger.debug("{} requesting consistency proofs of {} after timeout". 
- format(self, ledgerType)) - adjustedF = getMaxFailures(self.owner.totalNodes - 1) - recvdConsProof = self.recvdConsistencyProofs[ledgerType] - grpdPrf, nullProofs = self._groupConsistencyProofs(recvdConsProof) - if nullProofs > adjustedF: - return - result = self._latestReliableProof(grpdPrf, - self.ledgers[ledgerType][ - "ledger"]) - if not result: - cpReq = self.getConsistencyProofRequest(ledgerType, grpdPrf) - logger.debug("{} sending consistency proof request: {}". - format(self, cpReq)) - self.send(cpReq) - - self.recvdConsistencyProofs[ledgerType] = {} - self.consistencyProofsTimers[ledgerType] = None - self.recvdCatchupRepliesFrm[ledgerType] = {} - - def checkIfTxnsNeeded(self, ledgerType): - if self.catchupReplyTimers[ledgerType] is not None: - catchupTill = self.catchUpTill[ledgerType] - start, end = getattr(catchupTill, f.SEQ_NO_START.nm), \ - getattr(catchupTill, f.SEQ_NO_END.nm) - ledger = self.ledgers[ledgerType]["ledger"] - catchUpReplies = self.receivedCatchUpReplies[ledgerType] - totalMissing = (end - ledger.size) - len(catchUpReplies) - - if totalMissing: - logger.debug( - "{} requesting {} missing transactions after timeout". - format(self, totalMissing)) - eligibleNodes = list(self.nodestack.conns - - self.blacklistedNodes) - # Shuffling order of nodes so that catchup requests dont go to - # the same nodes. This is done to avoid scenario where a node - # does not reply at all. - # TODO: Need some way to detect nodes that are not responding. - - # TODO: What id all nodes are blacklisted so `eligibleNodes` - # is empty? It will lead to divide by 0. This should not happen - # but its happening. - # https://www.pivotaltracker.com/story/show/130602115 - if not eligibleNodes: - logger.error("{} could not find any node to request " - "transactions from. 
Catchup process cannot " - "move ahead.".format(self)) - return - shuffle(eligibleNodes) - batchSize = math.ceil(totalMissing/len(eligibleNodes)) - cReqs = [] - lastSeenSeqNo = ledger.size - leftMissing = totalMissing - - def addReqsForMissing(frm, to): - # Add Catchup requests for missing transactions. `frm` and - # `to` are inclusive - missing = to - frm + 1 - numBatches = math.ceil(missing / batchSize) - for i in range(numBatches): - s = frm + (i * batchSize) - e = min(to, frm + ((i + 1) * batchSize) - 1) - req = CatchupReq(ledgerType, s, e, end) - logger.debug("{} creating catchup request {} to {} till {}". - format(self, s, e, end)) - cReqs.append(req) - return missing - - for seqNo, txn in catchUpReplies: - if (seqNo - lastSeenSeqNo) != 1: - missing = addReqsForMissing(lastSeenSeqNo+1, seqNo-1) - leftMissing -= missing - lastSeenSeqNo = seqNo - - # If still missing some transactions from request has not been - # sent then either `catchUpReplies` was empty or it did not have - # transactions till `end` - if leftMissing > 0: - logger.debug("{} still missing {} transactions after " - "looking at receivedCatchUpReplies". - format(self, leftMissing)) - # `catchUpReplies` was empty - if lastSeenSeqNo == ledger.size: - missing = addReqsForMissing(ledger.size+1, end) - leftMissing -= missing - # did not have transactions till `end` - elif lastSeenSeqNo != end: - missing = addReqsForMissing(lastSeenSeqNo + 1, end) - leftMissing -= missing - else: - logger.error("{} still missing {} transactions. " - "Something happened which was not thought " - "of. {} {} {}" - .format(self, leftMissing, start, end, - lastSeenSeqNo)) - if leftMissing: - logger.error( - "{} still missing {} transactions. 
{} {} {}" - .format(self, leftMissing, start, end, - lastSeenSeqNo)) - - numElgNodes = len(eligibleNodes) - for i, req in enumerate(cReqs): - nodeName = eligibleNodes[i%numElgNodes] - self.send(req, self.nodestack.getRemote(nodeName).uid) - self.catchupReplyTimers[ledgerType] = time.perf_counter() - timeout = self._getCatchupTimeout(len(cReqs), batchSize) - self._schedule(partial(self.checkIfTxnsNeeded, ledgerType), - timeout) - else: - self.catchupReplyTimers[ledgerType] = None - def setLedgerState(self, typ: int, state: LedgerState): - if typ not in self.ledgers: - logger.error("ledger type {} not present in ledgers so cannot set " - "state".format(typ)) + self.ledgerRegistry[iD] = LedgerInfo( + ledger=ledger, + state=LedgerState.not_synced, + canSync=False, + preCatchupStartClbk=preCatchupStartClbk, + postCatchupStartClbk=postCatchupStartClbk, + preCatchupCompleteClbk=preCatchupCompleteClbk, + postCatchupCompleteClbk=postCatchupCompleteClbk, + postTxnAddedToLedgerClbk=postTxnAddedToLedgerClbk, + verifier=MerkleVerifier(ledger.hasher) + ) + + def checkIfCPsNeeded(self, ledgerId): + # TODO: this one not just checks it also initiates + # consistency proof exchange process + # It should be renamed or splat on two different methods + + ledgerInfo = self.getLedgerInfoByType(ledgerId) + + if ledgerInfo.consistencyProofsTimer is None: return - self.ledgers[typ]["state"] = state - def setLedgerCanSync(self, typ: int, canSync: bool): - if typ not in self.ledgers: - logger.error("ledger type {} not present in ledgers so cannot set " - "state".format(typ)) + logger.debug("{} requesting consistency " + "proofs after timeout".format(self)) + + adjustedF = getMaxFailures(self.owner.totalNodes - 1) + proofs = ledgerInfo.recvdConsistencyProofs + groupedProofs, nullProofs = self._groupConsistencyProofs(proofs) + if nullProofs > adjustedF: + return + result = self._latestReliableProof(groupedProofs, ledgerInfo.ledger) + if not result: + cpReq = 
self.getConsistencyProofRequest(ledgerId, groupedProofs) + logger.debug("{} sending consistency proof request: {}". + format(self, cpReq)) + self.send(cpReq) + ledgerInfo.recvdConsistencyProofs = {} + ledgerInfo.consistencyProofsTimer = None + ledgerInfo.recvdCatchupRepliesFrm = {} + + def checkIfTxnsNeeded(self, ledgerId): + + ledgerInfo = self.ledgerRegistry.get(ledgerId) + ledger = ledgerInfo.ledger + if ledgerInfo.catchupReplyTimer is None: + return + + start = getattr(ledgerInfo.catchUpTill, f.SEQ_NO_START.nm) + end = getattr(ledgerInfo.catchUpTill, f.SEQ_NO_END.nm) + + catchUpReplies = ledgerInfo.receivedCatchUpReplies + totalMissing = (end - ledger.size) - len(catchUpReplies) + + if totalMissing == 0: + ledgerInfo.catchupReplyTimer = None + return + + logger.debug("{} requesting {} missing transactions " + "after timeout".format(self, totalMissing)) + eligibleNodes = list(self.nodestack.conns - + self.blacklistedNodes) + + if not eligibleNodes: + # TODO: What if all nodes are blacklisted so `eligibleNodes` + # is empty? It will lead to divide by 0. This should not happen + # but its happening. + # https://www.pivotaltracker.com/story/show/130602115 + logger.error("{} could not find any node to request " + "transactions from. Catchup process cannot " + "move ahead.".format(self)) + return + + # Shuffling order of nodes so that catchup requests don't go to + # the same nodes. This is done to avoid scenario where a node + # does not reply at all. + # TODO: Need some way to detect nodes that are not responding. + shuffle(eligibleNodes) + batchSize = math.ceil(totalMissing/len(eligibleNodes)) + cReqs = [] + lastSeenSeqNo = ledger.size + leftMissing = totalMissing + + def addReqsForMissing(frm, to): + # Add Catchup requests for missing transactions. 
+ # `frm` and `to` are inclusive + missing = to - frm + 1 + numBatches = int(math.ceil(missing / batchSize)) + for i in range(numBatches): + s = frm + (i * batchSize) + e = min(to, frm + ((i + 1) * batchSize) - 1) + req = CatchupReq(ledgerId, s, e, end) + logger.debug("{} creating catchup request {} to {} till {}". + format(self, s, e, end)) + cReqs.append(req) + return missing + + for seqNo, txn in catchUpReplies: + if (seqNo - lastSeenSeqNo) != 1: + missing = addReqsForMissing(lastSeenSeqNo+1, seqNo-1) + leftMissing -= missing + lastSeenSeqNo = seqNo + + # If still missing some transactions from request has not been + # sent then either `catchUpReplies` was empty or it did not have + # transactions till `end` + if leftMissing > 0: + logger.debug("{} still missing {} transactions after " + "looking at receivedCatchUpReplies". + format(self, leftMissing)) + # `catchUpReplies` was empty + if lastSeenSeqNo == ledger.size: + missing = addReqsForMissing(ledger.size+1, end) + leftMissing -= missing + # did not have transactions till `end` + elif lastSeenSeqNo != end: + missing = addReqsForMissing(lastSeenSeqNo + 1, end) + leftMissing -= missing + else: + logger.error("{} still missing {} transactions. " + "Something happened which was not thought " + "of. {} {} {}" + .format(self, leftMissing, start, end, + lastSeenSeqNo)) + if leftMissing: + logger.error("{} still missing {} transactions. 
{} {} {}" + .format(self, leftMissing, + start, end, lastSeenSeqNo)) + + numElgNodes = len(eligibleNodes) + for i, req in enumerate(cReqs): + nodeName = eligibleNodes[i%numElgNodes] + self.send(req, self.nodestack.getRemote(nodeName).uid) + + ledgerInfo.catchupReplyTimer = time.perf_counter() + timeout = int(self._getCatchupTimeout(len(cReqs), batchSize)) + self._schedule(partial(self.checkIfTxnsNeeded, ledgerId), timeout) + + def setLedgerState(self, ledgerType: int, state: LedgerState): + if ledgerType not in self.ledgerRegistry: + logger.error("ledger type {} not present in ledgers so " + "cannot set state".format(ledgerType)) return - self.ledgers[typ]["canSync"] = canSync + self.getLedgerInfoByType(ledgerType).state = state + + def setLedgerCanSync(self, ledgerType: int, canSync: bool): + if ledgerType not in self.ledgerRegistry: + logger.error("ledger type {} not present in ledgers so " + "cannot set state".format(ledgerType)) + return + self.getLedgerInfoByType(ledgerType).canSync = canSync def processLedgerStatus(self, status: LedgerStatus, frm: str): logger.debug("{} received ledger status: {} from {}". @@ -262,7 +233,7 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): logger.debug("{} found ledger status to be null from {}". 
format(self, frm)) return - ledgerType = getattr(status, f.LEDGER_TYPE.nm) + ledgerId = getattr(status, f.LEDGER_ID.nm) # If this is a node's ledger manager and sender of this ledger status # is a client and its pool ledger is same as this node's pool ledger @@ -270,103 +241,153 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): # consistency proof: statusFromClient = self.getStack(frm) == self.clientstack if self.ownedByNode and statusFromClient: - if ledgerType != 0: - logger.debug("{} received inappropriate ledger status {} from " - "client {}".format(self, status, frm)) + if ledgerId != POOL_LEDGER_ID: + logger.debug("{} received inappropriate " + "ledger status {} from client {}" + .format(self, status, frm)) return - else: - if self.isLedgerSame(ledgerStatus): - ledger = self.ledgers[0]["ledger"] - ledgerStatus = LedgerStatus(0, ledger.size, - ledger.root_hash) - self.sendTo(ledgerStatus, frm) + if self.isLedgerSame(ledgerStatus): + ledgerInfo = self.getLedgerInfoByType(POOL_LEDGER_ID) + poolLedger = ledgerInfo.ledger + ledgerStatus = LedgerStatus(POOL_LEDGER_ID, + poolLedger.size, + poolLedger.root_hash) + self.sendTo(ledgerStatus, frm) # If a ledger is yet to sync and cannot sync right now, # then stash the ledger status to be processed later - if self.ledgers[ledgerType]["state"] != LedgerState.synced and \ - not self.ledgers[ledgerType]["canSync"]: - self.stashLedgerStatus(ledgerType, status, frm) + ledgerInfo = self.getLedgerInfoByType(ledgerId) + if ledgerInfo.state != LedgerState.synced and not ledgerInfo.canSync: + self.stashLedgerStatus(ledgerId, status, frm) return # If this manager is owned by a node and the node's ledger is ahead of # the received ledger status if self.ownedByNode and self.isLedgerNew(ledgerStatus): consistencyProof = self.getConsistencyProof(ledgerStatus) + if not consistencyProof: + return None self.sendTo(consistencyProof, frm) - if not self.isLedgerOld(ledgerStatus) and not statusFromClient: - # This node's 
ledger is not older so it will not receive a - # consistency proof unless the other node processes a transaction - # post sending this ledger status - self.recvdConsistencyProofs[ledgerType][frm] = None - self.ledgerStatusOk[ledgerType].add(frm) - if len(self.ledgerStatusOk[ledgerType]) == 2*self.owner.f: - logger.debug("{} found out from {} that its ledger of type {} " - "is latest". - format(self, self.ledgerStatusOk[ledgerType], - ledgerType)) - if self.ledgers[ledgerType]["state"] != LedgerState.synced: - self.catchupCompleted(ledgerType) + if self.isLedgerOld(ledgerStatus): + if ledgerInfo.state == LedgerState.synced: + self.setLedgerCanSync(ledgerId, True) + ledger = self.getLedgerForMsg(ledgerStatus) + ledgerStatus = LedgerStatus(ledgerId, + ledger.size, + ledger.root_hash) + self.sendTo(ledgerStatus, frm) + return + + if statusFromClient: + return + + # This node's ledger is not older so it will not receive a + # consistency proof unless the other node processes a transaction + # post sending this ledger status + ledgerInfo.recvdConsistencyProofs[frm] = None + ledgerInfo.ledgerStatusOk.add(frm) + if len(ledgerInfo.ledgerStatusOk) == 2 * self.owner.f: + logger.debug("{} found out from {} that its " + "ledger of type {} is latest". + format(self, ledgerInfo.ledgerStatusOk, ledgerId)) + if ledgerInfo.state != LedgerState.synced: + self.catchupCompleted(ledgerId) def processConsistencyProof(self, proof: ConsistencyProof, frm: str): logger.debug("{} received consistency proof: {} from {}". 
format(self, proof, frm)) - ledgerType = getattr(proof, f.LEDGER_TYPE.nm) + ledgerId = getattr(proof, f.LEDGER_ID.nm) + ledgerInfo = self.getLedgerInfoByType(ledgerId) + ledgerInfo.recvdConsistencyProofs[frm] = ConsistencyProof(*proof) + if self.canProcessConsistencyProof(proof): - self.recvdConsistencyProofs[ledgerType][frm] = \ - ConsistencyProof(*proof) - canCatchup, catchUpFrm = self.canStartCatchUpProcess(ledgerType) + canCatchup, catchUpFrm = self.canStartCatchUpProcess(ledgerId) if canCatchup: - self.startCatchUpProcess(ledgerType, catchUpFrm) + self.startCatchUpProcess(ledgerId, catchUpFrm) def canProcessConsistencyProof(self, proof: ConsistencyProof) -> bool: - ledgerType = getattr(proof, f.LEDGER_TYPE.nm) - if self.ledgers[ledgerType]["state"] == LedgerState.not_synced and \ - self.ledgers[ledgerType]["canSync"]: - start, end = getattr(proof, f.SEQ_NO_START.nm), \ - getattr(proof, f.SEQ_NO_END.nm) - # TODO: Should we discard where start is older than the ledger size - ledgerSize = self.ledgers[ledgerType]["ledger"].size - if start > ledgerSize: - self.discard(proof, reason="Start {} is greater than " - "ledger size {}". 
- format(start, ledgerSize), - logMethod=logger.warn) - return False - elif end <= start: - self.discard(proof, reason="End {} is not greater than " - "start {}".format(end, start), - logMethod=logger.warn) + ledgerId = getattr(proof, f.LEDGER_ID.nm) + ledgerInfo = self.getLedgerInfoByType(ledgerId) + if not ledgerInfo.canSync: + logger.debug("{} cannot process consistency " + "proof since canSync is {}" + .format(self, ledgerInfo.canSync)) + return False + if ledgerInfo.state == LedgerState.syncing: + logger.debug("{} cannot process consistency " + "proof since ledger state is {}" + .format(self, ledgerInfo.state)) + return False + if ledgerInfo.state == LedgerState.synced: + if not self.checkLedgerIsOutOfSync(ledgerInfo): + logger.debug("{} cannot process consistency " + "proof since in state {} and not enough " + "CPs received" + .format(self, ledgerInfo.state)) return False - else: - return True - else: - logger.debug("{} cannot process consistency proof since in state {}" - " and canSync is {}". - format(self, self.ledgers[ledgerType]["state"], - self.ledgers[ledgerType]["canSync"])) + logger.debug("{} is out of sync (based on CPs {} and total " + "node cnt {}) -> updating ledger" + " state from {} to {}" + .format(self, ledgerInfo.recvdConsistencyProofs, + self.owner.totalNodes, + ledgerInfo.state, LedgerState.not_synced)) + self.setLedgerState(ledgerId, LedgerState.not_synced) + if ledgerId == DOMAIN_LEDGER_ID: + ledgerInfo.preCatchupStartClbk() + return self.canProcessConsistencyProof(proof) + + start = getattr(proof, f.SEQ_NO_START.nm) + end = getattr(proof, f.SEQ_NO_END.nm) + # TODO: Should we discard where start is older than the ledger size + ledgerSize = ledgerInfo.ledger.size + if start > ledgerSize: + self.discard(proof, reason="Start {} is greater than " + "ledger size {}". 
+ format(start, ledgerSize), + logMethod=logger.warn) + return False + if end <= start: + self.discard(proof, reason="End {} is not greater than " + "start {}".format(end, start), + logMethod=logger.warn) return False + return True + + def checkLedgerIsOutOfSync(self, ledgerInfo) -> bool: + recvdConsProof = ledgerInfo.recvdConsistencyProofs + # Consider an f value when this node was not connected + currTotalNodes = self.owner.totalNodes - 1 + adjustedF = getMaxFailures(currTotalNodes) + filtered = self._getNotEmptyProofs(recvdConsProof) + return len(filtered) >= (currTotalNodes - adjustedF) def processCatchupReq(self, req: CatchupReq, frm: str): logger.debug("{} received catchup request: {} from {}". format(self, req, frm)) + if not self.ownedByNode: + self.discard(req, reason="Only node can serve catchup requests", + logMethod=logger.warn) + return - start, end = getattr(req, f.SEQ_NO_START.nm), \ - getattr(req, f.SEQ_NO_END.nm) + start = getattr(req, f.SEQ_NO_START.nm) + end = getattr(req, f.SEQ_NO_END.nm) ledger = self.getLedgerForMsg(req) if end < start: self.discard(req, reason="Invalid range", logMethod=logger.warn) return if start > ledger.size: - self.discard(req, reason="{} not able to service since ledger size " - "is {}".format(self, ledger.size), + self.discard(req, reason="{} not able to service since " + "ledger size is {}" + .format(self, ledger.size), logMethod=logger.debug) return # Adjusting for end greater than ledger size if end > ledger.size: - logger.debug("{} does not have transactions till {} so sending only" - " till {}".format(self, end, ledger.size)) + logger.debug("{} does not have transactions till {} " + "so sending only till {}" + .format(self, end, ledger.size)) end = ledger.size # TODO: This is very inefficient for long ledgers @@ -377,51 +398,61 @@ def processCatchupReq(self, req: CatchupReq, frm: str): logger.debug("{} generating consistency proof: {} from {}". 
format(self, end, req.catchupTill)) - consProof = [b64encode(p).decode() for p in + consProof = [Ledger.hashToStr(p) for p in ledger.tree.consistency_proof(end, req.catchupTill)] - self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_TYPE.nm), txns, + + for seq_no in txns: + txns[seq_no] = self.owner.update_txn_with_extra_data(txns[seq_no]) + self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_ID.nm), txns, consProof), to=frm) def processCatchupRep(self, rep: CatchupRep, frm: str): logger.debug("{} received catchup reply from {}: {}". format(self, frm, rep)) - ledgerType = getattr(rep, f.LEDGER_TYPE.nm) txns = self.canProcessCatchupReply(rep) txnsNum = len(txns) if txns else 0 logger.debug("{} found {} transactions in the catchup from {}" .format(self, txnsNum, frm)) - if txns: - ledger = self.getLedgerForMsg(rep) - if frm not in self.recvdCatchupRepliesFrm[ledgerType]: - self.recvdCatchupRepliesFrm[ledgerType][frm] = [] - self.recvdCatchupRepliesFrm[ledgerType][frm].append(rep) - catchUpReplies = self.receivedCatchUpReplies[ledgerType] - # Creating a list of txns sorted on the basis of sequence - # numbers - logger.debug("{} merging all received catchups".format(self)) - catchUpReplies = list(heapq.merge(catchUpReplies, txns, - key=operator.itemgetter(0))) - logger.debug( - "{} merged catchups, there are {} of them now, from {} to {}" - .format(self, len(catchUpReplies), catchUpReplies[0][0], - catchUpReplies[-1][0])) - - numProcessed = self._processCatchupReplies(ledgerType, ledger, - catchUpReplies) - logger.debug( - "{} processed {} catchup replies with sequence numbers {}" - .format(self, numProcessed, [seqNo for seqNo, _ in - catchUpReplies[ - :numProcessed]])) - self.receivedCatchUpReplies[ledgerType] = \ - catchUpReplies[numProcessed:] - if getattr(self.catchUpTill[ledgerType], f.SEQ_NO_END.nm) == \ - ledger.size: - self.catchUpTill[ledgerType] = None - self.catchupCompleted(ledgerType) - - def _processCatchupReplies(self, ledgerType, ledger: Ledger, + if not 
txns: + return + + ledgerId = getattr(rep, f.LEDGER_ID.nm) + ledger = self.getLedgerInfoByType(ledgerId) + + reallyLedger = self.getLedgerForMsg(rep) + + if frm not in ledger.recvdCatchupRepliesFrm: + ledger.recvdCatchupRepliesFrm[frm] = [] + + ledger.recvdCatchupRepliesFrm[frm].append(rep) + + catchUpReplies = ledger.receivedCatchUpReplies + # Creating a list of txns sorted on the basis of sequence + # numbers + logger.debug("{} merging all received catchups".format(self)) + catchUpReplies = list(heapq.merge(catchUpReplies, txns, + key=operator.itemgetter(0))) + logger.debug( + "{} merged catchups, there are {} of them now, from {} to {}" + .format(self, len(catchUpReplies), catchUpReplies[0][0], + catchUpReplies[-1][0])) + + numProcessed = self._processCatchupReplies(ledgerId, reallyLedger, + catchUpReplies) + logger.debug( + "{} processed {} catchup replies with sequence numbers {}" + .format(self, numProcessed, [seqNo for seqNo, _ in + catchUpReplies[ + :numProcessed]])) + + ledger.receivedCatchUpReplies = catchUpReplies[numProcessed:] + if getattr(ledger.catchUpTill, f.SEQ_NO_END.nm) == reallyLedger.size: + cp = ledger.catchUpTill + ledger.catchUpTill = None + self.catchupCompleted(ledgerId, cp.ppSeqNo) + + def _processCatchupReplies(self, ledgerId, ledger: Ledger, catchUpReplies: List): # Removing transactions for sequence numbers are already # present in the ledger @@ -436,16 +467,16 @@ def _processCatchupReplies(self, ledgerType, ledger: Ledger, seqNo = catchUpReplies[0][0] if seqNo - ledger.seqNo == 1: result, nodeName, toBeProcessed = self.hasValidCatchupReplies( - ledgerType, ledger, seqNo, catchUpReplies) + ledgerId, ledger, seqNo, catchUpReplies) if result: + ledgerInfo = self.getLedgerInfoByType(ledgerId) for _, txn in catchUpReplies[:toBeProcessed]: - merkleInfo = ledger.add(txn) + merkleInfo = ledger.add(self._transform(txn)) txn[F.seqNo.name] = merkleInfo[F.seqNo.name] - self.ledgers[ledgerType]["postTxnAddedToLedgerClbk"]( - ledgerType, txn) - 
self._removePrcdCatchupReply(ledgerType, nodeName, seqNo) + ledgerInfo.postTxnAddedToLedgerClbk(ledgerId, txn) + self._removePrcdCatchupReply(ledgerId, nodeName, seqNo) return numProcessed + toBeProcessed + \ - self._processCatchupReplies(ledgerType, ledger, + self._processCatchupReplies(ledgerId, ledger, catchUpReplies[toBeProcessed:]) else: if self.ownedByNode: @@ -453,7 +484,7 @@ def _processCatchupReplies(self, ledgerType, ledger: Ledger, reason="Sent transactions " "that could not be " "verified") - self._removePrcdCatchupReply(ledgerType, nodeName, + self._removePrcdCatchupReply(ledgerId, nodeName, seqNo) # Invalid transactions have to be discarded so letting # the caller know how many txns have to removed from @@ -461,49 +492,60 @@ def _processCatchupReplies(self, ledgerType, ledger: Ledger, return numProcessed + toBeProcessed return numProcessed - def _removePrcdCatchupReply(self, ledgerType, node, seqNo): - for i, rep in enumerate(self.recvdCatchupRepliesFrm[ledgerType][node]): + def _removePrcdCatchupReply(self, ledgerId, node, seqNo): + ledgerInfo = self.getLedgerInfoByType(ledgerId) + for i, rep in enumerate(ledgerInfo.recvdCatchupRepliesFrm[node]): if str(seqNo) in getattr(rep, f.TXNS.nm): break - self.recvdCatchupRepliesFrm[ledgerType][node].pop(i) + ledgerInfo.recvdCatchupRepliesFrm[node].pop(i) - def hasValidCatchupReplies(self, ledgerType, ledger, seqNo, catchUpReplies): + def _transform(self, txn): + # Certain transactions other than pool ledger might need to be + # transformed to certain format before applying to the ledger + if not self.ownedByNode: + return txn + else: + return self.owner.transform_txn_for_ledger(txn) + + def hasValidCatchupReplies(self, ledgerId, ledger, seqNo, catchUpReplies): # Here seqNo has to be the seqNo of first transaction of # `catchupReplies` - # Creating a temporary tree which will be used to verify consistency - # proof, by inserting transactions. 
Duplicating a merkle tree is not - # expensive since we are using a compact merkle tree. - tempTree = copy(ledger.tree) - # Get the batch of transactions in the catchup reply which has sequence # number `seqNo` - nodeName, catchupReply = self._getCatchupReplyForSeqNo(ledgerType, + nodeName, catchupReply = self._getCatchupReplyForSeqNo(ledgerId, seqNo) txns = getattr(catchupReply, f.TXNS.nm) - for s, txn in catchUpReplies[:len(txns)]: - # Add only those transaction in the temporary tree from the above - # batch - # Transfers of odcits in RAET converts integer keys to string - if str(s) in txns: - tempTree.append(ledger.serializeLeaf(txn)) + + # Add only those transaction in the temporary tree from the above + # batch + + # Transfers of odcits in RAET converts integer keys to string + txns = [self._transform(txn) for s, txn in catchUpReplies[:len(txns)] + if str(s) in txns] + + # Creating a temporary tree which will be used to verify consistency + # proof, by inserting transactions. Duplicating a merkle tree is not + # expensive since we are using a compact merkle tree. + tempTree = ledger.treeWithAppliedTxns(txns) proof = getattr(catchupReply, f.CONS_PROOF.nm) - verifier = self.ledgers[ledgerType]["verifier"] - cp = self.catchUpTill[ledgerType] + ledgerInfo = self.getLedgerInfoByType(ledgerId) + verifier = ledgerInfo.verifier + cp = ledgerInfo.catchUpTill finalSize = getattr(cp, f.SEQ_NO_END.nm) finalMTH = getattr(cp, f.NEW_MERKLE_ROOT.nm) try: logger.debug("{} verifying proof for {}, {}, {}, {}, {}". 
format(self, tempTree.tree_size, finalSize, - tempTree.root_hash, b64decode(finalMTH), - [b64decode(p) for p in proof])) + tempTree.root_hash, Ledger.strToHash(finalMTH), + [Ledger.strToHash(p) for p in proof])) verified = verifier.verify_tree_consistency(tempTree.tree_size, finalSize, tempTree.root_hash, - b64decode(finalMTH), - [b64decode(p) for p in + Ledger.strToHash(finalMTH), + [Ledger.strToHash(p) for p in proof]) except Exception as ex: logger.info("{} could not verify catchup reply {} since {}". @@ -511,10 +553,12 @@ def hasValidCatchupReplies(self, ledgerType, ledger, seqNo, catchUpReplies): verified = False return bool(verified), nodeName, len(txns) - def _getCatchupReplyForSeqNo(self, ledgerType, seqNo): + def _getCatchupReplyForSeqNo(self, ledgerId, seqNo): # This is inefficient if we have large number of nodes but since # number of node are always between 60-120, this is ok. - for k, catchupReps in self.recvdCatchupRepliesFrm[ledgerType].items(): + + ledgerInfo = self.getLedgerInfoByType(ledgerId) + for k, catchupReps in ledgerInfo.recvdCatchupRepliesFrm.items(): for rep in catchupReps: txns = getattr(rep, f.TXNS.nm) # Transfers of odcits in RAET converts integer keys to string @@ -524,10 +568,17 @@ def _getCatchupReplyForSeqNo(self, ledgerType, seqNo): def processConsistencyProofReq(self, req: ConsProofRequest, frm: str): logger.debug("{} received consistency proof request: {} from {}". 
format(self, req, frm)) - ledgerType = getattr(req, f.LEDGER_TYPE.nm) + if not self.ownedByNode: + self.discard(req, + reason='Only nodes can service this request', + logMethod=logger.warning) + return + + ledgerId = getattr(req, f.LEDGER_ID.nm) seqNoStart = getattr(req, f.SEQ_NO_START.nm) seqNoEnd = getattr(req, f.SEQ_NO_END.nm) - consistencyProof = self._buildConsistencyProof(ledgerType, seqNoStart, + consistencyProof = self._buildConsistencyProof(ledgerId, + seqNoStart, seqNoEnd) # TODO: Build a test for this scenario where a node cannot service a # consistency proof request @@ -535,62 +586,68 @@ def processConsistencyProofReq(self, req: ConsProofRequest, frm: str): self.sendTo(consistencyProof, frm) def canProcessCatchupReply(self, catchupReply: CatchupRep) -> List[Tuple]: - ledgerType = getattr(catchupReply, f.LEDGER_TYPE.nm) - if self.ledgers[ledgerType]["state"] == LedgerState.syncing: - ledger = self.getLedgerForMsg(catchupReply) - # Not relying on a node sending txns in order of sequence no - txns = sorted([(int(s), t) for (s, t) in - getattr(catchupReply, f.TXNS.nm).items()], - key=operator.itemgetter(0)) - anyNew = any([s > ledger.size for s, _ in txns]) - # The transactions should be contiguous in terms of sequence numbers - noGapsOrDups = len(txns) == 0 or \ - (len(txns) == (txns[-1][0] - txns[0][0] + 1)) - if not anyNew: - self.discard(catchupReply, - reason="ledger has size {} and it already contains" - " all transactions in the reply". - format(ledger.size), logMethod=logger.info) - if not noGapsOrDups: - self.discard(catchupReply, - reason="contains duplicates or gaps", - logMethod=logger.info) - if anyNew and noGapsOrDups: - return txns - else: + ledgerId = getattr(catchupReply, f.LEDGER_ID.nm) + ledgerState = self.getLedgerInfoByType(ledgerId).state + if ledgerState != LedgerState.syncing: logger.debug("{} cannot process catchup reply {} since ledger " "is in state {}". 
- format(self, catchupReply, - self.ledgers[ledgerType]["state"])) + format(self, catchupReply, ledgerState)) + return [] + + ledger = self.getLedgerForMsg(catchupReply) + # Not relying on a node sending txns in order of sequence no + txns = sorted([(int(s), t) for (s, t) in + getattr(catchupReply, f.TXNS.nm).items()], + key=operator.itemgetter(0)) + anyNew = any([s > ledger.size for s, _ in txns]) + # The transactions should be contiguous in terms of sequence numbers + noGapsOrDups = len(txns) == 0 or \ + (len(txns) == (txns[-1][0] - txns[0][0] + 1)) + if not anyNew: + self.discard(catchupReply, + reason="ledger has size {} and it already contains" + " all transactions in the reply". + format(ledger.size), logMethod=logger.info) + if not noGapsOrDups: + self.discard(catchupReply, + reason="contains duplicates or gaps", + logMethod=logger.info) + if anyNew and noGapsOrDups: + return txns # ASSUMING NO MALICIOUS NODES # Assuming that all nodes have the same state of the system and no node # is lagging behind. So if two new nodes are added in quick succession in a # high traffic environment, this logic is faulty - def canStartCatchUpProcess(self, ledgerType: int): - recvdConsProof = self.recvdConsistencyProofs[ledgerType] + def canStartCatchUpProcess(self, ledgerId: int): + ledgerInfo = self.getLedgerInfoByType(ledgerId) + recvdConsProof = ledgerInfo.recvdConsistencyProofs # Consider an f value when this node was not connected adjustedF = getMaxFailures(self.owner.totalNodes - 1) if len(recvdConsProof) == (adjustedF+1): - self.consistencyProofsTimers[ledgerType] = time.perf_counter() - self._schedule(partial(self.checkIfCPsNeeded, ledgerType), - self.config.ConsistencyProofsTimeout*( + # At least once correct node believes that this node is behind. 
+ + # Start timer that will expire in some time and if till that time + # enough CPs are not received, then explicitly request CPs + # from other nodes, see `checkIfCPsNeeded` + + ledgerInfo.consistencyProofsTimer = time.perf_counter() + self._schedule(partial(self.checkIfCPsNeeded, ledgerId), + self.config.ConsistencyProofsTimeout * ( self.owner.totalNodes - 1)) - if len(recvdConsProof) > 2*adjustedF: + if len(recvdConsProof) > 2 * adjustedF: logger.debug("{} deciding on the basis of CPs {} and f {}". format(self, recvdConsProof, adjustedF)) grpdPrf, nullProofs = self._groupConsistencyProofs(recvdConsProof) - # If more than f nodes were found to be at the same state then this # node's state is good too if nullProofs > adjustedF: return True, None - result = self._latestReliableProof(grpdPrf, - self.ledgers[ledgerType]["ledger"]) + ledgerInfo.ledger) + cp = ConsistencyProof(ledgerId, *result) if result else None + return bool(result),cp - return bool(result), (None if not result else ConsistencyProof( - ledgerType, *result)) logger.debug("{} cannot start catchup since received only {} " "consistency proofs but need at least {}". format(self, len(recvdConsProof), 2*adjustedF + 1)) @@ -607,7 +664,8 @@ def _groupConsistencyProofs(self, proofs): getattr(proof, f.SEQ_NO_END.nm) if (start, end) not in recvdPrf: recvdPrf[(start, end)] = {} - key = (getattr(proof, f.OLD_MERKLE_ROOT.nm), + key = (getattr(proof, f.PP_SEQ_NO.nm), + getattr(proof, f.OLD_MERKLE_ROOT.nm), getattr(proof, f.NEW_MERKLE_ROOT.nm), tuple(getattr(proof, f.HASHES.nm))) recvdPrf[(start, end)][key] = recvdPrf[(start, end)]. 
\ @@ -622,9 +680,10 @@ def _reliableProofs(self, groupedProofs): adjustedF = getMaxFailures(self.owner.totalNodes - 1) result = {} for (start, end), val in groupedProofs.items(): - for (oldRoot, newRoot, hashes), count in val.items(): + for (lastPpSeqNo, oldRoot, newRoot, hashes), count in val.items(): if count > adjustedF: - result[(start, end)] = (oldRoot, newRoot, hashes) + result[(start, end)] = (lastPpSeqNo, oldRoot, newRoot, + hashes) # There would be only one correct proof for a range of # sequence numbers break @@ -633,7 +692,8 @@ def _reliableProofs(self, groupedProofs): def _latestReliableProof(self, groupedProofs, ledger): reliableProofs = self._reliableProofs(groupedProofs) latest = None - for (start, end), (oldRoot, newRoot, hashes) in reliableProofs.items(): + for (start, end), (lastPpSeqNo, oldRoot, newRoot, hashes) in \ + reliableProofs.items(): # TODO: Can we do something where consistency proof's start is older # than the current ledger's size and proof's end is larger # than the current ledger size. @@ -641,66 +701,85 @@ def _latestReliableProof(self, groupedProofs, ledger): if start != ledger.size: continue if latest is None: - latest = (start, end) + (oldRoot, newRoot, hashes) + latest = (start, end) + (lastPpSeqNo, oldRoot, newRoot, hashes) elif latest[1] < end: - latest = (start, end) + (oldRoot, newRoot, hashes) + latest = (start, end) + (lastPpSeqNo, oldRoot, newRoot, hashes) return latest - def getConsistencyProofRequest(self, ledgerType, groupedProofs): + def getConsistencyProofRequest(self, ledgerId, groupedProofs): # Choose the consistency proof which occurs median number of times in # grouped proofs. Not choosing the highest since some malicious nodes # might be sending non-existent sequence numbers and not choosing the # lowest since that might not be enough as some nodes must be lagging # behind a lot or some malicious nodes might send low sequence numbers. 
proofs = sorted(groupedProofs.items(), key=lambda t: max(t[1].values())) - ledger = self.ledgers[ledgerType]["ledger"] - return ConsProofRequest(ledgerType, ledger.size, proofs[len(proofs)//2][0][1]) + ledger = self.getLedgerInfoByType(ledgerId).ledger + return ConsProofRequest(ledgerId, + ledger.size, + proofs[len(proofs) // 2][0][1]) - def startCatchUpProcess(self, ledgerType: int, proof: ConsistencyProof): + def startCatchUpProcess(self, ledgerId: int, proof: ConsistencyProof): logger.debug("{} started catching up with consistency proof {}". format(self, proof)) - if ledgerType not in self.ledgers: + if ledgerId not in self.ledgerRegistry: self.discard(proof, reason="Unknown ledger type {}". - format(ledgerType)) + format(ledgerId)) return - self.ledgers[ledgerType]["state"] = LedgerState.syncing - self.consistencyProofsTimers[ledgerType] = None - self.recvdConsistencyProofs[ledgerType] = {} - if proof is not None: - self.ledgers[ledgerType]["state"] = LedgerState.syncing - p = ConsistencyProof(*proof) - rids = [self.nodestack.getRemote(nm).uid for nm in - self.nodestack.conns] - reqs = self.getCatchupReqs(p) - for req in zip(reqs, rids): - self.send(*req) - self.catchUpTill[ledgerType] = p - if reqs: - self.catchupReplyTimers[ledgerType] = time.perf_counter() - timeout = self._getCatchupTimeout( - len(reqs), - getattr(reqs[0], f.SEQ_NO_END.nm) - - getattr(reqs[0], f.SEQ_NO_START.nm) + 1) - self._schedule(partial(self.checkIfTxnsNeeded, ledgerType), - timeout) - else: - self.catchupCompleted(ledgerType) + + if proof is None: + self.catchupCompleted(ledgerId) + return + + ledgerInfo = self.getLedgerInfoByType(ledgerId) + ledgerInfo.state = LedgerState.syncing + ledgerInfo.consistencyProofsTimer = None + ledgerInfo.recvdConsistencyProofs = {} + + p = ConsistencyProof(*proof) + rids = [self.nodestack.getRemote(nm).uid for nm in + self.nodestack.conns] + reqs = self.getCatchupReqs(p) + for req in zip(reqs, rids): + self.send(*req) + ledgerInfo.catchUpTill = p + if 
reqs: + ledgerInfo.catchupReplyTimer = time.perf_counter() + batchSize = getattr(reqs[0], f.SEQ_NO_END.nm) - \ + getattr(reqs[0], f.SEQ_NO_START.nm) + 1 + timeout = self._getCatchupTimeout(len(reqs), batchSize) + self._schedule(partial(self.checkIfTxnsNeeded, ledgerId), + timeout) def _getCatchupTimeout(self, numRequest, batchSize): return numRequest * (self.config.CatchupTransactionsTimeout + - .1*batchSize) - - def catchupCompleted(self, ledgerType: int): - self.catchupReplyTimers[ledgerType] = None - logger.debug("{} completed catching up ledger {}".format(self, - ledgerType)) - if ledgerType not in self.ledgers: + 0.1 * batchSize) + + def catchupCompleted(self, ledgerId: int, lastPpSeqNo: int=-1): + # Since multiple ledger will be caught up and catchups might happen + # multiple times for a single ledger, the largest seen + # ppSeqNo needs to be known. + if self.lastCaughtUpPpSeqNo < lastPpSeqNo: + self.lastCaughtUpPpSeqNo = lastPpSeqNo + + ledgerInfo = self.getLedgerInfoByType(ledgerId) + ledgerInfo.catchupReplyTimer = None + logger.debug("{} completed catching up ledger {}" + .format(self, ledgerId)) + if ledgerId not in self.ledgerRegistry: logger.error("{} called catchup completed for ledger {}". 
- format(self, ledgerType)) + format(self, ledgerId)) return - self.ledgers[ledgerType]["canSync"] = False - self.ledgers[ledgerType]["state"] = LedgerState.synced - self.ledgers[ledgerType]["postCatchupCompleteClbk"]() + + ledgerInfo.canSync = False + ledgerInfo.state = LedgerState.synced + ledgerInfo.ledgerStatusOk = set() + ledgerInfo.recvdConsistencyProofs = {} + ledgerInfo.postCatchupCompleteClbk() + + if self.postAllLedgersCaughtUp: + if all(l.state == LedgerState.synced + for l in self.ledgerRegistry.values()): + self.postAllLedgersCaughtUp() def getCatchupReqs(self, consProof: ConsistencyProof): nodeCount = len(self.nodestack.conns) @@ -711,8 +790,8 @@ def getCatchupReqs(self, consProof: ConsistencyProof): s = start + 1 e = min(s + batchLength - 1, end) for i in range(nodeCount): - reqs.append(CatchupReq(getattr(consProof, f.LEDGER_TYPE.nm), - s, e, end)) + req = CatchupReq(getattr(consProof, f.LEDGER_ID.nm), s, e, end) + reqs.append(req) s = e + 1 e = min(s + batchLength - 1, end) if s > end: @@ -721,23 +800,34 @@ def getCatchupReqs(self, consProof: ConsistencyProof): def getConsistencyProof(self, status: LedgerStatus): ledger = self.getLedgerForMsg(status) # type: Ledger - ledgerType = getattr(status, f.LEDGER_TYPE.nm) + ledgerId = getattr(status, f.LEDGER_ID.nm) seqNoStart = getattr(status, f.TXN_SEQ_NO.nm) seqNoEnd = ledger.size - return self._buildConsistencyProof(ledgerType, seqNoStart, seqNoEnd) + return self._buildConsistencyProof(ledgerId, seqNoStart, seqNoEnd) + + def _getNotEmptyProofs(self, proofs): + return [proof for frm, proof in proofs.items() if proof] + + def _buildConsistencyProof(self, ledgerId, seqNoStart, seqNoEnd): + + ledger = self.getLedgerInfoByType(ledgerId).ledger - def _buildConsistencyProof(self, ledgerType, seqNoStart, seqNoEnd): - ledger = self.ledgers[ledgerType]["ledger"] ledgerSize = ledger.size if seqNoStart > ledgerSize: - logger.error("{} cannot build consistency proof from {} since its " - "ledger size is 
{}".format(self, seqNoStart, - ledgerSize)) + logger.error("{} cannot build consistency proof from {} " + "since its ledger size is {}" + .format(self, seqNoStart, ledgerSize)) return if seqNoEnd > ledgerSize: - logger.error("{} cannot build consistency proof till {} since its " - "ledger size is {}".format(self, seqNoEnd, ledgerSize)) + logger.error("{} cannot build consistency " + "proof till {} since its ledger size is {}" + .format(self, seqNoEnd, ledgerSize)) + return + if seqNoEnd < seqNoStart: + self.error('{} cannot build consistency proof since end {} is ' + 'lesser than start {}'.format(self, seqNoEnd, seqNoStart)) return + if seqNoStart == 0: # Consistency proof for an empty tree cannot exist. Using the root # hash now so that the node which is behind can verify that @@ -747,26 +837,28 @@ def _buildConsistencyProof(self, ledgerType, seqNoStart, seqNoEnd): else: proof = ledger.tree.consistency_proof(seqNoStart, seqNoEnd) oldRoot = ledger.tree.merkle_tree_hash(0, seqNoStart) + newRoot = ledger.tree.merkle_tree_hash(0, seqNoEnd) + ppSeqNo = self.owner.ppSeqNoForTxnSeqNo(ledgerId, seqNoEnd) + logger.debug('{} found ppSeqNo {} for ledger {} seqNo {}'. 
+ format(self, ppSeqNo, ledgerId, seqNoEnd)) return ConsistencyProof( - ledgerType, + ledgerId, seqNoStart, seqNoEnd, - b64encode(oldRoot).decode(), - b64encode(newRoot).decode(), - [b64encode(p).decode() for p in - proof] + ppSeqNo, + Ledger.hashToStr(oldRoot), + Ledger.hashToStr(newRoot), + [Ledger.hashToStr(p) for p in proof] ) def _compareLedger(self, status: LedgerStatus): - ledgerType = getattr(status, f.LEDGER_TYPE.nm) + ledgerId = getattr(status, f.LEDGER_ID.nm) seqNo = getattr(status, f.TXN_SEQ_NO.nm) ledger = self.getLedgerForMsg(status) - logger.debug( - "{} comparing its ledger {} of size {} with {}".format(self, - ledgerType, - ledger.seqNo, - seqNo)) + logger.debug("{} comparing its ledger {} " + "of size {} with {}" + .format(self,ledgerId, ledger.seqNo, seqNo)) return ledger.seqNo - seqNo def isLedgerOld(self, status: LedgerStatus): @@ -779,49 +871,45 @@ def isLedgerSame(self, status: LedgerStatus): return self._compareLedger(status) == 0 def getLedgerForMsg(self, msg: Any) -> Ledger: - typ = getattr(msg, f.LEDGER_TYPE.nm) - if typ in self.ledgers: - return self.ledgers[typ]["ledger"] - else: - self.discard(msg, reason="Invalid ledger msg type") + ledgerType = getattr(msg, f.LEDGER_ID.nm) + if ledgerType in self.ledgerRegistry: + return self.getLedgerInfoByType(ledgerType).ledger + self.discard(msg, reason="Invalid ledger msg type") - def appendToLedger(self, typ: int, txn: Any) -> Dict: - if typ not in self.ledgers: - logger.error("ledger type {} not present in ledgers so cannot add " - "txn".format(typ)) - return - return self.ledgers[typ]["ledger"].append(txn) + def getLedgerInfoByType(self, ledgerType) -> LedgerInfo: + if ledgerType not in self.ledgerRegistry: + raise ValueError("Invalid ledger type: {}".format(ledgerType)) + return self.ledgerRegistry[ledgerType] + + def appendToLedger(self, ledgerId: int, txn: Any) -> Dict: + ledgerInfo = self.getLedgerInfoByType(ledgerId) + return ledgerInfo.ledger.append(txn) - def stashLedgerStatus(self, 
ledgerType: int, status, frm: str): + def stashLedgerStatus(self, ledgerId: int, status, frm: str): logger.debug("{} stashing ledger status {} from {}". format(self, status, frm)) - self.stashedLedgerStatuses[ledgerType].append((status, frm)) + ledgerInfo = self.getLedgerInfoByType(ledgerId) + ledgerInfo.stashedLedgerStatuses.append((status, frm)) - def processStashedLedgerStatuses(self, ledgerType: int): - if ledgerType not in self.stashedLedgerStatuses: - logger.error("{} cannot process ledger of type {}". - format(self, ledgerType)) - return 0 + def processStashedLedgerStatuses(self, ledgerId: int): + ledgerInfo = self.getLedgerInfoByType(ledgerId) i = 0 - while self.stashedLedgerStatuses[ledgerType]: - msg, frm = self.stashedLedgerStatuses[ledgerType].pop() + while ledgerInfo.stashedLedgerStatuses: + msg, frm = ledgerInfo.stashedLedgerStatuses.pop() i += 1 self.processLedgerStatus(msg, frm) logger.debug("{} processed {} stashed ledger statuses".format(self, i)) return i def getStack(self, remoteName: str): - if self.ownedByNode: - if self.clientstack.hasRemote(remoteName): - return self.clientstack - else: - pass + if self.ownedByNode and self.clientstack.hasRemote(remoteName): + return self.clientstack if self.nodestack.hasRemote(remoteName): return self.nodestack - else: - logger.error("{} cannot find remote with name {}". 
- format(self, remoteName)) + + logger.error("{} cannot find remote with name {}" + .format(self, remoteName)) def sendTo(self, msg: Any, to: str): stack = self.getStack(to) @@ -844,10 +932,7 @@ def nodestack(self): @property def clientstack(self): - if self.ownedByNode: - return self.owner.clientstack - else: - logger.debug("{} trying to get clientstack".format(self)) + return self.owner.clientstack if self.ownedByNode else None @property def send(self): @@ -861,5 +946,4 @@ def discard(self): def blacklistedNodes(self): if self.ownedByNode: return self.owner.blacklistedNodes - else: - return set() + return set() diff --git a/plenum/common/looper.py b/plenum/common/looper.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/common/member/member.py b/plenum/common/member/member.py index 745aa762b5..cfa5144bd9 100644 --- a/plenum/common/member/member.py +++ b/plenum/common/member/member.py @@ -13,7 +13,7 @@ def nym_txn(nym, name, verkey=None, role=None, creator=None): txn = { TXN_TYPE: NYM, TARGET_NYM: nym, - TXN_ID: sha256(name.encode()).hexdigest() + #TXN_ID: sha256(name.encode()).hexdigest() } if verkey is not None: txn[VERKEY] = verkey diff --git a/plenum/common/message_processor.py b/plenum/common/message_processor.py index 5c476ff24d..c8e23b1bcc 100644 --- a/plenum/common/message_processor.py +++ b/plenum/common/message_processor.py @@ -28,7 +28,6 @@ def discard(self, msg, reason, logMethod=logging.error, cliOutput=False): logMethod("{} discarding message {}{}".format(self, msg, reason), extra={"cli": cliOutput}) - def toDict(self, msg: Dict) -> Dict: """ Return a dictionary form of the message @@ -53,7 +52,6 @@ def toDict(self, msg: Dict) -> Dict: tmsg = msg return tmsg - def prepForSending(self, msg: Dict, signer: Signer = None) -> Dict: msg = self.toDict(msg) if signer: diff --git a/plenum/common/messages/__init__.py b/plenum/common/messages/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/plenum/common/messages/client_request.py b/plenum/common/messages/client_request.py new file mode 100644 index 0000000000..11ab241ebb --- /dev/null +++ b/plenum/common/messages/client_request.py @@ -0,0 +1,68 @@ +from plenum.common.constants import * +from plenum.common.messages.fields import * +from plenum.common.messages.message_base import MessageValidator + + +class ClientNodeOperationData(MessageValidator): + schema = ( + (NODE_IP, NetworkIpAddressField(optional=True)), + (NODE_PORT, NetworkPortField(optional=True)), + (CLIENT_IP, NetworkIpAddressField(optional=True)), + (CLIENT_PORT, NetworkPortField(optional=True)), + (ALIAS, NonEmptyStringField()), + (SERVICES, IterableField(ChooseField(values=(VALIDATOR,)), optional=True)), + ) + + def _validate_message(self, dct): + required_ha_fields = {NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT} + ha_fields = {f for f in required_ha_fields if f in dct} + if ha_fields and len(ha_fields) != len(required_ha_fields): + self._raise_missed_fields(*list(required_ha_fields - ha_fields)) + + +class ClientNodeOperation(MessageValidator): + schema = ( + (TXN_TYPE, ConstantField(NODE)), + (DATA, ClientNodeOperationData()), + (TARGET_NYM, IdentifierField()), + (VERKEY, VerkeyField(optional=True)), + ) + + +class ClientNYMOperation(MessageValidator): + schema = ( + (TXN_TYPE, ConstantField(NYM)), + (ALIAS, NonEmptyStringField(optional=True)), + (VERKEY, VerkeyField(optional=True)), + (TARGET_NYM, IdentifierField()), + (ROLE, RoleField(optional=True)), + # TODO: validate role using ChooseField, + # do roles list expandable form outer context + ) + + +class ClientOperationField(MessageValidator): + + operations = { + NODE: ClientNodeOperation(), + NYM: ClientNYMOperation(), + } + + def validate(self, dct): + """ + Choose a schema for client request operation and validate + the operation field. If the schema is not found skips validation. 
+ :param dct: an operation field from client request + :return: raises exception if invalid request + """ + if not isinstance(dct, dict): + # TODO this check should be in side of the validator not here + self._raise_invalid_fields('', dct, 'wrong type') + schema_type = dct.get(TXN_TYPE, None) + if not schema_type: + self._raise_missed_fields(TXN_TYPE) + if schema_type in self.operations: + # check only if the schema is defined + op = self.operations[schema_type] + self._validate_fields_with_schema(dct, op.schema) + self._validate_message(dct) diff --git a/plenum/common/messages/fields.py b/plenum/common/messages/fields.py new file mode 100644 index 0000000000..c8abc6eb9c --- /dev/null +++ b/plenum/common/messages/fields.py @@ -0,0 +1,261 @@ +import ipaddress +import json +import base58 +import re + +from plenum.common.constants import DOMAIN_LEDGER_ID, POOL_LEDGER_ID + + +class FieldValidator: + + def validate(self, val): + raise NotImplementedError + + +class FieldBase(FieldValidator): + _base_types = () + + def __init__(self, optional=False, nullable=False): + self.optional = optional + self.nullable = nullable + + def validate(self, val): + if self.nullable and val is None: + return + type_er = self.__type_check(val) + if type_er: + return type_er + return self._specific_validation(val) + + def _specific_validation(self, val): + raise NotImplementedError + + def __type_check(self, val): + if self._base_types is None: + return # type check is disabled + for t in self._base_types: + if isinstance(val, t): + return + return self._wrong_type_msg(val) + + def _wrong_type_msg(self, val): + types_str = ', '.join(map(lambda x: x.__name__, self._base_types)) + return "expected types '{}', got '{}'" \ + "".format(types_str, type(val).__name__) + + +class NonEmptyStringField(FieldBase): + _base_types = (str,) + + def _specific_validation(self, val): + if not val: + return 'empty string' + + +class SignatureField(FieldBase): + _base_types = (str, type(None)) + # TODO do 
nothing because EmptySignature should be raised somehow + + def _specific_validation(self, val): + return + + +class RoleField(FieldBase): + _base_types = (str, type(None)) + # TODO implement + + def _specific_validation(self, val): + return + + +class NonNegativeNumberField(FieldBase): + + _base_types = (int,) + + def _specific_validation(self, val): + if val < 0: + return 'negative value' + + +class ConstantField(FieldBase): + _base_types = None + + def __init__(self, value, **kwargs): + super().__init__(**kwargs) + self.value = value + + def _specific_validation(self, val): + if val != self.value: + return 'has to be equal {}'.format(self.value) + + +class IterableField(FieldBase): + + _base_types = (list, tuple) + + def __init__(self, inner_field_type: FieldValidator, **kwargs): + self.inner_field_type = inner_field_type + super().__init__(**kwargs) + + def _specific_validation(self, val): + for v in val: + check_er = self.inner_field_type.validate(v) + if check_er: + return check_er + + +class MapField(FieldBase): + _base_types = (dict, ) + + def __init__(self, key_field: FieldBase, value_field: FieldBase, + **kwargs): + super().__init__(**kwargs) + self._key_field = key_field + self._value_field = value_field + + def _specific_validation(self, val): + for k, v in val.items(): + key_error = self._key_field.validate(k) + if key_error: + return key_error + val_error = self._value_field.validate(v) + if val_error: + return val_error + + +class NetworkPortField(FieldBase): + _base_types = (int,) + + def _specific_validation(self, val): + if val < 0 or val > 65535: + return 'network port out of the range 0-65535' + + +class NetworkIpAddressField(FieldBase): + _base_types = (str,) + _non_valid_addresses = ('0.0.0.0', '0:0:0:0:0:0:0:0', '::') + + def _specific_validation(self, val): + invalid_address = False + try: + ipaddress.ip_address(val) + except ValueError: + invalid_address = True + if invalid_address or val in self._non_valid_addresses: + return 'invalid 
network ip address ({})'.format(val) + + +class ChooseField(FieldBase): + _base_types = None + + def __init__(self, values, **kwargs): + self._possible_values = values + super().__init__(**kwargs) + + def _specific_validation(self, val): + if val not in self._possible_values: + return "expected '{}' unknown value '{}'" \ + "".format(', '.join(map(str, self._possible_values)), val) + + +class LedgerIdField(ChooseField): + _base_types = (int,) + ledger_ids = (POOL_LEDGER_ID, DOMAIN_LEDGER_ID) + + def __init__(self, **kwargs): + super().__init__(self.ledger_ids, **kwargs) + + +class IdentifierField(NonEmptyStringField): + _base_types = (str, ) + # TODO implement the rules + + +class RequestIdentifierField(FieldBase): + _base_types = (list, tuple) + _length = 2 + + def _specific_validation(self, val): + if len(val) != self._length: + return "should have length {}".format(self._length) + idr_error = NonEmptyStringField().validate(val[0]) + if idr_error: + return idr_error + ts_error = TimestampField().validate(val[1]) + if ts_error: + return ts_error + + +class TieAmongField(FieldBase): + _base_types = (list, tuple) + _length = 2 + # TODO eliminate duplication with RequestIdentifierField + + def _specific_validation(self, val): + if len(val) != self._length: + return "should have length {}".format(self._length) + idr_error = NonEmptyStringField().validate(val[0]) + if idr_error: + return idr_error + ts_error = TimestampField().validate(val[1]) + if ts_error: + return ts_error + + +class VerkeyField(FieldBase): + _base_types = (str, ) + # TODO implement the rules + + def _specific_validation(self, val): + return None + + +class HexField(FieldBase): + _base_types = (str, ) + + def __init__(self, length=None, **kwargs): + super().__init__(**kwargs) + self._length = length + + def _specific_validation(self, val): + try: + int(val, 16) + except ValueError: + return "invalid hex number '{}'".format(val) + if self._length is not None and len(val) != self._length: + return 
"length should be {} length".format(self._length) + + +class MerkleRootField(FieldBase): + _base_types = (str, ) + + # Raw merkle root is 32 bytes length, + # but when it is base58'ed it is 44 bytes + hashSizes = range(43, 46) + alphabet = base58.alphabet + + def _specific_validation(self, val): + if len(val) not in self.hashSizes: + return 'length should be one of {}'.format(self.hashSizes) + if set(val).isdisjoint(self.alphabet): + return 'should not contains chars other than {}' \ + .format(self.alphabet) + + +class TimestampField(FieldBase): + _base_types = (float, int) + + def _specific_validation(self, val): + # TODO finish implementation + if val < 0: + return 'should be a positive number' + + +class JsonField(FieldBase): + _base_types = (str,) + + def _specific_validation(self, val): + try: + json.loads(val) + except json.decoder.JSONDecodeError: + return 'should be valid JSON string' diff --git a/plenum/common/messages/message_base.py b/plenum/common/messages/message_base.py new file mode 100644 index 0000000000..865b2e9a96 --- /dev/null +++ b/plenum/common/messages/message_base.py @@ -0,0 +1,108 @@ +from operator import itemgetter + +import itertools +from typing import Mapping + +from plenum.common.constants import OP_FIELD_NAME +from plenum.common.messages.fields import FieldValidator + + +class MessageValidator(FieldValidator): + + # the schema has to be an ordered iterable because the message class + # can be create with positional arguments __init__(*args) + schema = () + optional = False + + def validate(self, dct): + self._validate_fields_with_schema(dct, self.schema) + self._validate_message(dct) + + def _validate_fields_with_schema(self, dct, schema): + if not isinstance(dct, dict): + # TODO raise invalid type exception + self._raise_invalid_fields('', dct, 'wrong type') + schema_dct = dict(schema) + required_fields = filter(lambda x: not x[1].optional, schema) + required_field_names = map(lambda x: x[0], required_fields) + missed_required_fields 
= set(required_field_names) - set(dct) + if missed_required_fields: + self._raise_missed_fields(*missed_required_fields) + for k, v in dct.items(): + if k not in schema_dct: + self._raise_unknown_fields(k, v) + validation_error = schema_dct[k].validate(v) + if validation_error: + self._raise_invalid_fields(k, v, validation_error) + + def _validate_message(self, dct): + return None + + def _raise_missed_fields(self, *fields): + raise TypeError("validation error: missed fields " + "{}".format(', '.join(map(str, fields)))) + + def _raise_unknown_fields(self, field, value): + raise TypeError("validation error: unknown field " + "({}={})".format(field, value)) + + def _raise_invalid_fields(self, field, value, reason): + raise TypeError("validation error: {} " + "({}={})".format(reason, field, value)) + + def _raise_invalid_message(self, reason): + raise TypeError("validation error: {}".format(reason)) + + +class MessageBase(Mapping, MessageValidator): + typename = None + + def __init__(self, *args, **kwargs): + assert not (args and kwargs), '*args, **kwargs cannot be used together' + if args: + input_as_dict = dict(zip(map(itemgetter(0), self.schema), args)) + else: + input_as_dict = kwargs + # remove op field before the validation procedure + input_as_dict.pop(OP_FIELD_NAME, None) + self.validate(input_as_dict) + self._fields = [(k, input_as_dict[k]) for k, _ in self.schema if k in input_as_dict] + + def __getattr__(self, item): + for k, v in self._fields: + if item == k: + return v + raise AttributeError + + def __getitem__(self, key): + if isinstance(key, slice): + r = range(key.start or 0, min([len(self), key.stop or len(self)]), key.step or 1) + return [self._fields[i][1] for i in r] + elif isinstance(key, int): + return self._fields[key][1] + else: + raise TypeError("Invalid argument type.") + + def _asdict(self): + """ + Legacy form TaggedTuple + """ + return self.__dict__ + + @property + def __dict__(self): + """ + Return a dictionary form. 
+ """ + return dict(self._fields + [(OP_FIELD_NAME, self.typename)]) + + @property + def __name__(self): + return self.typename + + def __iter__(self): + for k, v in self._fields: + yield v + + def __len__(self): + return len(self._fields) diff --git a/plenum/common/request.py b/plenum/common/request.py index b20d764180..cfbce0e1bf 100644 --- a/plenum/common/request.py +++ b/plenum/common/request.py @@ -5,7 +5,7 @@ from plenum.common.signing import serializeMsg from plenum.common.constants import REQDIGEST -from plenum.common.types import f, OPERATION +from plenum.common.types import f, OPERATION, ClientMessageValidator class Request: @@ -67,6 +67,9 @@ def fromState(cls, state): cls.__setstate__(obj, state) return obj + def serialized(self): + return serializeMsg(self.__getstate__()) + class ReqDigest(NamedTuple(REQDIGEST, [f.IDENTIFIER, f.REQ_ID, @@ -74,3 +77,10 @@ class ReqDigest(NamedTuple(REQDIGEST, [f.IDENTIFIER, @property def key(self): return self.identifier, self.reqId + + +class SafeRequest(Request, ClientMessageValidator): + + def __init__(self, **kwargs): + self.validate(kwargs) + super().__init__(**kwargs) diff --git a/plenum/common/script_helper.py b/plenum/common/script_helper.py index 242c8c3810..a16fcffc6a 100644 --- a/plenum/common/script_helper.py +++ b/plenum/common/script_helper.py @@ -9,14 +9,12 @@ from plenum.client.client import Client from plenum.client.wallet import Wallet -from plenum.common import util from plenum.common.transactions import PlenumTransactions from plenum.common.roles import Roles from plenum.common.signer_simple import SimpleSigner from plenum.common.constants import TXN_TYPE, TARGET_NYM, DATA, NODE_IP, \ - NODE_PORT, CLIENT_IP, CLIENT_PORT, ALIAS, NODE, CLIENT_STACK_SUFFIX + NODE_PORT, CLIENT_IP, CLIENT_PORT, ALIAS, NODE, CLIENT_STACK_SUFFIX, SERVICES, VALIDATOR from plenum.test import waits -from plenum.test.test_node import getAllReplicas NodeInfoFile = "node-info" GenTxnFile = "genesis_txn" @@ -228,7 +226,8 @@ def 
submitNodeIpChange(client, stewardWallet, name: str, nym: str, NODE_PORT: int(nodePort), CLIENT_IP: clientIp, CLIENT_PORT: int(clientPort), - ALIAS: name + ALIAS: name, + SERVICES: [VALIDATOR], } } signedOp = stewardWallet.signOp(txn, stewardWallet.defaultId) @@ -255,7 +254,7 @@ def changeHA(looper, config, nodeName, nodeSeed, newNodeHA, client = Client(stewardName, ha=('0.0.0.0', randomClientPort), config=config) looper.add(client) - timeout = waits.expectedClientConnectionTimeout(3) + timeout = waits.expectedClientToPoolConnectionTimeout(4) looper.run(eventually(__checkClientConnected, client, retryWait=1, timeout=timeout)) diff --git a/plenum/common/signer_did.py b/plenum/common/signer_did.py index 7c27925375..52ae5450da 100644 --- a/plenum/common/signer_did.py +++ b/plenum/common/signer_did.py @@ -9,7 +9,7 @@ from plenum.common.signing import serializeMsg from stp_core.types import Identifier from plenum.common.util import rawToFriendly, friendlyToRaw - +from plenum.common.types import f class DidIdentity: def __init__(self, identifier, verkey=None, rawVerkey=None): @@ -85,7 +85,7 @@ def sign(self, msg: Dict) -> Dict: """ Return a signature for the given message. """ - ser = serializeMsg(msg) + ser = serializeMsg(msg, topLevelKeysToIgnore=[f.SIG.nm]) bsig = self.naclSigner.signature(ser) sig = base58.b58encode(bsig) return sig diff --git a/plenum/common/signer_simple.py b/plenum/common/signer_simple.py index 6c3a15fe90..0a58752d42 100644 --- a/plenum/common/signer_simple.py +++ b/plenum/common/signer_simple.py @@ -7,6 +7,7 @@ from stp_core.crypto.signer import Signer from plenum.common.signing import serializeMsg +from plenum.common.types import f from plenum.common.util import hexToFriendly @@ -64,7 +65,7 @@ def sign(self, msg: Dict) -> Dict: """ Return a signature for the given message. 
""" - ser = serializeMsg(msg) + ser = serializeMsg(msg, topLevelKeysToIgnore=[f.SIG.nm]) bsig = self.naclSigner.signature(ser) sig = base58.b58encode(bsig) return sig diff --git a/plenum/common/signing.py b/plenum/common/signing.py index bd10cf14b1..9ba7c71e9e 100644 --- a/plenum/common/signing.py +++ b/plenum/common/signing.py @@ -27,13 +27,14 @@ from plenum.common.types import f from plenum.common.error import error + logger = getlogger() # by allowing only primitives, it ensures we're signing the whole message acceptableTypes = (str, int, float, list, dict, type(None)) -def serialize(obj, level=0, objname=None): +def serialize(obj, level=0, objname=None, topLevelKeysToIgnore=None): """ Create a string representation of the given object. @@ -49,7 +50,10 @@ def serialize(obj, level=0, objname=None): '1:a|2:b|3:1,2:k' :param obj: the object to serlize - :param level: a parameter used internally for recursion to serialize nested data structures + :param level: a parameter used internally for recursion to serialize nested + data structures + :param topLevelKeysToIgnore: the list of top level keys to ignore for + serialization :return: a string representation of `obj` """ if not isinstance(obj, acceptableTypes): @@ -57,7 +61,11 @@ def serialize(obj, level=0, objname=None): if isinstance(obj, str): return obj if isinstance(obj, dict): - keys = [k for k in obj.keys() if level > 0 or k != f.SIG.nm] # remove signature if top level + if level > 0: + keys = list(obj.keys()) + else: + topLevelKeysToIgnore = topLevelKeysToIgnore or [] + keys = [k for k in obj.keys() if k not in topLevelKeysToIgnore] keys.sort() strs = [] for k in keys: @@ -73,15 +81,19 @@ def serialize(obj, level=0, objname=None): return "" else: return str(obj) + # topLevelKeysToIgnore = topLevelKeysToIgnore or [] + # return ujson.dumps({k:obj[k] for k in obj.keys() if k not in topLevelKeysToIgnore}, sort_keys=True) -def serializeMsg(msg: Mapping): +def serializeMsg(msg: Mapping, topLevelKeysToIgnore=None): 
""" Serialize a message for signing. :param msg: the message to sign + :param topLevelKeysToIgnore: the top level keys of the Mapping that should + not be included in the serialized form :return: a uft-8 encoded version of `msg` """ - ser = serialize(msg) + ser = serialize(msg, topLevelKeysToIgnore=topLevelKeysToIgnore) logger.trace("serialized msg {} into {}".format(msg, ser)) return ser.encode('utf-8') diff --git a/plenum/common/stack_manager.py b/plenum/common/stack_manager.py index a54de57d2b..67272b5d75 100644 --- a/plenum/common/stack_manager.py +++ b/plenum/common/stack_manager.py @@ -1,19 +1,20 @@ import os import shutil -from abc import abstractproperty +from abc import abstractmethod from collections import OrderedDict from plenum.common.keygen_utils import initRemoteKeys from stp_core.types import HA from stp_core.network.exceptions import RemoteNotFound - +from stp_core.common.log import getlogger from ledger.compact_merkle_tree import CompactMerkleTree -from ledger.ledger import Ledger from ledger.stores.file_hash_store import FileHashStore + from plenum.common.constants import DATA, ALIAS, TARGET_NYM, NODE_IP, CLIENT_IP, \ CLIENT_PORT, NODE_PORT, VERKEY, TXN_TYPE, NODE, SERVICES, VALIDATOR, CLIENT_STACK_SUFFIX from plenum.common.util import cryptonymToHex, updateNestedDict -from stp_core.common.log import getlogger +from plenum.common.ledger import Ledger + logger = getlogger() @@ -24,15 +25,18 @@ def __init__(self, name, basedirpath, isNode=True): self.isNode = isNode self.hashStore = None - @abstractproperty + @property + @abstractmethod def hasLedger(self) -> bool: raise NotImplementedError - @abstractproperty + @property + @abstractmethod def ledgerLocation(self) -> str: raise NotImplementedError - @abstractproperty + @property + @abstractmethod def ledgerFile(self) -> str: raise NotImplementedError @@ -40,21 +44,21 @@ def ledgerFile(self) -> str: @property def ledger(self): if self._ledger is None: - if not self.hasLedger: - defaultTxnFile = 
os.path.join(self.basedirpath, - self.ledgerFile) - if not os.path.isfile(defaultTxnFile): - raise FileNotFoundError("Pool transactions file not " - "found: {}".format(defaultTxnFile)) - else: - shutil.copy(defaultTxnFile, self.ledgerLocation) + defaultTxnFile = os.path.join(self.basedirpath, + self.ledgerFile) + if not os.path.exists(defaultTxnFile): + logger.debug("Not using default initialization file for " + "pool ledger, since it does not exist: {}" + .format(defaultTxnFile)) + defaultTxnFile = None dataDir = self.ledgerLocation self.hashStore = FileHashStore(dataDir=dataDir) self._ledger = Ledger(CompactMerkleTree(hashStore=self.hashStore), - dataDir=dataDir, - fileName=self.ledgerFile, - ensureDurability=self.config.EnsureLedgerDurability) + dataDir=dataDir, + fileName=self.ledgerFile, + ensureDurability=self.config.EnsureLedgerDurability, + defaultFile=defaultTxnFile) return self._ledger @staticmethod @@ -146,6 +150,7 @@ def stackHaChanged(self, txn, remoteName, nodeOrClientObj): else: nodeOrClientObj.nodeReg[remoteName] = HA(*cliHa) + # Attempt connection at the new HA nodeOrClientObj.nodestack.maintainConnections(force=True) return rid @@ -167,6 +172,7 @@ def stackKeysChanged(self, txn, remoteName, nodeOrClientObj): logger.error("Exception while initializing keep for remote {}". format(ex)) + # Attempt connection with the new keys nodeOrClientObj.nodestack.maintainConnections(force=True) return rid diff --git a/plenum/common/stacks.py b/plenum/common/stacks.py index b6d0ac7e07..76fea65e2a 100644 --- a/plenum/common/stacks.py +++ b/plenum/common/stacks.py @@ -66,6 +66,9 @@ def __init__(self, stackParams: dict, msgHandler: Callable, # TODO: Reconsider defaulting `reSetupAuth` to True. 
def start(self, restricted=None, reSetupAuth=True): KITZStack.start(self, restricted=restricted, reSetupAuth=reSetupAuth) + # Calling service lifecycle to allow creation of remotes + # that this stack needs to connect to + # self.serviceLifecycle() logger.info("{} listening for other nodes at {}:{}". format(self, *self.ha), extra={"tags": ["node-listening"]}) @@ -132,7 +135,5 @@ def start(self): extra={"tags": ["node-listening"]}) - - nodeStackClass = NodeZStack if config.UseZStack else NodeRStack clientStackClass = ClientZStack if config.UseZStack else ClientRStack diff --git a/plenum/common/startable.py b/plenum/common/startable.py index 1d0408e027..912cc7077f 100644 --- a/plenum/common/startable.py +++ b/plenum/common/startable.py @@ -53,10 +53,3 @@ class Mode(IntEnum): discovered = 3 # caught up with pool txn ledger syncing = 4 # catching up on domain txn ledger participating = 5 # caught up with domain txn ledger - - -@unique -class LedgerState(IntEnum): - not_synced = 1 - syncing = 2 - synced = 3 diff --git a/plenum/common/test_network_setup.py b/plenum/common/test_network_setup.py index 9ddb86e938..438748db73 100644 --- a/plenum/common/test_network_setup.py +++ b/plenum/common/test_network_setup.py @@ -1,4 +1,5 @@ import argparse +import ipaddress import os from collections import namedtuple @@ -159,9 +160,10 @@ def bootstrapTestNodes(cls, config, startingPort, nodeParamsFileName, domainTxnF parser = argparse.ArgumentParser( description="Generate pool transactions for testing") - parser.add_argument('--nodes', required=True, type=int, - help='node count, ' - 'should be less than 100') + parser.add_argument('--nodes', required=True, + help='node count should be less than 100', + type=cls._bootstrapArgsTypeNodeCount, + ) parser.add_argument('--clients', required=True, type=int, help='client count') parser.add_argument('--nodeNum', type=int, @@ -173,11 +175,12 @@ def bootstrapTestNodes(cls, config, startingPort, nodeParamsFileName, domainTxnF 'number of nodes 
then the ' 'remaining nodes are assigned the loopback ' 'IP, i.e 127.0.0.1', - type=str) + type=cls._bootstrapArgsTypeIps) parser.add_argument('--envName', help='Environment name (test or live)', type=str, + choices=('test', 'live'), default="test", required=False) @@ -187,30 +190,49 @@ def bootstrapTestNodes(cls, config, startingPort, nodeParamsFileName, domainTxnF action='store_true') args = parser.parse_args() - if args.nodes > 100: - print("Cannot run {} nodes for testing purposes as of now. " - "This is not a problem with the protocol but some placeholder" - " rules we put in place which will be replaced by our " - "Governance model. Going to run only 100".format(args.nodes)) - nodeCount = 100 - else: - nodeCount = args.nodes - clientCount = args.clients - nodeNum = args.nodeNum - ips = args.ips - envName = args.envName - appendToLedgers = args.appendToLedgers - if nodeNum: - assert nodeNum <= nodeCount, "nodeNum should be less than equal " \ - "to nodeCount" - - steward_defs, node_defs = cls.gen_defs(ips, nodeCount, startingPort) - client_defs = cls.gen_client_defs(clientCount) + + if args.nodeNum: + assert 0 <= args.nodeNum <= args.nodes, \ + "nodeNum should be less ore equal to nodeCount" + + steward_defs, node_defs = cls.gen_defs(args.ips, args.nodes, startingPort) + client_defs = cls.gen_client_defs(args.clients) trustee_def = cls.gen_trustee_def(1) - cls.bootstrapTestNodesCore(config, envName, appendToLedgers, + cls.bootstrapTestNodesCore(config, args.envName, args.appendToLedgers, domainTxnFieldOrder, trustee_def, steward_defs, node_defs, client_defs, - nodeNum, nodeParamsFileName) + args.nodeNum, nodeParamsFileName) + + @staticmethod + def _bootstrapArgsTypeNodeCount(nodesStrArg): + if not nodesStrArg.isdigit(): + raise argparse.ArgumentTypeError('should be a number') + n = int(nodesStrArg) + if n > 100: + raise argparse.ArgumentTypeError( + "Cannot run {} nodes for testing purposes as of now. 
" + "This is not a problem with the protocol but some placeholder " + "rules we put in place which will be replaced by our " + "Governance model. Going to run only 100".format(n) + ) + if n <= 0: + raise argparse.ArgumentTypeError('should be > 0') + return n + + @staticmethod + def _bootstrapArgsTypeIps(ipsStrArg): + ips = [] + for ip in ipsStrArg.split(','): + ip = ip.strip() + try: + ipaddress.ip_address(ip) + except ValueError: + raise argparse.ArgumentTypeError( + "'{}' is an invalid IP address".format(ip) + ) + else: + ips.append(ip) + return ips @classmethod def gen_defs(cls, ips, nodeCount, starting_port): @@ -224,7 +246,6 @@ def gen_defs(cls, ips, nodeCount, starting_port): if not ips: ips = ['127.0.0.1'] * nodeCount else: - ips = ips.split(",") if len(ips) != nodeCount: if len(ips) > nodeCount: ips = ips[:nodeCount] diff --git a/plenum/common/txn_util.py b/plenum/common/txn_util.py index c83adf97c9..8f1e12138d 100644 --- a/plenum/common/txn_util.py +++ b/plenum/common/txn_util.py @@ -1,18 +1,12 @@ -import os from collections import OrderedDict -import portalocker - -from ledger.stores.file_hash_store import FileHashStore - -from ledger.util import F - from ledger.compact_merkle_tree import CompactMerkleTree from ledger.ledger import Ledger from ledger.serializers.compact_serializer import CompactSerializer -from plenum.common.constants import TXN_ID, TXN_TIME, TXN_TYPE, TARGET_NYM, ROLE, \ - ALIAS, VERKEY, TYPE, IDENTIFIER, DATA -from plenum.common.types import f +from plenum.common.constants import TXN_TIME, TXN_TYPE, TARGET_NYM, ROLE, \ + ALIAS, VERKEY +from plenum.common.types import f, OPERATION +from plenum.common.request import Request from stp_core.common.log import getlogger @@ -23,8 +17,8 @@ def getTxnOrderedFields(): return OrderedDict([ (f.IDENTIFIER.nm, (str, str)), (f.REQ_ID.nm, (str, int)), - (TXN_ID, (str, str)), - (TXN_TIME, (str, int)), + (f.SIG.nm, (str, str)), + (TXN_TIME, (str, float)), (TXN_TYPE, (str, str)), (TARGET_NYM, (str, str)), 
(VERKEY, (str, str)), @@ -56,36 +50,18 @@ def createGenesisTxnFile(genesisTxns, targetDir, fileName, fieldOrdering, ledger.stop() -def updateGenesisPoolTxnFile(genesisTxnDir, genesisTxnFile, txn): - # The lock is an advisory lock, it might not work on linux filesystems - # not mounted with option `-o mand`, another approach can be to use a .lock - # file to indicate presence or absence of .lock - genesisFilePath = os.path.join(genesisTxnDir, genesisTxnFile) - try: - # Exclusively lock file in a non blocking manner. Locking is neccessary - # since there might be multiple clients running on a machine so genesis - # files should be updated safely. - # TODO: There is no automated test in the codebase that confirms it. - # It has only been manaully tested in the python terminal. Add a test - # for it using multiple processes writing concurrently - with portalocker.Lock(genesisFilePath, - truncate=None, - flags=portalocker.LOCK_EX | portalocker.LOCK_NB): - seqNo = txn[F.seqNo.name] - fileHashStore = FileHashStore(dataDir=genesisTxnDir) - ledger = Ledger(CompactMerkleTree(hashStore=fileHashStore), - dataDir=genesisTxnDir, fileName=genesisTxnFile) - ledgerSize = len(ledger) - if seqNo - ledgerSize == 1: - ledger.add({k:v for k,v in txn.items() if k != F.seqNo.name}) - logger.debug('Adding transaction with sequence number {} in' - ' genesis pool transaction file'.format(seqNo)) - else: - logger.debug('Already {} genesis pool transactions present so ' - 'transaction with sequence number {} ' - 'not applicable'.format(ledgerSize, seqNo)) - ledger.stop() - fileHashStore.close() - except portalocker.LockException as ex: - logger.error("error occurred during locking file {}: {}". - format(genesisFilePath, str(ex))) +def reqToTxn(req: Request): + """ + Transform a client request such that it can be stored in the ledger. 
+ Also this is what will be returned to the client in the reply + :param req: + :return: + """ + data = req.signingState + res = { + f.IDENTIFIER.nm: req.identifier, + f.REQ_ID.nm: req.reqId, + f.SIG.nm: req.signature + } + res.update(data[OPERATION]) + return res diff --git a/plenum/common/types.py b/plenum/common/types.py index abfe7bb18f..733e13d1ac 100644 --- a/plenum/common/types.py +++ b/plenum/common/types.py @@ -1,13 +1,18 @@ +from typing import NamedTuple, Any, List, Mapping, Optional, TypeVar, Dict, \ + Tuple + import sys from collections import namedtuple -from typing import NamedTuple, Any, List, Mapping, Optional, TypeVar, Dict -from plenum.common.constants import NOMINATE, PRIMARY, REELECTION, REQACK,\ +from plenum.common.constants import NOMINATE, PRIMARY, REELECTION, REQACK, \ ORDERED, PROPAGATE, PREPREPARE, REPLY, COMMIT, PREPARE, BATCH, \ INSTANCE_CHANGE, BLACKLIST, REQNACK, LEDGER_STATUS, CONSISTENCY_PROOF, \ CATCHUP_REQ, CATCHUP_REP, POOL_LEDGER_TXNS, CONS_PROOF_REQUEST, CHECKPOINT, \ - CHECKPOINT_STATE, THREE_PC_STATE, OP_FIELD_NAME - + CHECKPOINT_STATE, THREE_PC_STATE, REJECT, OP_FIELD_NAME, POOL_LEDGER_ID, DOMAIN_LEDGER_ID +from plenum.common.messages.client_request import ClientOperationField +from plenum.common.messages.fields import * +from plenum.common.messages.fields import IdentifierField, NonNegativeNumberField, SignatureField +from plenum.common.messages.message_base import MessageBase, MessageValidator from stp_core.types import HA NodeDetail = NamedTuple("NodeDetail", [ @@ -15,7 +20,6 @@ ("cliname", str), ("cliha", HA)]) - Field = namedtuple("Field", ["nm", "tp"]) @@ -32,6 +36,8 @@ class f: # provides a namespace for reusable field constants RECEIVED_DIGESTS = Field('receivedDigests', Dict[str, str]) SEQ_NO = Field('seqNo', int) PP_SEQ_NO = Field('ppSeqNo', int) # Pre-Prepare sequence number + ORD_SEQ_NO = Field('ordSeqNo', int) # Last PP_SEQ_NO that was ordered + ORD_SEQ_NOS = Field('ordSeqNos', List[int]) # Last ordered seq no of 
each protocol instance, sent during view change RESULT = Field('result', Any) SENDER_NODE = Field('senderNode', str) REQ_ID = Field('reqId', int) @@ -46,12 +52,16 @@ class f: # provides a namespace for reusable field constants REASON = Field('reason', Any) SENDER_CLIENT = Field('senderClient', str) PP_TIME = Field("ppTime", float) + REQ_IDR = Field("reqIdr", List[Tuple[str, int]]) + DISCARDED = Field("discarded", int) + STATE_ROOT = Field("stateRootHash", str) + TXN_ROOT = Field("txnRootHash", str) MERKLE_ROOT = Field("merkleRoot", str) OLD_MERKLE_ROOT = Field("oldMerkleRoot", str) NEW_MERKLE_ROOT = Field("newMerkleRoot", str) TXN_SEQ_NO = Field("txnSeqNo", int) # 0 for pool transaction ledger, 1 for domain transaction ledger - LEDGER_TYPE = Field("ledgerType", int) + LEDGER_ID = Field("ledgerId", int) SEQ_NO_START = Field("seqNoStart", int) SEQ_NO_END = Field("seqNoEnd", int) CATCHUP_TILL = Field("catchupTill", int) @@ -87,7 +97,7 @@ def melted(self): # noinspection PyProtectedMember -def TaggedTuple(typename, fields): +def TaggedTuple(typename, fields) -> NamedTuple: cls = NamedTuple(typename, fields) if OP_FIELD_NAME in cls._fields: raise RuntimeError("field name '{}' is reserved in TaggedTuple" @@ -96,10 +106,34 @@ def TaggedTuple(typename, fields): cls.typename = typename return cls +OPERATION = 'operation' + + +class ClientMessageValidator(MessageValidator): + schema = ( + (f.IDENTIFIER.nm, IdentifierField()), + (f.REQ_ID.nm, NonNegativeNumberField()), + (OPERATION, ClientOperationField()), + (f.SIG.nm, SignatureField(optional=True)), + (f.DIGEST.nm, NonEmptyStringField(optional=True)), + ) + + +# class Nomination(MessageBase): +# typename = NOMINATE +# +# schema = ( +# (f.NAME.nm, NonEmptyStringField()), +# (f.INST_ID.nm, NonNegativeNumberField()), +# (f.VIEW_NO.nm, NonNegativeNumberField()), +# (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), +# ) Nomination = TaggedTuple(NOMINATE, [ f.NAME, f.INST_ID, - f.VIEW_NO]) + f.VIEW_NO, + f.ORD_SEQ_NO]) + Batch = 
TaggedTuple(BATCH, [ f.MSGS, @@ -111,6 +145,17 @@ def TaggedTuple(typename, fields): # that would have round number 2. If a node receives a reelection message with # a round number that is not 1 greater than the reelections rounds it has # already seen then it rejects that message + + +# class Reelection(MessageBase): +# typename = REELECTION +# +# schema = ( +# (f.INST_ID.nm, NonNegativeNumberField()), +# (f.ROUND.nm, NonNegativeNumberField()), +# (f.TIE_AMONG.nm, IterableField(TieAmongField())), +# (f.VIEW_NO.nm, NonNegativeNumberField()), +# ) Reelection = TaggedTuple(REELECTION, [ f.INST_ID, f.ROUND, @@ -118,18 +163,26 @@ def TaggedTuple(typename, fields): f.VIEW_NO]) # Declaration of a winner + +# class Primary(MessageBase): +# typename = PRIMARY +# +# schema = ( +# (f.NAME.nm, NonEmptyStringField()), +# (f.INST_ID.nm, NonNegativeNumberField()), +# (f.VIEW_NO.nm, NonNegativeNumberField()), +# (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), +# ) Primary = TaggedTuple(PRIMARY, [ f.NAME, f.INST_ID, - f.VIEW_NO]) + f.VIEW_NO, + f.ORD_SEQ_NO]) BlacklistMsg = NamedTuple(BLACKLIST, [ f.SUSP_CODE, f.NODE_NAME]) - -OPERATION = 'operation' - RequestAck = TaggedTuple(REQACK, [ f.IDENTIFIER, f.REQ_ID]) @@ -139,16 +192,38 @@ def TaggedTuple(typename, fields): f.REQ_ID, f.REASON]) +Reject = TaggedTuple(REJECT, [ + f.IDENTIFIER, + f.REQ_ID, + f.REASON]) + PoolLedgerTxns = TaggedTuple(POOL_LEDGER_TXNS, [ f.TXN ]) + +# class Ordered(MessageBase): +# typename = ORDERED +# schema = ( +# (f.INST_ID.nm, NonNegativeNumberField()), +# (f.VIEW_NO.nm, NonNegativeNumberField()), +# (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), +# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), +# (f.PP_TIME.nm, TimestampField()), +# (f.LEDGER_ID.nm, LedgerIdField()), +# (f.STATE_ROOT.nm, MerkleRootField()), +# (f.TXN_ROOT.nm, MerkleRootField()), +# ) Ordered = NamedTuple(ORDERED, [ f.INST_ID, f.VIEW_NO, - f.IDENTIFIER, - f.REQ_ID, - f.PP_TIME]) + f.REQ_IDR, + f.PP_SEQ_NO, + f.PP_TIME, + 
f.LEDGER_ID, + f.STATE_ROOT, + f.TXN_ROOT, + ]) # σc, i>~μi # s = client sequence number (comes from Aardvark paper) @@ -156,34 +231,89 @@ def TaggedTuple(typename, fields): # Propagate needs the name of the sender client since every node needs to know # who sent the request to send the reply. If all clients had name same as # their identifier same as client name (stack name, the name which RAET knows) + + +# class Propagate(MessageBase): +# typename = PROPAGATE +# schema = ( +# (f.REQUEST.nm, ClientMessageValidator()), +# (f.SENDER_CLIENT.nm, NonEmptyStringField()), +# ) Propagate = TaggedTuple(PROPAGATE, [ f.REQUEST, f.SENDER_CLIENT]) + +# class PrePrepare(MessageBase): +# typename = PREPREPARE +# schema = ( +# (f.INST_ID.nm, NonNegativeNumberField()), +# (f.VIEW_NO.nm, NonNegativeNumberField()), +# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), +# (f.PP_TIME.nm, TimestampField()), +# (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), +# (f.DISCARDED.nm, NonNegativeNumberField()), +# (f.DIGEST.nm, NonEmptyStringField()), +# (f.LEDGER_ID.nm, LedgerIdField()), +# (f.STATE_ROOT.nm, MerkleRootField()), +# (f.TXN_ROOT.nm, MerkleRootField()), +# ) PrePrepare = TaggedTuple(PREPREPARE, [ f.INST_ID, f.VIEW_NO, f.PP_SEQ_NO, - f.IDENTIFIER, - f.REQ_ID, + f.PP_TIME, + f.REQ_IDR, + f.DISCARDED, f.DIGEST, - f.PP_TIME + f.LEDGER_ID, + f.STATE_ROOT, + f.TXN_ROOT, ]) + +# class Prepare(MessageBase): +# typename = PREPARE +# schema = ( +# (f.INST_ID.nm, NonNegativeNumberField()), +# (f.VIEW_NO.nm, NonNegativeNumberField()), +# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), +# (f.DIGEST.nm, NonEmptyStringField()), +# (f.STATE_ROOT.nm, HexField(length=64, nullable=True)), +# (f.TXN_ROOT.nm, HexField(length=64, nullable=True)), +# ) Prepare = TaggedTuple(PREPARE, [ f.INST_ID, f.VIEW_NO, f.PP_SEQ_NO, f.DIGEST, - f.PP_TIME]) + f.STATE_ROOT, + f.TXN_ROOT, + ]) + +# class Commit(MessageBase): +# typename = COMMIT +# schema = ( +# (f.INST_ID.nm, NonNegativeNumberField()), +# 
(f.VIEW_NO.nm, NonNegativeNumberField()), +# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), +# ) Commit = TaggedTuple(COMMIT, [ f.INST_ID, f.VIEW_NO, - f.PP_SEQ_NO, - f.DIGEST, - f.PP_TIME]) + f.PP_SEQ_NO + ]) +# class Checkpoint(MessageBase): +# typename = CHECKPOINT +# schema = ( +# (f.INST_ID.nm, NonNegativeNumberField()), +# (f.VIEW_NO.nm, NonNegativeNumberField()), +# (f.SEQ_NO_START.nm, NonNegativeNumberField()), +# (f.SEQ_NO_END.nm, NonNegativeNumberField()), +# (f.DIGEST.nm, NonEmptyStringField()), +# ) Checkpoint = TaggedTuple(CHECKPOINT, [ f.INST_ID, f.VIEW_NO, @@ -191,6 +321,7 @@ def TaggedTuple(typename, fields): f.SEQ_NO_END, f.DIGEST]) + CheckpointState = NamedTuple(CHECKPOINT_STATE, [ f.SEQ_NO, # Current ppSeqNo in the checkpoint f.DIGESTS, # Digest of all the requests in the checkpoint @@ -201,26 +332,61 @@ def TaggedTuple(typename, fields): ]) +# class ThreePCState(MessageBase): +# typename = THREE_PC_STATE +# schema = ( +# (f.INST_ID.nm, NonNegativeNumberField()), +# (f.MSGS.nm, IterableField(ClientMessageValidator())), +# ) ThreePCState = TaggedTuple(THREE_PC_STATE, [ f.INST_ID, f.MSGS]) - Reply = TaggedTuple(REPLY, [f.RESULT]) +# class InstanceChange(MessageBase): +# typename = INSTANCE_CHANGE +# schema = ( +# (f.VIEW_NO.nm, NonNegativeNumberField()), +# (f.REASON.nm, NonNegativeNumberField()), +# (f.ORD_SEQ_NOS.nm, IterableField(NonNegativeNumberField())), +# ) InstanceChange = TaggedTuple(INSTANCE_CHANGE, [ f.VIEW_NO, + f.REASON, + f.ORD_SEQ_NOS, ]) + +# class LedgerStatus(MessageBase): +# typename = LEDGER_STATUS +# schema = ( +# (f.LEDGER_ID.nm, LedgerIdField()), +# (f.TXN_SEQ_NO.nm, NonNegativeNumberField()), +# (f.MERKLE_ROOT.nm, MerkleRootField()), +# ) LedgerStatus = TaggedTuple(LEDGER_STATUS, [ - f.LEDGER_TYPE, + f.LEDGER_ID, f.TXN_SEQ_NO, f.MERKLE_ROOT]) + +# class ConsistencyProof(MessageBase): +# typename = CONSISTENCY_PROOF +# schema = ( +# (f.LEDGER_ID.nm, LedgerIdField()), +# (f.SEQ_NO_START.nm, NonNegativeNumberField()), +# 
(f.SEQ_NO_END.nm, NonNegativeNumberField()), +# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), +# (f.OLD_MERKLE_ROOT.nm, MerkleRootField()), +# (f.NEW_MERKLE_ROOT.nm, MerkleRootField()), +# (f.HASHES.nm, IterableField(NonEmptyStringField())), +# ) ConsistencyProof = TaggedTuple(CONSISTENCY_PROOF, [ - f.LEDGER_TYPE, + f.LEDGER_ID, f.SEQ_NO_START, f.SEQ_NO_END, + f.PP_SEQ_NO, f.OLD_MERKLE_ROOT, f.NEW_MERKLE_ROOT, f.HASHES @@ -229,22 +395,44 @@ def TaggedTuple(typename, fields): # TODO: Catchup is not a good name, replace it with `sync` or something which # is familiar +# class CatchupReq(MessageBase): +# typename = CATCHUP_REQ +# schema = ( +# (f.LEDGER_ID.nm, LedgerIdField()), +# (f.SEQ_NO_START.nm, NonNegativeNumberField()), +# (f.SEQ_NO_END.nm, NonNegativeNumberField()), +# (f.CATCHUP_TILL.nm, NonNegativeNumberField()), +# ) CatchupReq = TaggedTuple(CATCHUP_REQ, [ - f.LEDGER_TYPE, + f.LEDGER_ID, f.SEQ_NO_START, f.SEQ_NO_END, f.CATCHUP_TILL ]) + +# class CatchupRep(MessageBase): +# typename = CATCHUP_REQ +# schema = ( +# (f.LEDGER_ID.nm, LedgerIdField()), +# (f.TXNS.nm, IterableField(ClientMessageValidator())), +# (f.CONS_PROOF.nm, IterableField(HexField(length=64))), +# ) CatchupRep = TaggedTuple(CATCHUP_REP, [ - f.LEDGER_TYPE, + f.LEDGER_ID, f.TXNS, f.CONS_PROOF ]) - +# class ConsProofRequest(MessageBase): +# typename = CONS_PROOF_REQUEST +# schema = ( +# (f.LEDGER_ID.nm, LedgerIdField()), +# (f.SEQ_NO_START.nm, NonNegativeNumberField()), +# (f.SEQ_NO_END.nm, NonNegativeNumberField()), +# ) ConsProofRequest = TaggedTuple(CONS_PROOF_REQUEST, [ - f.LEDGER_TYPE, + f.LEDGER_ID, f.SEQ_NO_START, f.SEQ_NO_END ]) @@ -261,6 +449,13 @@ def loadRegistry(): for x in dir(this) if callable(getattr(getattr(this, x), "melted", None)) and getattr(getattr(this, x), "_fields", None)} + # attach MessageBase, for pre-testing procedure + # TODO: add MessageBase classes another way + TaggedTuples.update( + {getattr(this, x).typename: getattr(this, x) + for x in dir(this) + if 
getattr(getattr(this, x), "schema", None) and issubclass(getattr(this, x), MessageBase)} + ) loadRegistry() diff --git a/plenum/common/util.py b/plenum/common/util.py index 906d9eaa74..84368a8233 100644 --- a/plenum/common/util.py +++ b/plenum/common/util.py @@ -71,8 +71,8 @@ def updateNamedTuple(tupleToUpdate: NamedTuple, **kwargs): return tupleToUpdate.__class__(**tplData) -def objSearchReplace(obj: Any, toFrom: Dict[Any, Any], checked: Set[Any] = set(), - logMsg: str = None, deepLevel: int = None) -> None: +def objSearchReplace(obj: Any, toFrom: Dict[Any, Any], checked: Set[Any] = set() + , logMsg: str = None, deepLevel: int = None) -> None: """ Search for an attribute in an object and replace it with another. @@ -294,6 +294,17 @@ def hexToFriendly(hx): return rawToFriendly(raw) +def friendlyToHex(f): + if not isinstance(f, str): + f = f.decode('ascii') + raw = friendlyToRaw(f) + return hexlify(raw) + + +def friendlyToHexStr(f): + return friendlyToHex(f).decode() + + def rawToFriendly(raw): return base58.b58encode(raw) @@ -345,10 +356,26 @@ def updateFieldsWithSeqNo(fields): return r +def compareNamedTuple(tuple1: NamedTuple, tuple2: NamedTuple, *fields): + """ + Compare provided fields of 2 named tuples for equality and returns true + :param tuple1: + :param tuple2: + :param fields: + :return: + """ + tuple1 = tuple1._asdict() + tuple2 = tuple2._asdict() + comp = [] + for field in fields: + comp.append(tuple1[field] == tuple2[field]) + return all(comp) + + def bootstrapClientKeys(identifier, verkey, nodes): # bootstrap client verification key to all nodes for n in nodes: - n.clientAuthNr.addClient(identifier, verkey) + n.clientAuthNr.addIdr(identifier, verkey) def prettyDateDifference(startTime, finishTime=None): @@ -427,6 +454,11 @@ def isMaxCheckTimeExpired(startTime, maxCheckForMillis): return startTimeRounded + maxCheckForMillis < curTimeRounded +def randomSeed(size=32): + return ''.join(random.choice(string.hexdigits) + for _ in range(size)).encode() + + 
def lxor(a, b): # Logical xor of 2 items, return true when one of them is truthy and # one of them falsy diff --git a/plenum/config.py b/plenum/config.py index d66a99227e..cb816e44eb 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -1,13 +1,15 @@ + import os import sys from collections import OrderedDict import logging -from plenum.common.constants import ClientBootStrategy +from plenum.common.constants import ClientBootStrategy, HS_FILE, KeyValueStorageType from plenum.common.types import PLUGIN_TYPE_STATS_CONSUMER # Each entry in registry is (stack name, ((host, port), verkey, pubkey)) + nodeReg = OrderedDict([ ('Alpha', ('127.0.0.1', 9701)), ('Beta', ('127.0.0.1', 9703)), @@ -22,36 +24,33 @@ ('DeltaC', ('127.0.0.1', 9708)) ]) -baseDir = "~/.plenum/" - -keyringsDir = "keyrings" - -nodeDataDir = "data/nodes" +baseDir = '~/.plenum/' +keyringsDir = 'keyrings' +nodeDataDir = 'data/nodes' +clientDataDir = 'data/clients' +walletDir = 'wallet' -clientDataDir = "data/clients" +poolTransactionsFile = 'pool_transactions_sandbox' +domainTransactionsFile = 'transactions_sandbox' -domainTransactionsFile = "transactions_sandbox" +poolStateDbName = 'pool_state' +domainStateDbName = 'domain_state' -poolTransactionsFile = "pool_transactions_sandbox" - -walletDir = "wallet" +# There is only one seqNoDB as it maintain the mapping of +# request id to sequence numbers +seqNoDbName = 'seq_no_db' clientBootStrategy = ClientBootStrategy.PoolTxn hashStore = { - "type": "file" + "type": HS_FILE } primaryStorage = None -secondaryStorage = None - -OrientDB = { - "user": "root", - "password": "password", - "host": "127.0.0.1", - "port": 2424 -} +domainStateStorage = KeyValueStorageType.Leveldb +poolStateStorage = KeyValueStorageType.Leveldb +reqIdToTxnStorage = KeyValueStorageType.Leveldb DefaultPluginPath = { # PLUGIN_BASE_DIR_PATH: " Sequence[str]: @@ -51,9 +55,15 @@ def getAcks(self, identifier: str, reqId: int) -> dict: def getNacks(self, identifier: str, reqId: int) -> dict: pass 
+ @abstractmethod + def getRejects(self, identifier: str, reqId: int) -> dict: + pass + def getAllReplies(self, identifier: str, reqId: int): replies = self.getReplies(identifier, reqId) errors = self.getNacks(identifier, reqId) + if not errors: + errors = {**errors, **self.getRejects(identifier, reqId)} return replies, errors @abstractproperty @@ -63,7 +73,4 @@ def txnFieldOrdering(self): # noinspection PyAttributeOutsideInit @property def txnSerializer(self): - # if not self._serializer: - # self._serializer = CompactSerializer(fields=self.txnFieldOrdering) - # return self._serializer return CompactSerializer(fields=self.txnFieldOrdering) diff --git a/plenum/persistence/client_req_rep_store_file.py b/plenum/persistence/client_req_rep_store_file.py index 34b34ad628..1d98638fd7 100644 --- a/plenum/persistence/client_req_rep_store_file.py +++ b/plenum/persistence/client_req_rep_store_file.py @@ -1,12 +1,11 @@ import json import os -from collections import OrderedDict, namedtuple -from typing import Any, Sequence, List, Dict +from collections import namedtuple +from typing import Any, List, Dict -from plenum.common.constants import REQACK, REQNACK, REPLY +from plenum.common.constants import REQACK, REQNACK, REPLY, REJECT from ledger.stores.directory_store import DirectoryStore -from ledger.util import F from plenum.common.has_file_storage import HasFileStorage from plenum.common.txn_util import getTxnOrderedFields from plenum.common.types import f @@ -16,7 +15,7 @@ class ClientReqRepStoreFile(ClientReqRepStore, HasFileStorage): - LinePrefixes = namedtuple('LP', ['Request', REQACK, REQNACK, REPLY]) + LinePrefixes = namedtuple('LP', ['Request', REQACK, REQNACK, REJECT, REPLY]) def __init__(self, name, baseDir): self.baseDir = baseDir @@ -28,8 +27,8 @@ def __init__(self, name, baseDir): os.makedirs(self.dataLocation) self.reqStore = DirectoryStore(self.dataLocation, "Requests") self._serializer = None + self.linePrefixes = self.LinePrefixes('0', 'A', 'N', 'J', 'R') 
self.delimiter = '~' - self.linePrefixes = self.LinePrefixes('0', 'A', 'N', 'R') @property def lastReqId(self) -> int: @@ -63,6 +62,16 @@ def addNack(self, msg: Any, sender: str): self.delimiter, sender, self.delimiter, reason)) + def addReject(self, msg: Any, sender: str): + idr = msg[f.IDENTIFIER.nm] + reqId = msg[f.REQ_ID.nm] + key = "{}{}".format(idr, reqId) + reason = msg[f.REASON.nm] + self.reqStore.appendToValue(key, "{}{}{}{}{}". + format(self.linePrefixes.REJECT, + self.delimiter, sender, + self.delimiter, reason)) + def addReply(self, identifier: str, reqId: int, sender: str, result: Any) -> int: serializedReply = self.txnSerializer.serialize(result, toBytes=False) @@ -105,6 +114,15 @@ def getNacks(self, identifier: str, reqId: int) -> dict: result[sender] = reason return result + def getRejects(self, identifier: str, reqId: int) -> dict: + nackLines = self._getLinesWithPrefix(identifier, reqId, "{}{}". + format(self.linePrefixes.REJECT, self.delimiter)) + result = {} + for line in nackLines: + sender, reason = line[2:].split(self.delimiter, 1) + result[sender] = reason + return result + @property def txnFieldOrdering(self): fields = getTxnOrderedFields() diff --git a/plenum/persistence/graph_store.py b/plenum/persistence/graph_store.py deleted file mode 100644 index 06f2e42952..0000000000 --- a/plenum/persistence/graph_store.py +++ /dev/null @@ -1,47 +0,0 @@ -from abc import ABC, abstractmethod - - -class GraphStore(ABC): - """ - Interface for graph databases to be used with the Plenum system. - """ - def __init__(self, store): - self.store = store - self.client = store.client - self.bootstrap() - - @abstractmethod - def bootstrap(self): - """ - Setup the database, create the schema etc. 
- """ - pass - - @abstractmethod - def addEdgeConstraint(self, edgeClass, iN=None, out=None): - """ - Create a constraint on an edge in the graph database - - :param edgeClass: name of the edge type - """ - pass - - @abstractmethod - def createVertex(self, name, **kwargs): - """ - Create a vertex/node in the graph - - :param name: name of the vertex - """ - pass - - @abstractmethod - def createEdge(self, name, frm, to, **kwargs): - """ - Create an edge in the graph - - :param name: name of the edge - :param frm: from vertex - :param to: to vertex - """ - pass diff --git a/plenum/persistence/leveldb_hash_store.py b/plenum/persistence/leveldb_hash_store.py new file mode 100644 index 0000000000..ec3ca68f9a --- /dev/null +++ b/plenum/persistence/leveldb_hash_store.py @@ -0,0 +1,93 @@ +import os + +from ledger.stores.hash_store import HashStore +from state.kv.kv_store_leveldb import KeyValueStorageLeveldb +from stp_core.common.log import getlogger + + +logger = getlogger() + + +class LevelDbHashStore(HashStore): + def __init__(self, dataDir): + self.dataDir = dataDir + self.nodesDbPath = os.path.join(self.dataDir, '_merkleNodes') + self.leavesDbPath = os.path.join(self.dataDir, '_merkleLeaves') + self.nodesDb = None + self.leavesDb = None + self.open() + + def writeLeaf(self, leafHash): + self.leavesDb.put(str(self.leafCount + 1), leafHash) + self.leafCount += 1 + + def writeNode(self, node): + start, height, nodeHash = node + seqNo = self.getNodePosition(start, height) + self.nodesDb.put(str(seqNo), nodeHash) + + def readLeaf(self, seqNo): + return self._readOne(seqNo, self.leavesDb) + + def readNode(self, seqNo): + return self._readOne(seqNo, self.nodesDb) + + def _readOne(self, pos, db): + self._validatePos(pos) + try: + # Converting any bytearray to bytes + return bytes(db.get(str(pos))) + except KeyError: + logger.error("{} does not have position {}".format(db, pos)) + + def readLeafs(self, start, end): + return self._readMultiple(start, end, self.leavesDb) + + def 
readNodes(self, start, end): + return self._readMultiple(start, end, self.nodesDb) + + def _readMultiple(self, start, end, db): + """ + Returns a list of hashes with serial numbers between start + and end, both inclusive. + """ + self._validatePos(start, end) + # Converting any bytearray to bytes + return [bytes(db.get(str(pos))) for pos in range(start, end+1)] + + @property + def leafCount(self) -> int: + return self.leavesDb.size + + @property + def nodeCount(self) -> int: + return self.nodesDb.size + + @leafCount.setter + def leafCount(self, count: int) -> None: + self._leafCount = count + + @property + def closed(self): + return self.nodesDb is None and self.leavesDb is None + + def open(self): + self.nodesDb = KeyValueStorageLeveldb(self.nodesDbPath) + self.leavesDb = KeyValueStorageLeveldb(self.leavesDbPath) + + def close(self): + self.nodesDb.close() + self.leavesDb.close() + + def reset(self) -> bool: + self.nodesDb.close() + self.nodesDb.drop() + self.nodesDb.open() + + self.leavesDb.close() + self.leavesDb.drop() + self.leavesDb.open() + + self.leafCount = 0 + + return True diff --git a/plenum/persistence/orientdb_graph_store.py b/plenum/persistence/orientdb_graph_store.py deleted file mode 100644 index c8b9650316..0000000000 --- a/plenum/persistence/orientdb_graph_store.py +++ /dev/null @@ -1,94 +0,0 @@ -from typing import Dict - -import pyorient - -from stp_core.common.log import getlogger -from plenum.persistence.graph_store import GraphStore -from plenum.persistence.orientdb_store import OrientDbStore - -logger = getlogger() - - -class OrientDbGraphStore(GraphStore): - def __init__(self, store: OrientDbStore): - assert store.dbType == pyorient.DB_TYPE_GRAPH, \ - "OrientDbGraphStore must be initialized with dbType=DB_TYPE_GRAPH" - super().__init__(store) - - @property - def classesNeeded(self): - raise NotImplementedError - - def bootstrap(self): - self.store.createClasses(self.classesNeeded) - - def createVertexClass(self, className: str, properties: 
Dict=None): - self.createClass(className, "V", properties) - - def createEdgeClass(self, className: str, properties: Dict=None): - self.createClass(className, "E", properties) - - def createClass(self, className: str, superclass: str, properties: Dict=None): - self.client.command("create class {} extends {}". - format(className, superclass)) - # TODO tried the following to see if it increases performance, but - # it didn't seem to. - # See https://www.mail-archive.com/orient-database@googlegroups.com/msg12419.html - # self.client.command("create class {}".format(className)) - # self.client.command("alter class {} superclass {}". - # format(className, superclass)) - if properties: - self.store.createClassProperties(className, properties) - - def addEdgeConstraint(self, edgeClass, iN=None, out=None): - if iN: - self.client.command("create property {}.in link {}". - format(edgeClass, iN)) - if out: - self.client.command("create property {}.out link {}". - format(edgeClass, out)) - - def createVertex(self, vertexName, **kwargs): - cmd = "create vertex {}".format(vertexName) - return self._createEntity(cmd, **kwargs) - - def createEdge(self, edgeName, edgeFrm, edgeTo, **kwargs): - cmd = "create edge {} from {} to {}".format(edgeName, edgeFrm, edgeTo) - return self._createEntity(cmd, **kwargs) - - def _createEntity(self, createCmd, **kwargs): - if len(kwargs) > 0: - createCmd += " set " - createCmd += self.store.getPlaceHolderQueryStringFromDict(kwargs) - return self.client.command(createCmd)[0] - - def getEntityByUniqueAttr(self, entityClassName, attrName, attrValue): - query = "select from {} where {} = " + \ - ("{}" if isinstance(attrValue, (int, float)) else "'{}'") - query = query.format(entityClassName, attrName, attrValue) - result = self.client.command(query) - return None if not result else result[0] - - def getEntityByAttrs(self, entityClassName, attrs: Dict): - attrStr = self.store.getPlaceHolderQueryStringFromDict(attrs, - joiner=" and ") - query = "select 
from {} where {}".format(entityClassName, attrStr) - result = self.client.command(query) - return None if not result else result[0] - - def countEntitiesByAttrs(self, entityClassName, attrs: Dict): - attrStr = self.store.getPlaceHolderQueryStringFromDict(attrs, - joiner=" and ") - result = self.client.command("select count(*) from {} where {}". - format(entityClassName, attrStr)) - return result[0].oRecordData['count'] - - def updateEntityWithUniqueId(self, entityClassName, uniqueIdKey, - uniqueIdVal, **kwargs): - if len(kwargs) > 0: - cmd = "update {} set {} where {}".format( - entityClassName, - self.store.getPlaceHolderQueryStringFromDict(kwargs), - self.store.getPlaceHolderQueryStringFromDict({ - uniqueIdKey: uniqueIdVal})) - self.client.command(cmd) \ No newline at end of file diff --git a/plenum/persistence/orientdb_hash_store.py b/plenum/persistence/orientdb_hash_store.py deleted file mode 100644 index 49a64da5c0..0000000000 --- a/plenum/persistence/orientdb_hash_store.py +++ /dev/null @@ -1,138 +0,0 @@ -from base64 import b64encode, b64decode - -from ledger.stores.hash_store import HashStore -from ledger.util import F -from stp_core.common.log import getlogger -from plenum.persistence.orientdb_store import OrientDbStore - - -logger = getlogger() - - -class OrientDbHashStore(HashStore): - """ - Uses OrientDB to store leaf hashes and node hashes - """ - - def __init__(self, store: OrientDbStore): - self.store = store - self.leafHashClass = "LeafHashStore" - self.nodeHashClass = "NodeHashStore" - self.store.createClasses(self.classesNeeded) - self._leafCount = self.leafCount - - def writeLeaf(self, leafHash): - self.store.client.command( - "insert into {} (seqNo, leafHash) values ({}, '{}')".format( - self.leafHashClass, self.leafCount + 1, self._tob64(leafHash))) - self.leafCount += 1 - - def writeNode(self, node): - start, height, nodeHash = node - seqNo = self.getNodePosition(start, height) - self.store.client.command( - "insert into {} (seqNo, nodeHash) 
values ({}, '{}')".format( - self.nodeHashClass, seqNo, self._tob64(nodeHash))) - - @staticmethod - def _tob64(data): - return b64encode(data).decode() - - @staticmethod - def _fromb64(data): - return b64decode(data.encode()) - - def readLeaf(self, seqNo): - return self._readOne(seqNo, self.leafHashClass, F.leafHash.name) - - def readNode(self, seqNo): - return self._readOne(seqNo, self.nodeHashClass, F.nodeHash.name) - - def _readOne(self, pos, hashClass, attrib): - self._validatePos(pos) - resultSet = self.store.client.command( - "select from {} where seqNo={} limit 1".format( - hashClass, pos)) - if resultSet: - return self._fromb64(resultSet[0].oRecordData[attrib]) - else: - logger.error("{} does not have position {}".format(hashClass, pos)) - - def readLeafs(self, start, end): - return self._readMultiple(start, end, self.leafHashClass, - F.leafHash.name) - - def readNodes(self, start, end): - return self._readMultiple(start, end, self.nodeHashClass, - F.nodeHash.name) - - def _readMultiple(self, start, end, hashClass, attrib): - """ - Returns a list of hashes with serial numbers between start - and end, both inclusive. - """ - self._validatePos(start, end) - resultSet = self.store.client.command( - "select from {} where seqNo between {} and {} order by seqNo asc" - .format(hashClass, start, end)) - return [self._fromb64(r.oRecordData[attrib]) for r in resultSet] - - @property - def leafCount(self) -> int: - result = self.store.client.command("select count(*) from {}". - format(self.leafHashClass)) - return result[0].oRecordData['count'] - - @property - def nodeCount(self) -> int: - result = self.store.client.command("select count(*) from {}". 
- format(self.nodeHashClass)) - return result[0].oRecordData['count'] - - @leafCount.setter - def leafCount(self, count: int) -> None: - self._leafCount = count - - @staticmethod - def _validatePos(start, end=None): - if end: - assert start < end, "start index must be less than end index" - if start < 1: - raise IndexError( - "seqNo starts from 1, index requested: {}".format(start)) - - def createLeafHashClass(self): - self._createHashClass(self.leafHashClass, { - F.seqNo.name: "long", - F.leafHash.name: "string" - }, F.leafHash.name) - - def createNodeHashClass(self): - self._createHashClass(self.nodeHashClass, { - F.seqNo.name: "long", - F.nodeHash.name: "string" - }, F.nodeHash.name) - - def _createHashClass(self, className, attributes, index): - self.store.createClass(className) - self.store.createClassProperties(className, attributes) - self.store.createIndexOnClass(className, index, "unique") - - @property - def classesNeeded(self): - return [(self.leafHashClass, self.createLeafHashClass), - (self.nodeHashClass, self.createNodeHashClass)] - - def close(self): - self.store.close() - - def reset(self) -> bool: - def trunc(clazz): - self.store.client.command( - "truncate class {}".format(clazz)) - - trunc(self.nodeHashClass) - trunc(self.leafHashClass) - - return True - diff --git a/plenum/persistence/orientdb_store.py b/plenum/persistence/orientdb_store.py deleted file mode 100644 index a2ced3525e..0000000000 --- a/plenum/persistence/orientdb_store.py +++ /dev/null @@ -1,138 +0,0 @@ -from typing import Dict - -import pyorient -from plenum.common.error import error -from plenum.common.exceptions import OrientDBNotRunning -from stp_core.common.log import getlogger - -logger = getlogger() - - -class OrientDbStore: - def __init__(self, user, password, dbName, host="localhost", port=2424, - dbType=pyorient.DB_TYPE_GRAPH, - storageType=pyorient.STORAGE_TYPE_MEMORY): - self.dbType = dbType - try: - self.client = self.new_orientdb_client(host, port, user, password) - 
except pyorient.exceptions.PyOrientConnectionException: - raise OrientDBNotRunning("OrientDB connection failed. Check if DB is running " - "on port {}".format(port)) - if not self.client.db_exists(dbName, storageType): - self.createDb(dbName, dbType, storageType) - self.client.db_open(dbName, user, password) - if not (self.serverVersion and self.serverVersion[0] >= 2 and - self.serverVersion[1] >= 2): - error("OrientDB version should be atleast 2.2. Current version is {}" - .format(".".join(map(str, self.serverVersion)))) - - @property - def serverVersion(self): - if self.client and self.client.version: - version = self.client.version - return version.major, version.minor, version.build - - def createDb(self, dbName, dbType, storageType): - self.client.db_create(dbName, dbType, storageType) - - def createClasses(self, classesNeeded): - for cls, clbk in classesNeeded: - if not self.classExists(cls): - logger.debug("Creating class {}".format(cls)) - clbk() - else: - logger.debug("Class {} already exists".format(cls)) - - def classExists(self, name: str) -> bool: - r = self.client.command("select from ( select expand( classes ) from " - "metadata:schema ) where name = '{}'". - format(name)) - return bool(r) - - def createClass(self, className): - self.client.command("create class {}".format(className)) - - def createClassProperties(self, className, properties: Dict): - for prpName, typ in properties.items(): - self.client.command("create property {}.{} {}". 
- format(className, prpName, typ)) - - def createIndexOnClass(self, className: str, prop, indexType=None): - cmd = "create index {}.{}".format(className, prop) - if indexType: - if indexType not in ("unique",): - raise ValueError("Unknown index type {}".format(indexType)) - else: - indexType = "notunique" - cmd += " {}".format(indexType) - self.client.command(cmd) - - def createUniqueIndexOnClass(self, className, uniqueProperty): - self.createIndexOnClass(className, uniqueProperty, "unique") - - def getByRecordIds(self, *rids): - ridStr = ",".join(rids) - return self.client.command("select from [{}]".format(ridStr)) - - @staticmethod - def getPlaceHolderQueryStringFromDict(args: Dict, joiner=", "): - items = [] - for key, val in args.items(): - valPlaceHolder = "{}" if (isinstance(val, (int, float)) or val is None) else "'{}'" - items.append(("{} = " + valPlaceHolder).format(key, val)) - return joiner.join(items) - - @staticmethod - def new_orientdb_client(host, port, user, password): - client = pyorient.OrientDB(host=host, port=port) - session_id = client.connect(user, password) - assert session_id, 'Problem with connecting to OrientDB' - return client - - def wipe(self): - """ - IMPORTANT: this is destructive; use at your own risk - """ - assert self.client._connection, 'Client must be connected to the db' - self.wipe_db(self.client, self.client._connection.db_opened) - - @staticmethod - def wipe_db(client, dbName): - try: - client.db_drop(dbName) - logger.debug("Dropped db {}".format(dbName)) - except Exception as ex: - logger.debug("Error while dropping db {}: {}".format(dbName, ex)) - - def close(self): - if self.client._connection.connected: - self.client.db_close(self.client._connection.db_opened) - - -def createOrientDbInMemStore(config, name, dbType): - """ - Create and return an OrientDb in-memory store used for test cases. 
- """ - host = "localhost" - port = 2424 - try: - client = pyorient.OrientDB(host=host, port=port) - client.connect(user=config.OrientDB['user'], - password=config.OrientDB['password']) - # except ValueError: - # client.connect(user=config.OrientDB['user'], - # password=config.OrientDB['password']) - except pyorient.exceptions.PyOrientConnectionException: - error("OrientDB connection failed. Check if DB is running " - "on port {}".format(port)) - try: - if client.db_exists(name, pyorient.STORAGE_TYPE_MEMORY): - client.db_drop(name, type=pyorient.STORAGE_TYPE_MEMORY) - # This is to avoid a known bug in OrientDb. - except pyorient.exceptions.PyOrientDatabaseException: - client.db_drop(name, type=pyorient.STORAGE_TYPE_MEMORY) - return OrientDbStore(user=config.OrientDB["user"], - password=config.OrientDB["password"], - dbName=name, - dbType=dbType, - storageType=pyorient.STORAGE_TYPE_MEMORY) diff --git a/plenum/persistence/req_id_to_txn.py b/plenum/persistence/req_id_to_txn.py new file mode 100644 index 0000000000..68c06a0931 --- /dev/null +++ b/plenum/persistence/req_id_to_txn.py @@ -0,0 +1,43 @@ +from hashlib import sha256 +from typing import Optional + +from state.kv.kv_store import KeyValueStorage + + +class ReqIdrToTxn: + """ + Stores a map from client identifier, request id tuple to transaction + sequence number + """ + + def __init__(self, keyValueStorage: KeyValueStorage): + self._keyValueStorage = keyValueStorage + + def getKey(self, identifier, reqId): + h = sha256() + h.update(identifier.encode()) + h.update(str(reqId).encode()) + return h.digest() + + def add(self, identifier, reqId, seqNo): + key = self.getKey(identifier, reqId) + self._keyValueStorage.put(key, str(seqNo)) + + def addBatch(self, batch): + self._keyValueStorage.setBatch([(self.getKey(identifier, reqId), str(seqNo)) + for identifier, reqId, seqNo in batch]) + + def get(self, identifier, reqId) -> Optional[int]: + key = self.getKey(identifier, reqId) + try: + val = 
self._keyValueStorage.get(key) + return int(val) + except (KeyError, ValueError): + return None + + @property + def size(self): + return self._keyValueStorage.size + + def close(self): + self._keyValueStorage.close() diff --git a/plenum/persistence/secondary_storage.py b/plenum/persistence/secondary_storage.py deleted file mode 100644 index 6aed564fcd..0000000000 --- a/plenum/persistence/secondary_storage.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -The secondary storage is expected to be able to: -1. store and retrieve Client transactions -2. store and retrieve previous replies - -The data stored in the secondary storage may be a replication of -the primary storage's data but can be queried more effectively. -""" -from ledger.util import F -from plenum.common.constants import NYM, STEWARD, ROLE -from plenum.common.constants import TXN_TYPE, TARGET_NYM -from plenum.common.types import f, Reply - - -class SecondaryStorage: - def __init__(self, txnStore, primaryStorage=None): - self._txnStore = txnStore - self._primaryStorage = primaryStorage - - def getReply(self, identifier, reqId, **kwargs): - txn = self._primaryStorage.get(**{f.IDENTIFIER.nm: identifier, - f.REQ_ID.nm: reqId}) - if txn: - seqNo = txn.get(F.seqNo.name) - if seqNo: - txn.update(self._primaryStorage.merkleInfo(seqNo)) - else: - txn = {} - return txn - - def getReplies(self, *txnIds, seqNo=None, **kwargs): - raise NotImplementedError - - def countStewards(self) -> int: - """Count the number of stewards added to the pool transaction store""" - allTxns = self._primaryStorage.getAllTxn().values() - return sum(1 for txn in allTxns if (txn[TXN_TYPE] == NYM) and - (txn.get(ROLE) == STEWARD)) - - def isSteward(self, nym): - for txn in self._primaryStorage.getAllTxn().values(): - if txn[TXN_TYPE] == NYM and txn[TARGET_NYM] == nym and \ - txn.get(ROLE) == STEWARD: - return True - return False diff --git a/plenum/persistence/storage.py b/plenum/persistence/storage.py index 83c2694afa..6b70cbbad3 100644 --- 
a/plenum/persistence/storage.py +++ b/plenum/persistence/storage.py @@ -1,10 +1,13 @@ +import os from abc import abstractmethod, ABC from ledger.stores.text_file_store import TextFileStore -from plenum.common.exceptions import DataDirectoryNotFound, DBConfigNotFound -from plenum.common.constants import StorageType +from plenum.common.exceptions import DataDirectoryNotFound, KeyValueStorageConfigNotFound +from plenum.common.constants import StorageType, KeyValueStorageType from plenum.common.types import Reply -from plenum.persistence.orientdb_store import OrientDbStore +from state.kv.kv_in_memory import KeyValueStorageInMemory +from state.kv.kv_store import KeyValueStorage +from state.kv.kv_store_leveldb import KeyValueStorageLeveldb class Storage(ABC): @@ -25,17 +28,18 @@ async def get(self, identifier: str, reqId: int, **kwargs): pass +def initKeyValueStorage(keyValueType, dataLocation, keyValueStorageName) -> KeyValueStorage: + if keyValueType == KeyValueStorageType.Leveldb: + kvPath = os.path.join(dataLocation, keyValueStorageName) + return KeyValueStorageLeveldb(kvPath) + elif keyValueType == KeyValueStorageType.Memory: + return KeyValueStorageInMemory() + else: + raise KeyValueStorageConfigNotFound + + def initStorage(storageType, name, dataDir=None, config=None): if storageType == StorageType.File: if dataDir is None: raise DataDirectoryNotFound return TextFileStore(dataDir, name) - elif storageType == StorageType.OrientDB: - if config is None: - raise DBConfigNotFound - orientConf = config.OrientDB - return OrientDbStore(user=orientConf["user"], - password=orientConf["password"], - host=orientConf["host"], - port=orientConf["port"], - dbName=name) diff --git a/plenum/persistence/util.py b/plenum/persistence/util.py new file mode 100644 index 0000000000..28ff0b74f4 --- /dev/null +++ b/plenum/persistence/util.py @@ -0,0 +1,26 @@ +import os + +from copy import deepcopy + +from ledger.util import F + + +def txnsWithSeqNo(seqNoStart, seqNoEnd, txns): + """ + 
Update each transaction with a sequence number field + """ + txns = deepcopy(txns) + for txn, seqNo in zip(txns, range(seqNoStart, seqNoEnd + 1)): + txn[F.seqNo.name] = seqNo + return txns + + +def txnsWithMerkleInfo(ledger, committedTxns): + """ + Update each transaction with the merkle root hash and audit path + """ + committedTxns = deepcopy(committedTxns) + for txn in committedTxns: + mi = ledger.merkleInfo(txn.get(F.seqNo.name)) + txn.update(mi) + return committedTxns diff --git a/plenum/server/client_authn.py b/plenum/server/client_authn.py index 94319e0467..41f0e66f35 100644 --- a/plenum/server/client_authn.py +++ b/plenum/server/client_authn.py @@ -12,8 +12,10 @@ MissingIdentifier, CouldNotAuthenticate, \ SigningException, InvalidSignatureFormat, UnknownIdentifier from plenum.common.signing import serializeMsg +from plenum.common.constants import VERKEY, ROLE from plenum.common.types import f from plenum.common.verifier import DidVerifier +from plenum.server.domain_req_handler import DomainRequestHandler logger = getlogger() @@ -40,9 +42,9 @@ def authenticate(self, """ @abstractmethod - def addClient(self, identifier, verkey, role=None): + def addIdr(self, identifier, verkey, role=None): """ - Adding a client should be an auditable and authenticated action. + Adding an identifier should be an auditable and authenticated action. Robust implementations of ClientAuthNr would authenticate this operation. 
@@ -90,7 +92,7 @@ def authenticate(self, sig = base58.b58decode(signature) except Exception as ex: raise InvalidSignatureFormat from ex - ser = self.serializeForSig(msg) + ser = self.serializeForSig(msg, topLevelKeysToIgnore=[f.SIG.nm]) verkey = self.getVerkey(identifier) vr = DidVerifier(verkey, identifier=identifier) isVerified = vr.verify(sig, ser) @@ -103,15 +105,15 @@ def authenticate(self, return identifier @abstractmethod - def addClient(self, identifier, verkey, role=None): + def addIdr(self, identifier, verkey, role=None): pass @abstractmethod def getVerkey(self, identifier): pass - def serializeForSig(self, msg): - return serializeMsg(msg) + def serializeForSig(self, msg, topLevelKeysToIgnore=None): + return serializeMsg(msg, topLevelKeysToIgnore=topLevelKeysToIgnore) class SimpleAuthNr(NaclAuthNr): @@ -120,21 +122,30 @@ class SimpleAuthNr(NaclAuthNr): secure system. """ - def __init__(self): + def __init__(self, state=None): # key: some identifier, value: verification key self.clients = {} # type: Dict[str, Dict] + self.state = state - def addClient(self, identifier, verkey, role=None): + def addIdr(self, identifier, verkey, role=None): if identifier in self.clients: # raise RuntimeError("client already added") logger.error("client already added") self.clients[identifier] = { - "verkey": verkey, - "role": role + VERKEY: verkey, + ROLE: role } def getVerkey(self, identifier): nym = self.clients.get(identifier) if not nym: - raise UnknownIdentifier(identifier) - return nym.get("verkey") + # Querying uncommitted identities since a batch might contain + # both identity creation request and a request by that newly + # created identity, also its possible to have multiple uncommitted + # batches in progress and identity creation request might + # still be in an earlier uncommited batch + nym = DomainRequestHandler.getNymDetails(self.state, + identifier, isCommitted=False) + if not nym: + raise UnknownIdentifier(identifier) + return nym.get(VERKEY) diff --git 
a/plenum/server/domain_req_handler.py b/plenum/server/domain_req_handler.py new file mode 100644 index 0000000000..2525f1500a --- /dev/null +++ b/plenum/server/domain_req_handler.py @@ -0,0 +1,143 @@ +import json + +from ledger.serializers.json_serializer import JsonSerializer +from ledger.util import F +from plenum.common.constants import TXN_TYPE, NYM, ROLE, STEWARD, TARGET_NYM, VERKEY +from plenum.common.exceptions import UnauthorizedClientRequest +from plenum.common.request import Request +from plenum.common.txn_util import reqToTxn +from plenum.common.types import f +from plenum.persistence.util import txnsWithSeqNo +from plenum.server.req_handler import RequestHandler +from stp_core.common.log import getlogger + +logger = getlogger() + + +class DomainRequestHandler(RequestHandler): + stateSerializer = JsonSerializer() + + def __init__(self, ledger, state, reqProcessors): + super().__init__(ledger, state) + self.reqProcessors = reqProcessors + + def validate(self, req: Request, config=None): + if req.operation.get(TXN_TYPE) == NYM: + origin = req.identifier + error = None + if not self.isSteward(self.state, + origin, isCommitted=False): + error = "Only Steward is allowed to do these transactions" + if req.operation.get(ROLE) == STEWARD: + if self.stewardThresholdExceeded(config): + error = "New stewards cannot be added by other stewards " \ + "as there are already {} stewards in the system".\ + format(config.stewardThreshold) + if error: + raise UnauthorizedClientRequest(req.identifier, + req.reqId, + error) + + def _reqToTxn(self, req: Request): + txn = reqToTxn(req) + for processor in self.reqProcessors: + res = processor.process(req) + txn.update(res) + + return txn + + def apply(self, req: Request): + txn = self._reqToTxn(req) + (start, end), _ = self.ledger.appendTxns([self.transform_txn_for_ledger(txn)]) + self.updateState(txnsWithSeqNo(start, end, [txn])) + return txn + + @staticmethod + def transform_txn_for_ledger(txn): + """ + Some transactions need 
to be updated before they can be stored in the + ledger, eg. storing certain payload in another data store and only its + hash in the ledger + """ + return txn + + def updateState(self, txns, isCommitted=False): + for txn in txns: + self._updateStateWithSingleTxn(txn, isCommitted=isCommitted) + + def _updateStateWithSingleTxn(self, txn, isCommitted=False): + typ = txn.get(TXN_TYPE) + if typ == NYM: + nym = txn.get(TARGET_NYM) + self.updateNym(nym, txn, isCommitted=isCommitted) + else: + logger.debug('Cannot apply request of type {} to state'.format(typ)) + + def countStewards(self) -> int: + """ + Count the number of stewards added to the pool transaction store + Note: This is inefficient, a production use case of this function + should require an efficient storage mechanism + """ + # TODO: do not load all transactions!!! + allTxns = self.ledger.getAllTxn().values() + return sum(1 for txn in allTxns if (txn[TXN_TYPE] == NYM) and + (txn.get(ROLE) == STEWARD)) + + def stewardThresholdExceeded(self, config) -> bool: + """We allow at most `stewardThreshold` number of stewards to be added + by other stewards""" + return self.countStewards() > config.stewardThreshold + + def updateNym(self, nym, txn, isCommitted=True): + existingData = self.getNymDetails(self.state, nym, + isCommitted=isCommitted) + newData = {} + if not existingData: + # New nym being added to state, set the TrustAnchor + newData[f.IDENTIFIER.nm] = txn[f.IDENTIFIER.nm] + # New nym being added to state, set the role and verkey to None, this makes + # the state data always have a value for `role` and `verkey` since we allow + # clients to omit specifying `role` and `verkey` in the request consider a + # default value of None + newData[ROLE] = None + newData[VERKEY] = None + + if ROLE in txn: + newData[ROLE] = txn[ROLE] + if VERKEY in txn: + newData[VERKEY] = txn[VERKEY] + newData[F.seqNo.name] = txn.get(F.seqNo.name) + existingData.update(newData) + key = nym.encode() + val = 
self.stateSerializer.serialize(existingData) + self.state.set(key, val) + return existingData + + def hasNym(self, nym, isCommitted: bool = True): + key = nym.encode() + data = self.state.get(key, isCommitted) + return bool(data) + + @staticmethod + def getSteward(state, nym, isCommitted: bool = True): + nymData = DomainRequestHandler.getNymDetails(state, nym, isCommitted) + if not nymData: + return {} + else: + if nymData.get(ROLE) == STEWARD: + return nymData + else: + return {} + + @staticmethod + def isSteward(state, nym, isCommitted: bool = True): + return bool(DomainRequestHandler.getSteward(state, + nym, + isCommitted)) + + @staticmethod + def getNymDetails(state, nym, isCommitted: bool = True): + key = nym.encode() + data = state.get(key, isCommitted) + return json.loads(data.decode()) if data else {} diff --git a/plenum/server/models.py b/plenum/server/models.py index 9eb12b62a5..cfc3f3d6bf 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -1,7 +1,7 @@ """ Some model objects used in Plenum protocol. """ -from typing import NamedTuple, Set, Tuple +from typing import NamedTuple, Set, Tuple, Dict from plenum.common.types import Commit, Prepare @@ -11,7 +11,8 @@ InsChgVotes = NamedTuple("InsChg", [ ("viewNo", int), - ("voters", Set[str])]) + ("voters", Set[str]), + ('last_ordered', Dict[str, Dict[int, int]])]) class TrackedMsgs(dict): @@ -120,17 +121,21 @@ class InstanceChanges(TrackedMsgs): """ Stores senders of received instance change requests. Key is the view no and and value is the set of senders + Does not differentiate between reason for view change. 
Maybe it should, + but the current assumption is that since a malicious node can raise + different suspicions on different nodes, its ok to consider all suspicions + that can trigger a view change as equal """ def newVoteMsg(self, msg): - return InsChgVotes(msg, set()) + return InsChgVotes(msg.viewNo, set(), msg.ordSeqNos) - def getKey(self, viewNo): - return viewNo + def getKey(self, msg): + return msg if isinstance(msg, int) else msg.viewNo # noinspection PyMethodMayBeStatic - def addVote(self, viewNo: int, voter: str): - super().addMsg(viewNo, voter) + def addVote(self, msg: int, voter: str): + super().addMsg(msg, voter) # noinspection PyMethodMayBeStatic def hasView(self, viewNo: int) -> bool: diff --git a/plenum/server/monitor.py b/plenum/server/monitor.py index ff763a3c9e..712851f569 100644 --- a/plenum/server/monitor.py +++ b/plenum/server/monitor.py @@ -190,48 +190,60 @@ def addInstance(self): self.numOrderedRequests.append((0, 0)) self.clientAvgReqLatencies.append({}) - def requestOrdered(self, identifier: str, reqId: int, instId: int, - byMaster: bool = False) -> Optional[float]: + def requestOrdered(self, reqIdrs: List[Tuple[str, int]], instId: int, + byMaster: bool = False) -> Dict: """ Measure the time taken for ordering of a request and return it. Monitor might have been reset due to view change due to which this method returns None """ - if (identifier, reqId) not in self.requestOrderingStarted: - logger.debug("Got ordered request with identifier {} and reqId {} " - "but it was from a previous view". - format(identifier, reqId)) - return now = time.perf_counter() - duration = now - self.requestOrderingStarted[(identifier, reqId)] + durations = {} + for identifier, reqId in reqIdrs: + if (identifier, reqId) not in self.requestOrderingStarted: + logger.debug( + "Got ordered request with identifier {} and reqId {} " + "but it was from a previous view". 
+ format(identifier, reqId)) + continue + duration = now - self.requestOrderingStarted[(identifier, reqId)] + if byMaster: + self.masterReqLatencies[(identifier, reqId)] = duration + self.orderedRequestsInLast.append(now) + self.latenciesByMasterInLast.append((now, duration)) + else: + if instId not in self.latenciesByBackupsInLast: + self.latenciesByBackupsInLast[instId] = [] + self.latenciesByBackupsInLast[instId].append((now, duration)) + + if identifier not in self.clientAvgReqLatencies[instId]: + self.clientAvgReqLatencies[instId][identifier] = (0, 0.0) + totalReqs, avgTime = self.clientAvgReqLatencies[instId][identifier] + # If avg of `n` items is `a`, thus sum of `n` items is `x` where + # `x=n*a` then avg of `n+1` items where `y` is the new item is + # `((n*a)+y)/n+1` + self.clientAvgReqLatencies[instId][identifier] = \ + (totalReqs + 1, (totalReqs * avgTime + duration) / (totalReqs + 1)) + + durations[identifier, reqId] = duration + reqs, tm = self.numOrderedRequests[instId] - self.numOrderedRequests[instId] = (reqs + 1, tm + duration) - if byMaster: - self.masterReqLatencies[(identifier, reqId)] = duration - self.orderedRequestsInLast.append(now) - self.latenciesByMasterInLast.append((now, duration)) - else: - if instId not in self.latenciesByBackupsInLast: - self.latenciesByBackupsInLast[instId] = [] - self.latenciesByBackupsInLast[instId].append((now, duration)) - - if identifier not in self.clientAvgReqLatencies[instId]: - self.clientAvgReqLatencies[instId][identifier] = (0, 0.0) - totalReqs, avgTime = self.clientAvgReqLatencies[instId][identifier] - # If avg of `n` items is `a`, thus sum of `n` items is `x` where - # `x=n*a` then avg of `n+1` items where `y` is the new item is - # `((n*a)+y)/n+1` - self.clientAvgReqLatencies[instId][identifier] = \ - (totalReqs + 1, (totalReqs * avgTime + duration) / (totalReqs + 1)) + orderedNow = len(durations) + self.numOrderedRequests[instId] = (reqs + orderedNow, + tm + sum(durations.values())) # TODO: 
Inefficient, as on every request a minimum of a large list is # calculated - if min(r[0] for r in self.numOrderedRequests) == (reqs + 1): - self.totalRequests += 1 + if min(r[0] for r in self.numOrderedRequests) == (reqs + orderedNow): + # If these requests is ordered by the last instance then increment + # total requests, but why is this important, why cant is ordering + # by master not enough? + self.totalRequests += orderedNow self.postOnReqOrdered() if 0 == reqs: self.postOnNodeStarted(self.started) - return duration + + return durations def requestUnOrdered(self, identifier: str, reqId: int): """ @@ -269,26 +281,28 @@ def isMasterThroughputTooLow(self): if r is None: logger.debug("{} master throughput is not measurable.". format(self)) + return None + + tooLow = r < self.Delta + if tooLow: + logger.info("{} master throughput ratio {} is lower than " + "Delta {}.".format(self, r, self.Delta)) else: - tooLow = r < self.Delta - if tooLow: - logger.debug("{} master throughput ratio {} is lower than " - "Delta {}.".format(self, r, self.Delta)) - else: - logger.trace("{} master throughput ratio {} is acceptable.". - format(self, r)) - return tooLow + logger.trace("{} master throughput ratio {} is acceptable.". 
+ format(self, r)) + return tooLow def isMasterReqLatencyTooHigh(self): """ Return whether the request latency of the master instance is greater than the acceptable threshold """ - r = self.masterReqLatencyTooHigh or any([lat > self.Lambda for lat - in self.masterReqLatencies.values()]) + r = self.masterReqLatencyTooHigh or \ + next(((key, lat) for key, lat in self.masterReqLatencies.items() if + lat > self.Lambda), None) if r: - logger.debug("{} found master's latency to be higher than the " - "threshold for some or all requests.".format(self)) + logger.info("{} found master's latency {} to be higher than the " + "threshold for request {}.".format(self, r[1], r[0])) else: logger.trace("{} found master's latency to be lower than the " "threshold for all requests.".format(self)) @@ -310,10 +324,11 @@ def isMasterAvgReqLatencyTooHigh(self): logger.trace("{} found master had no record yet for {}". format(self, cid)) return False - if avgLatM[cid] - lat > self.Omega: - logger.debug("{} found difference between master's and " - "backups's avg latency to be higher than the " - "threshold".format(self)) + d = avgLatM[cid] - lat + if d > self.Omega: + logger.info("{} found difference between master's and " + "backups's avg latency {} to be higher than the " + "threshold".format(self, d)) logger.trace( "{}'s master's avg request latency is {} and backup's " "avg request latency is {} ". @@ -356,7 +371,7 @@ def getThroughput(self, instId: int) -> float: reqs, tm = self.numOrderedRequests[instId] return reqs / tm if tm else None - def getInstanceMetrics(self, forAllExcept: int) -> float: + def getInstanceMetrics(self, forAllExcept: int) -> Tuple[Optional[int], Optional[float]]: """ Calculate and return the average throughput of all the instances except the one specified as `forAllExcept`. 
diff --git a/plenum/server/node.py b/plenum/server/node.py index dca87a06dd..6d5e12d53a 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -5,22 +5,13 @@ import shutil import time from binascii import unhexlify +from collections import OrderedDict from collections import deque, defaultdict from contextlib import closing -from hashlib import sha256 from typing import Dict, Any, Mapping, Iterable, List, Optional, \ Sequence, Set, Tuple -import pyorient -from plenum.common.stacks import nodeStackClass, clientStackClass -from stp_core.crypto.signer import Signer -from stp_core.network.network_interface import NetworkInterface -from stp_core.ratchet import Ratchet - -from plenum.common.roles import Roles - from ledger.compact_merkle_tree import CompactMerkleTree -from ledger.ledger import Ledger from ledger.serializers.compact_serializer import CompactSerializer from ledger.stores.file_hash_store import FileHashStore from ledger.stores.hash_store import HashStore @@ -28,49 +19,50 @@ from ledger.util import F from plenum.client.wallet import Wallet from plenum.common.config_util import getConfig +from plenum.common.constants import TXN_TYPE, TXN_TIME, POOL_TXN_TYPES, \ + TARGET_NYM, ROLE, STEWARD, NYM, VERKEY, OP_FIELD_NAME, CLIENT_STACK_SUFFIX, \ + CLIENT_BLACKLISTER_SUFFIX, NODE_BLACKLISTER_SUFFIX, \ + NODE_PRIMARY_STORAGE_SUFFIX, NODE_HASH_STORE_SUFFIX, HS_FILE, DATA, ALIAS, \ + NODE_IP, HS_LEVELDB, POOL_LEDGER_ID, DOMAIN_LEDGER_ID, LedgerState from plenum.common.exceptions import SuspiciousNode, SuspiciousClient, \ MissingNodeOp, InvalidNodeOp, InvalidNodeMsg, InvalidClientMsgType, \ InvalidClientOp, InvalidClientRequest, BaseExc, \ - InvalidClientMessageException, KeysNotFoundException as REx, BlowUp, \ - UnauthorizedClientRequest + InvalidClientMessageException, KeysNotFoundException as REx, BlowUp from plenum.common.has_file_storage import HasFileStorage from plenum.common.keygen_utils import areKeysSetup +from plenum.common.ledger import Ledger 
from plenum.common.ledger_manager import LedgerManager -from stp_core.common.log import getlogger +from plenum.common.message_processor import MessageProcessor from plenum.common.motor import Motor from plenum.common.plugin_helper import loadPlugins -from plenum.common.request import Request +from plenum.common.request import Request, SafeRequest +from plenum.common.roles import Roles from plenum.common.signer_simple import SimpleSigner -from plenum.common.startable import Status, Mode, LedgerState +from plenum.common.stacks import nodeStackClass, clientStackClass +from plenum.common.startable import Status, Mode from plenum.common.throttler import Throttler -from plenum.common.constants import TXN_TYPE, TXN_ID, TXN_TIME, POOL_TXN_TYPES, \ - TARGET_NYM, ROLE, STEWARD, NYM, VERKEY, OP_FIELD_NAME, CLIENT_STACK_SUFFIX, CLIENT_BLACKLISTER_SUFFIX, \ - NODE_BLACKLISTER_SUFFIX, NODE_PRIMARY_STORAGE_SUFFIX, NODE_SECONDARY_STORAGE_SUFFIX, NODE_HASH_STORE_SUFFIX, \ - HS_FILE, HS_ORIENT_DB from plenum.common.txn_util import getTxnOrderedFields from plenum.common.types import Propagate, \ Reply, Nomination, TaggedTuples, Primary, \ Reelection, PrePrepare, Prepare, Commit, \ Ordered, RequestAck, InstanceChange, Batch, OPERATION, BlacklistMsg, f, \ - RequestNack, HA, \ - LedgerStatus, ConsistencyProof, \ - CatchupReq, CatchupRep, \ + RequestNack, HA, LedgerStatus, ConsistencyProof, CatchupReq, CatchupRep, \ PLUGIN_TYPE_VERIFICATION, PLUGIN_TYPE_PROCESSING, PoolLedgerTxns, \ - ConsProofRequest, ElectionType, ThreePhaseType, Checkpoint, ThreePCState + ConsProofRequest, ElectionType, ThreePhaseType, Checkpoint, ThreePCState, \ + Reject from plenum.common.util import friendlyEx, getMaxFailures -from plenum.common.message_processor import MessageProcessor from plenum.common.verifier import DidVerifier -from plenum.common.constants import DATA, ALIAS, NODE_IP +from plenum.persistence.leveldb_hash_store import LevelDbHashStore +from plenum.persistence.req_id_to_txn import ReqIdrToTxn 
-from plenum.persistence.orientdb_hash_store import OrientDbHashStore -from plenum.persistence.orientdb_store import OrientDbStore -from plenum.persistence.secondary_storage import SecondaryStorage -from plenum.persistence.storage import Storage, initStorage +from plenum.persistence.storage import Storage, initStorage, initKeyValueStorage +from plenum.persistence.util import txnsWithMerkleInfo from plenum.server import primary_elector from plenum.server import replica from plenum.server.blacklister import Blacklister from plenum.server.blacklister import SimpleBlacklister from plenum.server.client_authn import ClientAuthNr, SimpleAuthNr +from plenum.server.domain_req_handler import DomainRequestHandler from plenum.server.has_action_queue import HasActionQueue from plenum.server.instances import Instances from plenum.server.models import InstanceChanges @@ -85,6 +77,14 @@ from plenum.server.propagator import Propagator from plenum.server.router import Router from plenum.server.suspicion_codes import Suspicions +from state.pruning_state import PruningState +from stp_core.common.log import getlogger +from stp_core.crypto.signer import Signer +from stp_core.network.network_interface import NetworkInterface +from stp_core.ratchet import Ratchet +from stp_zmq.zstack import ZStack + +from state.state import State pluginManager = PluginManager() logger = getlogger() @@ -93,12 +93,12 @@ class Node(HasActionQueue, Motor, Propagator, MessageProcessor, HasFileStorage, HasPoolManager, PluginLoaderHelper): """ - A node in a plenum system. Nodes communicate with each other via the - RAET protocol. https://github.com/saltstack/raet + A node in a plenum system. 
""" - suspicions = {s.code: s.reason for s in Suspicions.getList()} + suspicions = {s.code: s.reason for s in Suspicions.get_list()} keygenScript = "init_plenum_keys" + _client_request_class = SafeRequest def __init__(self, name: str, @@ -138,13 +138,28 @@ def __init__(self, self.reqProcessors = self.getPluginsByType(pluginPaths, PLUGIN_TYPE_PROCESSING) - self.clientAuthNr = clientAuthNr or self.defaultAuthNr() - - self.requestExecuter = defaultdict(lambda: self.doCustomAction) + self.requestExecuter = defaultdict(lambda: self.executeDomainTxns) Motor.__init__(self) - # HasPoolManager.__init__(self, nodeRegistry, ha, cliname, cliha) + self.hashStore = self.getHashStore(self.name) + self.primaryStorage = storage or self.getPrimaryStorage() + self.ledgerManager = self.getLedgerManager() + self.states = {} # type: Dict[int, State] + + self.ledgerManager.addLedger(DOMAIN_LEDGER_ID, + self.domainLedger, + preCatchupStartClbk=self.preDomainLedgerCatchUp, + postCatchupCompleteClbk=self.postDomainLedgerCaughtUp, + postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) + self.states[DOMAIN_LEDGER_ID] = self.loadDomainState() + self.reqHandler = self.getDomainReqHandler() + self.initDomainState() + + self.clientAuthNr = clientAuthNr or self.defaultAuthNr() + + self.addGenesisNyms() + self.initPoolManager(nodeRegistry, ha, cliname, cliha) if isinstance(self.poolManager, RegistryPoolManager): @@ -169,8 +184,6 @@ def __init__(self, # noinspection PyCallingNonCallable self.clientstack = cls(**kwargs) - # self.clientstack = self.clientStackClass(self.poolManager.cstack, - # self.handleOneClientMsg) self.cliNodeReg = self.poolManager.cliNodeReg @@ -224,10 +237,8 @@ def __init__(self, 'accum': 0 } - # TODO: Create a RecurringCaller that takes a method to call after - # every `n` seconds, also support start and stop methods - # self._schedule(self.checkPerformance, self.perfCheckFreq) self.startRepeating(self.checkPerformance, self.perfCheckFreq) + 
self.startRepeating(self.checkNodeRequestSpike, self.config .notifierEventTriggeringConfig[ @@ -265,10 +276,10 @@ def __init__(self, # which both require client signature verification self.authnWhitelist = (Nomination, Primary, Reelection, Batch, - PrePrepare, Prepare, + PrePrepare, Prepare, Checkpoint, Commit, InstanceChange, LedgerStatus, ConsistencyProof, CatchupReq, CatchupRep, - ConsProofRequest, Checkpoint, ThreePCState) + ConsProofRequest, ThreePCState) # Map of request identifier, request id to client name. Used for # dispatching the processed requests to the correct client remote @@ -276,20 +287,12 @@ def __init__(self, # case the node crashes before sending the reply to the client self.requestSender = {} # Dict[Tuple[str, int], str] - self.hashStore = self.getHashStore(self.name) - self.initDomainLedger() - self.primaryStorage = storage or self.getPrimaryStorage() - self.secondaryStorage = self.getSecondaryStorage() - self.addGenesisNyms() - self.ledgerManager = self.getLedgerManager() - + # TODO: this and tons of akin stuff should be exterminated if isinstance(self.poolManager, TxnPoolManager): - self.ledgerManager.addLedger(0, self.poolLedger, + self.ledgerManager.addLedger(POOL_LEDGER_ID, self.poolLedger, postCatchupCompleteClbk=self.postPoolLedgerCaughtUp, postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) - self.ledgerManager.addLedger(1, self.domainLedger, - postCatchupCompleteClbk=self.postDomainLedgerCaughtUp, - postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) + self.states[POOL_LEDGER_ID] = self.poolManager.state nodeRoutes.extend([ (LedgerStatus, self.ledgerManager.processLedgerStatus), @@ -323,13 +326,32 @@ def __init__(self, # TODO: Do GC for `msgsForFutureReplicas` self.msgsForFutureReplicas = {} + # Any messages that are intended for view numbers higher than the + # current view. 
+ # TODO: Do GC for `msgsForFutureViews` + self.msgsForFutureViews = {} + self.adjustReplicas() + self._primary_replica_no = None + + # Need to keep track of the time when lost connection with primary, + # help in voting for/against a view change. + self.lost_primary_at = None + + # First view change message received for a view no + self.view_change_started_at = {} + tp = loadPlugins(self.basedirpath) logger.debug("total plugins loaded in node: {}".format(tp)) + # TODO: this is already happening in `start`, why here then? self.logNodeInfo() self._id = None self._wallet = None + self.seqNoDB = self.loadSeqNoDB() + + # Stores the last txn seqNo that was executed for a ledger in a batch + self.batchToSeqNos = OrderedDict() # type: OrderedDict[int, int] @property def id(self): @@ -354,6 +376,19 @@ def initPoolManager(self, nodeRegistry, ha, cliname, cliha): def __repr__(self): return self.name + def getDomainReqHandler(self): + return DomainRequestHandler(self.domainLedger, + self.states[DOMAIN_LEDGER_ID], + self.reqProcessors) + + def loadSeqNoDB(self): + return ReqIdrToTxn( + initKeyValueStorage( + self.config.reqIdToTxnStorage, + self.dataLocation, + self.config.seqNoDbName) + ) + # noinspection PyAttributeOutsideInit def setF(self): nodeNames = set(self.nodeReg.keys()) @@ -365,8 +400,8 @@ def setF(self): @property def poolLedger(self): - return self.poolManager.ledger if isinstance(self.poolManager, - TxnPoolManager) \ + return self.poolManager.ledger \ + if isinstance(self.poolManager, TxnPoolManager) \ else None @property @@ -375,15 +410,30 @@ def domainLedger(self): @property def poolLedgerStatus(self): - return LedgerStatus(0, self.poolLedger.size, + return LedgerStatus(POOL_LEDGER_ID, self.poolLedger.size, self.poolLedger.root_hash) \ if self.poolLedger else None @property def domainLedgerStatus(self): - return LedgerStatus(1, self.domainLedger.size, + return LedgerStatus(DOMAIN_LEDGER_ID, self.domainLedger.size, self.domainLedger.root_hash) + def 
getLedgerRootHash(self, ledgerId, isCommitted=True): + ledgerInfo = self.ledgerManager.getLedgerInfoByType(ledgerId) + if not ledgerInfo: + raise RuntimeError('Ledger with id {} does not exist') + ledger = ledgerInfo.ledger + if isCommitted: + return ledger.root_hash + return ledger.uncommittedRootHash or ledger.root_hash + + def stateRootHash(self, ledgerId, isCommitted=True): + state = self.states.get(ledgerId) + if not state: + raise RuntimeError('State with id {} does not exist') + return state.committedHeadHash if isCommitted else state.headHash + @property def isParticipating(self): return self.mode == Mode.participating @@ -402,12 +452,22 @@ def getPrimaryStorage(self): """ if self.config.primaryStorage is None: fields = getTxnOrderedFields() + defaultTxnFile = os.path.join(self.config.baseDir, + self.config.domainTransactionsFile) + if not os.path.exists(defaultTxnFile): + logger.debug("Not using default initialization file for " + "domain ledger, since it does not exist: {}" + .format(defaultTxnFile)) + defaultTxnFile = None + return Ledger(CompactMerkleTree(hashStore=self.hashStore), dataDir=self.dataLocation, serializer=CompactSerializer(fields=fields), fileName=self.config.domainTransactionsFile, - ensureDurability=self.config.EnsureLedgerDurability) + ensureDurability=self.config.EnsureLedgerDurability, + defaultFile=defaultTxnFile) else: + # TODO: we need to rethink this functionality return initStorage(self.config.primaryStorage, name=self.name+NODE_PRIMARY_STORAGE_SUFFIX, dataDir=self.dataLocation, @@ -421,50 +481,28 @@ def getHashStore(self, name) -> HashStore: if hsConfig == HS_FILE: return FileHashStore(dataDir=self.dataLocation, fileNamePrefix=NODE_HASH_STORE_SUFFIX) - elif hsConfig == HS_ORIENT_DB: - if hasattr(self, '_orientDbStore'): - store = self._orientDbStore - else: - store = self._getOrientDbStore(name, - pyorient.DB_TYPE_GRAPH) - return OrientDbHashStore(store) + elif hsConfig == HS_LEVELDB: + return 
LevelDbHashStore(dataDir=self.dataLocation) else: return MemoryHashStore() - def getSecondaryStorage(self) -> SecondaryStorage: - """ - Create and return an instance of secondaryStorage to be - used by this Node. - """ - if self.config.secondaryStorage: - return initStorage(self.config.secondaryStorage, - name=self.name+NODE_SECONDARY_STORAGE_SUFFIX, - dataDir=self.dataLocation, - config=self.config) - else: - return SecondaryStorage(txnStore=None, - primaryStorage=self.primaryStorage) - - def _getOrientDbStore(self, name, dbType) -> OrientDbStore: - """ - Helper method that creates an instance of OrientdbStore. + def getLedgerManager(self) -> LedgerManager: + return LedgerManager(self, ownedByNode=True, + postAllLedgersCaughtUp=self.allLedgersCaughtUp) - :param name: name of the orientdb database - :param dbType: orientdb database type - :return: orientdb store - """ - self._orientDbStore = OrientDbStore( - user=self.config.OrientDB["user"], - password=self.config.OrientDB["password"], - host=self.config.OrientDB["host"], - port=self.config.OrientDB["port"], - dbName=name, - dbType=dbType, - storageType=pyorient.STORAGE_TYPE_PLOCAL) - return self._orientDbStore + def loadDomainState(self): + return PruningState( + initKeyValueStorage( + self.config.domainStateStorage, + self.dataLocation, + self.config.domainStateDbName) + ) - def getLedgerManager(self): - return LedgerManager(self, ownedByNode=True) + @classmethod + def ledgerIdForRequest(cls, request: Request): + assert request.operation[TXN_TYPE] + typ = request.operation[TXN_TYPE] + return cls.ledgerId(typ) def start(self, loop): oldstatus = self.status @@ -476,6 +514,8 @@ def start(self, loop): self.primaryStorage.start(loop, ensureDurability= self.config.EnsureLedgerDurability) + if self.hashStore.closed: + self.hashStore = self.getHashStore(self.name) self.nodestack.start() self.clientstack.start() @@ -488,16 +528,16 @@ def start(self, loop): "".format(self), extra={"cli": "LOW_STATUS", "tags": 
["node-key-sharing"]}) else: - self.nodestack.maintainConnections() + self.nodestack.maintainConnections(force=True) if isinstance(self.poolManager, RegistryPoolManager): # Node not using pool ledger so start syncing domain ledger self.mode = Mode.discovered - self.ledgerManager.setLedgerCanSync(1, True) + self.ledgerManager.setLedgerCanSync(DOMAIN_LEDGER_ID, True) else: # Node using pool ledger so first sync pool ledger self.mode = Mode.starting - self.ledgerManager.setLedgerCanSync(0, True) + self.ledgerManager.setLedgerCanSync(POOL_LEDGER_ID, True) self.logNodeInfo() @@ -520,22 +560,6 @@ def connectedNodeCount(self) -> int: """ return len(self.nodestack.conns) + 1 - def stop(self, *args, **kwargs): - super().stop(*args, **kwargs) - - if isinstance(self.hashStore, (FileHashStore, OrientDbHashStore)): - try: - self.hashStore.close() - except Exception as ex: - logger.warning('{} got exception while closing hash store: {}'. - format(self, ex)) - - if isinstance(self.poolManager, TxnPoolManager): - if self.poolManager._ledger is not None: - self.poolManager._ledger.stop() - if self.poolManager.hashStore is not None: - self.poolManager.hashStore.close() - def onStopping(self): """ Actions to be performed on stopping the node. @@ -547,31 +571,57 @@ def onStopping(self): self.reset() - # Stop the txn store - try: - self.primaryStorage.stop() - except Exception as ex: + # Stop the ledgers + ledgers = [self.domainLedger] + if self.poolLedger: + ledgers.append(self.poolLedger) + + for ledger in ledgers: try: - self.primaryStorage.close() + ledger.stop() except Exception as ex: - logger.warning( - '{} got exception while stopping/closing ' - 'primary storage: {}'.format(self, ex)) + logger.warning('{} got exception while stopping ledger: {}'. 
+ format(self, ex)) - try: - if callable(getattr(self.secondaryStorage, 'close', None)): - self.secondaryStorage.close() - except Exception as ex: - logger.warning('{} got exception while closing ' - 'secondary storage: {}'.format(self, ex)) + # Stop the hash stores + hashStores = [self.hashStore] + if self.poolLedger: + ledgers.append(self.poolLedger) + if self.hashStore: + hashStores.append(self.hashStore) + if isinstance(self.poolManager, TxnPoolManager) and self.poolManager.hashStore: + hashStores.append(self.poolManager.hashStore) + hashStores = [hs for hs in hashStores if + isinstance(hs, (FileHashStore, LevelDbHashStore)) + and not hs.closed] + for hs in hashStores: + try: + hs.close() + except Exception as ex: + logger.warning('{} got exception while closing hash store: {}'. + format(self, ex)) self.nodestack.stop() self.clientstack.stop() + self.closeAllKVStores() + self.mode = None if isinstance(self.poolManager, TxnPoolManager): - self.ledgerManager.setLedgerState(0, LedgerState.not_synced) - self.ledgerManager.setLedgerState(1, LedgerState.not_synced) + self.ledgerManager.setLedgerState(POOL_LEDGER_ID, + LedgerState.not_synced) + self.ledgerManager.setLedgerState(DOMAIN_LEDGER_ID, + LedgerState.not_synced) + + def closeAllKVStores(self): + # Clear leveldb lock files + logger.info("{} closing level dbs".format(self), extra={"cli": False}) + for ledgerId in self.ledgerManager.ledgerRegistry: + state = self.getState(ledgerId) + if state: + state.close() + if self.seqNoDB: + self.seqNoDB.close() def reset(self): logger.info("{} reseting...".format(self), extra={"cli": False}) @@ -593,9 +643,6 @@ async def prod(self, limit: int=None) -> int: :param limit: the number of items to be serviced in this attempt :return: total number of messages serviced by this node """ - if self.isGoing(): - self.nodestack.serviceLifecycle() - self.clientstack.serviceClientStack() c = 0 if self.status is not Status.stopped: c += await self.serviceReplicas(limit) @@ -606,6 +653,9 
@@ async def prod(self, limit: int=None) -> int: c += self.monitor._serviceActions() c += await self.serviceElector() self.nodestack.flushOutBoxes() + if self.isGoing(): + self.nodestack.serviceLifecycle() + self.clientstack.serviceClientStack() return c async def serviceReplicas(self, limit) -> int: @@ -677,8 +727,14 @@ def onConnsChanged(self, joined: Set[str], left: Set[str]): else: self.status = Status.starting self.elector.nodeCount = self.connectedNodeCount - viewChangeStarted = self.startViewChangeIfPrimaryWentOffline(left) - if not viewChangeStarted and self.isReady(): + + if self.master_primary in joined: + self.lost_primary_at = None + if self.master_primary in left: + logger.debug('{} lost connection to primary of master'.format(self)) + self.lost_master_primary() + + if self.isReady(): self.checkInstances() # TODO: Should we only send election messages when lagged or # otherwise too? @@ -694,17 +750,22 @@ def onConnsChanged(self, joined: Set[str], left: Set[str]): logger.debug("{} communicating view number {} to {}" .format(self, self.viewNo-1, joinedNode)) rid = self.nodestack.getRemote(joinedNode).uid - self.send(InstanceChange(self.viewNo), rid) + self.send( + self._create_instance_change_msg(self.viewNo, 0), + rid) # Send ledger status whether ready (connected to enough nodes) or not - for joinedNode in joined: - self.sendPoolLedgerStatus(joinedNode) - # Send the domain ledger status only when it has discovered enough - # peers otherwise very few peers will know that this node is lagging - # behind and it will not receive sufficient consistency proofs to - # verify the exact state of the ledger. 
- if self.mode in (Mode.discovered, Mode.participating): - self.sendDomainLedgerStatus(joinedNode) + for n in joined: + self.send_ledger_status_to_newly_connected_node(n) + + def send_ledger_status_to_newly_connected_node(self, node_name): + self.sendPoolLedgerStatus(node_name) + # Send the domain ledger status only when it has discovered enough + # peers otherwise very few peers will know that this node is lagging + # behind and it will not receive sufficient consistency proofs to + # verify the exact state of the ledger. + if self.mode in (Mode.discovered, Mode.participating): + self.sendDomainLedgerStatus(node_name) def newNodeJoined(self, txn): self.setF() @@ -782,22 +843,44 @@ def adjustReplicas(self): return newReplicas + def _dispatch_stashed_msg(self, msg, frm): + if isinstance(msg, ElectionType): + self.sendToElector(msg, frm) + return True + elif isinstance(msg, ThreePhaseType): + self.sendToReplica(msg, frm) + return True + else: + return False + def processStashedMsgsForReplica(self, instId: int): if instId not in self.msgsForFutureReplicas: return i = 0 while self.msgsForFutureReplicas[instId]: msg, frm = self.msgsForFutureReplicas[instId].popleft() - if isinstance(msg, ElectionType): - self.sendToElector(msg, frm) - elif isinstance(msg, ThreePhaseType): - self.sendToReplica(msg, frm) - else: - self.discard(msg, reason="Unknown message type for replica id" - .format(instId), logMethod=logger.warn) + if not self._dispatch_stashed_msg(msg, frm): + self.discard(msg, reason="Unknown message type for replica id " + "{}".format(instId), + logMethod=logger.warn) + i += 1 logger.debug("{} processed {} stashed msgs for replica {}". 
format(self, i, instId)) + def processStashedMsgsForView(self, view_no: int): + if view_no not in self.msgsForFutureViews: + return + i = 0 + while self.msgsForFutureViews[view_no]: + msg, frm = self.msgsForFutureViews[view_no].popleft() + if not self._dispatch_stashed_msg(msg, frm): + self.discard(msg, reason="Unknown message type for view no " + "{}".format(view_no), + logMethod=logger.warn) + i += 1 + logger.debug("{} processed {} stashed msgs for view no {}". + format(self, i, view_no)) + def decidePrimaries(self): """ Choose the primary replica for each protocol instance in the system @@ -894,9 +977,14 @@ async def serviceReplicaOutBox(self, limit: int=None) -> int: self.processOrdered(msg) else: logger.debug("{} stashing {} since mode is {} and {}". - format(self, msg, self.mode, - recvd)) + format(self, msg, self.mode, recvd)) self.stashedOrderedReqs.append(msg) + elif isinstance(msg, Reject): + reqKey = (msg.identifier, msg.reqId) + reject = Reject(*reqKey, + self.reasonForClientFromException(msg.reason)) + self.transmitToClient(reject, self.requestSender[reqKey]) + self.doneProcessingReq(*reqKey) elif isinstance(msg, Exception): self.processEscalatedException(msg) else: @@ -969,9 +1057,17 @@ def primaryReplicaNo(self) -> Optional[int]: :return: index of the primary """ - for idx, replica in enumerate(self.replicas): - if replica.isPrimary: - return idx + if self._primary_replica_no is None: + for idx, replica in enumerate(self.replicas): + if replica.isPrimary: + self._primary_replica_no = idx + return idx + return self._primary_replica_no + + @property + def master_primary(self) -> Optional[str]: + if self.replicas[0].primaryName: + return self.replicas[0].getNodeName(self.replicas[0].primaryName) return None def msgHasAcceptableInstId(self, msg, frm) -> bool: @@ -982,7 +1078,7 @@ def msgHasAcceptableInstId(self, msg, frm) -> bool: :param msg: the node message to validate :return: """ - instId = getattr(msg, "instId", None) + instId = getattr(msg, 
f.INST_ID.nm, None) if instId is None or not isinstance(instId, int) or instId < 0: return False if instId >= len(self.msgsToReplicas): @@ -994,39 +1090,26 @@ def msgHasAcceptableInstId(self, msg, frm) -> bool: return False return True - def msgHasAcceptableViewNo(self, msg) -> bool: + def msgHasAcceptableViewNo(self, msg, frm) -> bool: """ Return true if the view no of message corresponds to the current view - no, or a view no in the past that the replicas know of or a view no in - the future - + no or a view no in the future :param msg: the node message to validate :return: """ - viewNo = getattr(msg, "viewNo", None) + viewNo = getattr(msg, f.VIEW_NO.nm, None) if viewNo is None or not isinstance(viewNo, int) or viewNo < 0: return False - corrects = [] - for r in self.replicas: - if not r.primaryNames: - # The replica and thus this node does not know any viewNos - corrects.append(True) - continue - if viewNo in r.primaryNames.keys(): - # Replica has seen primary with this view no - corrects.append(True) - elif viewNo > max(r.primaryNames.keys()): - # msg for a future view no - corrects.append(True) - else: - # Replica has not seen any primary for this `viewNo` and its - # less than the current `viewNo` - corrects.append(False) - r = all(corrects) - if not r: + if viewNo < self.viewNo: self.discard(msg, "un-acceptable viewNo {}" - .format(viewNo), logMethod=logger.debug) - return r + .format(viewNo), logMethod=logger.info) + elif viewNo > self.viewNo: + if viewNo not in self.msgsForFutureViews: + self.msgsForFutureViews[viewNo] = deque() + self.msgsForFutureViews[viewNo].append((msg, frm)) + else: + return True + return False def sendToReplica(self, msg, frm): """ @@ -1036,7 +1119,7 @@ def sendToReplica(self, msg, frm): :param frm: the name of the node which sent this `msg` """ if self.msgHasAcceptableInstId(msg, frm) and \ - self.msgHasAcceptableViewNo(msg): + self.msgHasAcceptableViewNo(msg, frm): self.msgsToReplicas[msg.instId].append((msg, frm)) def 
sendToElector(self, msg, frm): @@ -1047,7 +1130,7 @@ def sendToElector(self, msg, frm): :param frm: the name of the node which sent this `msg` """ if self.msgHasAcceptableInstId(msg, frm) and \ - self.msgHasAcceptableViewNo(msg): + self.msgHasAcceptableViewNo(msg, frm): logger.debug("{} sending message to elector: {}". format(self, (msg, frm))) self.msgsToElector.append((msg, frm)) @@ -1118,6 +1201,7 @@ def unpackNodeMsg(self, msg, frm) -> None: if isinstance(msg, Batch): logger.debug("{} processing a batch {}".format(self, msg)) for m in msg.messages: + m = self.nodestack.deserializeMsg(m) self.handleOneNodeMsg((m, frm)) else: self.postToNodeInBox(msg, frm) @@ -1129,7 +1213,7 @@ def postToNodeInBox(self, msg, frm): :param msg: a node message :param frm: the name of the node that sent this `msg` """ - logger.debug("{} appending to nodeinxbox {}".format(self, msg)) + logger.debug("{} appending to nodeInbox {}".format(self, msg)) self.nodeInBox.append((msg, frm)) async def processNodeInBox(self): @@ -1168,7 +1252,7 @@ def handleInvalidClientMsg(self, ex, wrappedMsg): msg, frm = wrappedMsg exc = ex.__cause__ if ex.__cause__ else ex friendly = friendlyEx(ex) - reason = "client request invalid: {}".format(friendly) + reason = self.reasonForClientFromException(ex) if isinstance(msg, Request): msg = msg.__getstate__() identifier = msg.get(f.IDENTIFIER.nm) @@ -1195,10 +1279,10 @@ def validateClientMsg(self, wrappedMsg): if all(attr in msg.keys() for attr in [OPERATION, f.IDENTIFIER.nm, f.REQ_ID.nm]): - self.checkValidOperation(msg[f.IDENTIFIER.nm], - msg[f.REQ_ID.nm], - msg[OPERATION]) - cls = Request + self.doStaticValidation(msg[f.IDENTIFIER.nm], + msg[f.REQ_ID.nm], + msg[OPERATION]) + cls = self._client_request_class elif OP_FIELD_NAME in msg: op = msg.pop(OP_FIELD_NAME) cls = TaggedTuples.get(op, None) @@ -1211,6 +1295,10 @@ def validateClientMsg(self, wrappedMsg): msg.get(f.REQ_ID.nm)) try: cMsg = cls(**msg) + except TypeError as ex: + raise 
InvalidClientRequest(msg.get(f.IDENTIFIER.nm), + msg.get(f.REQ_ID.nm), + str(ex)) except Exception as ex: raise InvalidClientRequest(msg.get(f.IDENTIFIER.nm), msg.get(f.REQ_ID.nm)) from ex @@ -1239,6 +1327,14 @@ def unpackClientMsg(self, msg, frm): """ if isinstance(msg, Batch): for m in msg.messages: + # This check is done since Client uses NodeStack (which can + # send and receive BATCH) to talk to nodes but Node uses + # ClientStack (which cannot send or receive BATCH). + # TODO: The solution is to have both kind of stacks be able to + # parse BATCH messages + if m in (ZStack.pingMessage, ZStack.pongMessage): + continue + m = self.clientstack.deserializeMsg(m) self.handleOneClientMsg((m, frm)) else: self.postToClientInBox(msg, frm) @@ -1270,60 +1366,137 @@ async def processClientInBox(self): except InvalidClientMessageException as ex: self.handleInvalidClientMsg(ex, m) - def postPoolLedgerCaughtUp(self): + def postPoolLedgerCaughtUp(self, **kwargs): self.mode = Mode.discovered - self.ledgerManager.setLedgerCanSync(1, True) - # Node has discovered other nodes now sync up domain ledger - for nm in self.nodestack.connecteds: - self.sendDomainLedgerStatus(nm) - self.ledgerManager.processStashedLedgerStatuses(1) + # The node might have discovered more nodes, so see if schedule + # election if needed. 
if isinstance(self.poolManager, TxnPoolManager): self.checkInstances() # Initialising node id in case where node's information was not present # in pool ledger at the time of starting, happens when a non-genesis # node starts self.id + self.catchup_next_ledger_after_pool() + + def catchup_next_ledger_after_pool(self): + self.start_domain_ledger_sync() + + def start_domain_ledger_sync(self): + self.ledgerManager.setLedgerCanSync(DOMAIN_LEDGER_ID, True) + for nm in self.nodestack.connecteds: + self.sendDomainLedgerStatus(nm) + self.ledgerManager.processStashedLedgerStatuses(DOMAIN_LEDGER_ID) - def postDomainLedgerCaughtUp(self): + def preDomainLedgerCatchUp(self): + """ + Ledger got out of sync. Setting node's state accordingly + :return: + """ + self.mode = Mode.syncing + + def postDomainLedgerCaughtUp(self, **kwargs): """ Process any stashed ordered requests and set the mode to `participating` :return: """ - self.processStashedOrderedReqs() - self.mode = Mode.participating - # self.sync3PhaseState() - self.checkInstances() + pass - def postTxnFromCatchupAddedToLedger(self, ledgerType: int, txn: Any): - if ledgerType == 0: - self.poolManager.onPoolMembershipChange(txn) - if ledgerType == 1: - if txn.get(TXN_TYPE) == NYM: - self.addNewRole(txn) + def postTxnFromCatchupAddedToLedger(self, ledgerId: int, txn: Any): self.reqsFromCatchupReplies.add((txn.get(f.IDENTIFIER.nm), txn.get(f.REQ_ID.nm))) + rh = self.postRecvTxnFromCatchup(ledgerId, txn) + if rh: + rh.updateState([txn], isCommitted=True) + state = self.getState(ledgerId) + state.commit(rootHash=state.headHash) + self.updateSeqNoMap([txn]) + + def postRecvTxnFromCatchup(self, ledgerId: int, txn: Any): + rh = None + if ledgerId == POOL_LEDGER_ID: + self.poolManager.onPoolMembershipChange(txn) + rh = self.poolManager.reqHandler + if ledgerId == DOMAIN_LEDGER_ID: + self.post_txn_from_catchup_added_to_domain_ledger(txn) + rh = self.reqHandler + return rh + + def allLedgersCaughtUp(self): + self.mode = 
Mode.participating + self.processStashedOrderedReqs() + # TODO: next line not needed + self.checkInstances() + + def getLedger(self, ledgerId): + return self.ledgerManager.getLedgerInfoByType(ledgerId).ledger + + def getState(self, ledgerId): + return self.states.get(ledgerId) + + def post_txn_from_catchup_added_to_domain_ledger(self, txn): + if txn.get(TXN_TYPE) == NYM: + self.addNewRole(txn) + def sendPoolLedgerStatus(self, nodeName): - self.sendLedgerStatus(nodeName, 0) + self.sendLedgerStatus(nodeName, POOL_LEDGER_ID) def sendDomainLedgerStatus(self, nodeName): - self.sendLedgerStatus(nodeName, 1) + self.sendLedgerStatus(nodeName, DOMAIN_LEDGER_ID) - def getLedgerStatus(self, ledgerType: int): - if ledgerType == 0: + def getLedgerStatus(self, ledgerId: int): + if ledgerId == POOL_LEDGER_ID: return self.poolLedgerStatus - if ledgerType == 1: + if ledgerId == DOMAIN_LEDGER_ID: return self.domainLedgerStatus - def sendLedgerStatus(self, nodeName: str, ledgerType: int): - ledgerStatus = self.getLedgerStatus(ledgerType) + def sendLedgerStatus(self, nodeName: str, ledgerId: int): + ledgerStatus = self.getLedgerStatus(ledgerId) if ledgerStatus: rid = self.nodestack.getRemote(nodeName).uid self.send(ledgerStatus, rid) else: logger.debug("{} not sending ledger {} status to {} as it is null" - .format(self, ledgerType, nodeName)) + .format(self, ledgerId, nodeName)) + + def doStaticValidation(self, identifier, reqId, operation): + if TXN_TYPE not in operation: + raise InvalidClientRequest(identifier, reqId) + + if operation.get(TXN_TYPE) in POOL_TXN_TYPES: + self.poolManager.doStaticValidation(identifier, reqId, operation) + + if self.opVerifiers: + try: + for v in self.opVerifiers: + v.verify(operation) + except Exception as ex: + raise InvalidClientRequest(identifier, reqId) from ex + + def doDynamicValidation(self, request: Request): + """ + State based validation + """ + if self.ledgerIdForRequest(request) == POOL_LEDGER_ID: + 
self.poolManager.doDynamicValidation(request) + else: + self.domainDynamicValidation(request) + + def applyReq(self, request: Request): + """ + Apply request to appropriate ledger and state + """ + if self.ledgerIdForRequest(request) == POOL_LEDGER_ID: + return self.poolManager.applyReq(request) + else: + return self.domainRequestApplication(request) + + def domainDynamicValidation(self, request: Request): + self.reqHandler.validate(request, self.config) + + def domainRequestApplication(self, request: Request): + return self.reqHandler.apply(request) def processRequest(self, request: Request, frm: str): """ @@ -1353,18 +1526,14 @@ def processRequest(self, request: Request, frm: str): # TODO: What if the reply was a REQNACK? Its not gonna be found in the # replies. - typ = request.operation.get(TXN_TYPE) - if typ in POOL_TXN_TYPES: - reply = self.poolManager.getReplyFor(request) - else: - reply = self.getReplyFor(request) - + ledgerId = self.ledgerIdForRequest(request) + ledger = self.getLedger(ledgerId) + reply = self.getReplyFromLedger(ledger, request) if reply: logger.debug("{} returning REPLY from already processed " "REQUEST: {}".format(self, request)) self.transmitToClient(reply, frm) else: - self.checkRequestAuthorized(request) if not self.isProcessingReq(*request.key): self.startedProcessingReq(*request.key, frm) # If not already got the propagate request(PROPAGATE) for the @@ -1373,7 +1542,7 @@ def processRequest(self, request: Request, frm: str): self.transmitToClient(RequestAck(*request.key), frm) # noinspection PyUnusedLocal - async def processPropagate(self, msg: Propagate, frm): + def processPropagate(self, msg: Propagate, frm): """ Process one propagateRequest sent to this node asynchronously @@ -1387,7 +1556,7 @@ async def processPropagate(self, msg: Propagate, frm): logger.debug("Node {} received propagated request: {}". 
format(self.name, msg)) reqDict = msg.request - request = Request(**reqDict) + request = SafeRequest(**reqDict) clientName = msg.senderClient @@ -1422,35 +1591,39 @@ def processOrdered(self, ordered: Ordered, retryNo: int = 0): :return: True if successful, None otherwise """ - instId, viewNo, identifier, reqId, ppTime = tuple(ordered) + instId, viewNo, reqIdrs, ppSeqNo, ppTime, ledgerId, stateRoot, txnRoot \ + = tuple(ordered) - self.monitor.requestOrdered(identifier, - reqId, + self.monitor.requestOrdered(reqIdrs, instId, byMaster=(instId == self.instances.masterId)) # Only the request ordered by master protocol instance are executed by # the client if instId == self.instances.masterId: - key = (identifier, reqId) - if key in self.requests: - req = self.requests[key].request - self.executeRequest(ppTime, req) - logger.debug("{} executed client request {} {}". - format(self.name, identifier, reqId)) - # If the client request hasn't reached the node but corresponding - # PROPAGATE, PRE-PREPARE, PREPARE and COMMIT request did, - # then retry 3 times + reqs = [self.requests[i, r].request for (i, r) in reqIdrs + if (i, r) in self.requests] + if len(reqs) == len(reqIdrs): + logger.debug("{} executing Ordered batch {} of {} requests". + format(self.name, ppSeqNo, len(reqIdrs))) + self.executeBatch(ppSeqNo, ppTime, reqs, ledgerId, stateRoot, + txnRoot) + # If the client request hasn't reached the node but corresponding + # PROPAGATE, PRE-PREPARE, PREPARE and COMMIT request did, + # then retry 3 times elif retryNo < 3: retryNo += 1 asyncio.sleep(random.randint(2, 4)) self.processOrdered(ordered, retryNo) - logger.debug("{} retrying executing client request {} {}". - format(self.name, identifier, reqId)) + logger.debug('{} retrying executing ordered client requests'. + format(self.name)) + else: + logger.warning('{} not retrying processing Ordered any more {} ' + 'times'.format(self, retryNo)) return True else: - logger.trace("{} got ordered request from backup replica". 
- format(self)) + logger.trace("{} got ordered requests from backup replica {}". + format(self, instId)) def processEscalatedException(self, ex): """ @@ -1475,19 +1648,20 @@ def processInstanceChange(self, instChg: InstanceChange, frm: str) -> None: if not isinstance(instChg.viewNo, int): self.discard(instChg, "field viewNo has incorrect type: {}". format(type(instChg.viewNo))) - elif instChg.viewNo < self.viewNo: + elif instChg.viewNo <= self.viewNo: self.discard(instChg, "Received instance change request with view no {} " - "which is less than its view no {}". + "which is not more than its view no {}". format(instChg.viewNo, self.viewNo), logger.debug) else: # Record instance changes for views but send instance change # only when found master to be degraded. if quorum of view changes # found then change view even if master not degraded if not self.instanceChanges.hasInstChngFrom(instChg.viewNo, frm): - self.instanceChanges.addVote(instChg.viewNo, frm) + self.instanceChanges.addVote(instChg, frm) + if self.monitor.isMasterDegraded(): - logger.debug( + logger.info( "{} found master degraded after receiving instance change " "message from {}".format(self, frm)) self.sendInstanceChange(instChg.viewNo) @@ -1495,13 +1669,18 @@ def processInstanceChange(self, instChg: InstanceChange, frm: str) -> None: logger.debug( "{} received instance change message {} but did not " "find the master to be slow".format(self, instChg)) - if self.canViewChange(instChg.viewNo): - logger.debug("{} initiating a view change with view " - "no {}".format(self, self.viewNo)) - self.startViewChange(instChg.viewNo) - else: + + if not self.do_view_change_if_possible(instChg.viewNo): logger.trace("{} cannot initiate a view change".format(self)) + def do_view_change_if_possible(self, view_no): + if self.canViewChange(view_no): + logger.info("{} initiating a view change to {} from {}". 
+ format(self, view_no, self.viewNo)) + self.startViewChange(view_no) + return True + return False + def checkPerformance(self): """ Check if master instance is slow and send an instance change request. @@ -1518,6 +1697,9 @@ def checkPerformance(self): self.sendNodeRequestSpike() if self.monitor.isMasterDegraded(): self.sendInstanceChange(self.viewNo+1) + logger.debug('{} sent view change performance degraded ' + 'of master instance'.format(self)) + self.do_view_change_if_possible(self.viewNo+1) return False else: logger.debug("{}'s master has higher performance than backups". @@ -1544,11 +1726,16 @@ def sendNodeRequestSpike(self): self.name ) - def sendInstanceChange(self, viewNo: int): + def _create_instance_change_msg(self, view_no, suspicion_code): + return InstanceChange(view_no, suspicion_code, + [r.lastOrderedPPSeqNo for r in self.replicas]) + + def sendInstanceChange(self, view_no: int, + suspicion=Suspicions.PRIMARY_DEGRADED): """ Broadcast an instance change request to all the remaining nodes - :param viewNo: the view number when the instance change is requested + :param view_no: the view number when the instance change is requested """ # If not found any sent instance change messages in last @@ -1558,13 +1745,14 @@ def sendInstanceChange(self, viewNo: int): canSendInsChange, cooldown = self.insChngThrottler.acquire() if canSendInsChange: - logger.info("{} master has lower performance than backups. " - "Sending an instance change with viewNo {}". - format(self, viewNo)) + logger.info("{} sending an instance change with view_no {} since " + "{}". + format(self, view_no, suspicion.reason)) logger.info("{} metrics for monitor: {}". 
format(self, self.monitor.prettymetrics)) - self.send(InstanceChange(viewNo)) - self.instanceChanges.addVote(viewNo, self.name) + msg = self._create_instance_change_msg(view_no, suspicion.code) + self.send(msg) + self.instanceChanges.addVote(msg, self.name) else: logger.debug("{} cannot send instance change sooner then {} seconds" .format(self, cooldown)) @@ -1582,11 +1770,16 @@ def quorum(self) -> int: """ return (2 * self.f) + 1 - def primaryFound(self): + def primary_found(self): # If the node has primary replica of master instance self.monitor.hasMasterPrimary = self.primaryReplicaNo == 0 + self.process_reqs_stashed_for_primary() + + @property + def all_instances_have_primary(self): + return all(r.primaryName is not None for r in self.replicas) - def canViewChange(self, proposedViewNo: int) -> bool: + def canViewChange(self, proposedViewNo: int) -> (bool, str): """ Return whether there's quorum for view change for the proposed view number and its view is less than or equal to the proposed view @@ -1594,25 +1787,34 @@ def canViewChange(self, proposedViewNo: int) -> bool: return self.instanceChanges.hasQuorum(proposedViewNo, self.f) and \ self.viewNo < proposedViewNo + def propose_view_change(self): + # Sends instance change message when primary has been + # disconnected for long enough + if self.lost_primary_at and \ + time.perf_counter() - self.lost_primary_at \ + >= self.config.ToleratePrimaryDisconnection: + view_no = self.viewNo + 1 + self.sendInstanceChange(view_no, + Suspicions.PRIMARY_DISCONNECTED) + logger.debug('{} sent view change since was disconnected ' + 'from primary for too long'.format(self)) + self.do_view_change_if_possible(view_no) + # TODO: consider moving this to pool manager - def startViewChangeIfPrimaryWentOffline(self, nodesGoingDown): + def lost_master_primary(self): """ - Starts view change if there are primaries among the nodes which have - gone down. 
- - :param nodesGoingDown: the nodes which have gone down + Schedule an primary connection check which in turn can send a view + change message :return: whether view change started """ - for node in nodesGoingDown: - for instId, replica in enumerate(self.replicas): - leftOne = '{}:{}'.format(node, instId) - if replica.primaryName == leftOne: - logger.debug("Primary {} is offline, " - "{} starting view change" - .format(leftOne, self.name)) - self.startViewChange(self.viewNo + 1) - return True - return False + self.lost_primary_at = time.perf_counter() + + self.propose_view_change() + + logger.debug('{} scheduling a view change in {} sec'. + format(self, self.config.ToleratePrimaryDisconnection)) + self._schedule(self.propose_view_change, + self.config.ToleratePrimaryDisconnection) # TODO: consider moving this to pool manager def startViewChange(self, proposedViewNo: int): @@ -1626,6 +1828,7 @@ def startViewChange(self, proposedViewNo: int): format(self)) self.monitor.reset() + self.processStashedMsgsForView(proposedViewNo) # Now communicate the view change to the elector which will # contest primary elections across protocol all instances self.elector.viewChanged(self.viewNo) @@ -1650,7 +1853,6 @@ def verifySignature(self, msg): req = msg if not isinstance(req, Mapping): - # req = msg.__getstate__() req = msg.as_dict identifier = self.authNr(req).authenticate(req) @@ -1665,100 +1867,108 @@ def authNr(self, req): def isSignatureVerificationNeeded(self, msg: Any): return True - def checkValidOperation(self, clientId, reqId, operation): - if operation.get(TXN_TYPE) in POOL_TXN_TYPES: - if not self.poolManager.checkValidOperation(operation): - raise InvalidClientRequest(clientId, reqId) - - if self.opVerifiers: - try: - for v in self.opVerifiers: - v.verify(operation) - except Exception as ex: - raise InvalidClientRequest(clientId, reqId) from ex - - def checkRequestAuthorized(self, request): - """ - Subclasses can implement this method to throw an - 
UnauthorizedClientRequest if the request is not authorized. + def ppSeqNoForTxnSeqNo(self, ledgerId, seqNo): + for ppSeqNo, (lid, txnSeqNo) in reversed(self.batchToSeqNos.items()): + if lid == ledgerId and txnSeqNo == seqNo: + return ppSeqNo + return -1 - If a request makes it this far, the signature has been verified - to match the identifier. - """ - if request.operation.get(TXN_TYPE) in POOL_TXN_TYPES: - return self.poolManager.checkRequestAuthorized(request) - if request.operation.get(TXN_TYPE) == NYM: - origin = request.identifier - error = None - if not self.secondaryStorage.isSteward(origin): - error = "Only Steward is allowed to do this transactions" - if request.operation.get(ROLE) == STEWARD: - error = self.authErrorWhileAddingSteward(request) - if error: - raise UnauthorizedClientRequest(request.identifier, - request.reqId, - error) - - def executeRequest(self, ppTime: float, req: Request) -> None: + def executeBatch(self, ppSeqNo: int, ppTime: float, reqs: List[Request], + ledgerId, stateRoot, txnRoot) -> None: """ Execute the REQUEST sent to this Node :param viewNo: the view number (See glossary) :param ppTime: the time at which PRE-PREPARE was sent - :param req: the client REQUEST - """ + :param reqs: list of client REQUESTs + """ + committedTxns = self.requestExecuter[ledgerId](ppTime, reqs, stateRoot, + txnRoot) + if committedTxns: + lastTxnSeqNo = committedTxns[-1][F.seqNo.name] + self.batchToSeqNos[ppSeqNo] = (ledgerId, lastTxnSeqNo) + logger.debug('{} storing ppSeqno {} for ledger {} seqNo {}'. + format(self, ppSeqNo, ledgerId, lastTxnSeqNo)) + if len(self.batchToSeqNos) > self.config.ProcessedBatchMapsToKeep: + x = self.batchToSeqNos.popitem(last=False) + logger.debug('{} popped {} from batch to txn seqNo map'. 
+ format(self, x)) + + def updateSeqNoMap(self, committedTxns): + self.seqNoDB.addBatch((txn[f.IDENTIFIER.nm], txn[f.REQ_ID.nm], + txn[F.seqNo.name]) for txn in committedTxns) + + def commitAndSendReplies(self, reqHandler, ppTime, reqs: List[Request], + stateRoot, txnRoot) -> List: + committedTxns = reqHandler.commit(len(reqs), stateRoot, txnRoot) + self.updateSeqNoMap(committedTxns) + committedTxns = txnsWithMerkleInfo(reqHandler.ledger, + committedTxns) + self.sendRepliesToClients( + map(self.update_txn_with_extra_data, committedTxns), + ppTime) + return committedTxns + + def executeDomainTxns(self, ppTime, reqs: List[Request], stateRoot, + txnRoot) -> List: + committedTxns = self.commitAndSendReplies(self.reqHandler, ppTime, reqs, + stateRoot, txnRoot) + for txn in committedTxns: + if txn[TXN_TYPE] == NYM: + self.addNewRole(txn) + return committedTxns - self.requestExecuter[req.operation.get(TXN_TYPE)](ppTime, req) + def onBatchCreated(self, ledgerId, stateRoot): + """ + A batch of requests has been created and has been applied but + committed to ledger and state. + :param ledgerId: + :param stateRoot: state root after the batch creation + :return: + """ + if ledgerId == POOL_LEDGER_ID: + if isinstance(self.poolManager, TxnPoolManager): + self.poolManager.reqHandler.onBatchCreated(stateRoot) + elif ledgerId == DOMAIN_LEDGER_ID: + self.reqHandler.onBatchCreated(stateRoot) + else: + logger.debug('{} did not know how to handle for ledger {}'. + format(self, ledgerId)) - # TODO: Find a better name for the function - def doCustomAction(self, ppTime, req): - reply = self.generateReply(ppTime, req) - merkleProof = self.appendResultToLedger(reply.result) - reply.result.update(merkleProof) - self.sendReplyToClient(reply, req.key) - if reply.result.get(TXN_TYPE) == NYM: - self.addNewRole(reply.result) + def onBatchRejected(self, ledgerId, stateRoot=None): + """ + A batch of requests has been rejected, if stateRoot is None, reject + the current batch. 
+ :param ledgerId: + :param stateRoot: state root after the batch was created + :return: + """ + if ledgerId == POOL_LEDGER_ID: + if isinstance(self.poolManager, TxnPoolManager): + self.poolManager.reqHandler.onBatchRejected(stateRoot) + elif ledgerId == DOMAIN_LEDGER_ID: + self.reqHandler.onBatchRejected(stateRoot) + else: + logger.debug('{} did not know how to handle for ledger {}'. + format(self, ledgerId)) - @staticmethod - def ledgerTypeForTxn(txnType: str): - return 0 if txnType in POOL_TXN_TYPES else 1 + @classmethod + def ledgerId(cls, txnType: str): + return POOL_LEDGER_ID if txnType in POOL_TXN_TYPES else DOMAIN_LEDGER_ID - def appendResultToLedger(self, data): - ledgerType = self.ledgerTypeForTxn(data[TXN_TYPE]) - return self.ledgerManager.appendToLedger(ledgerType, data) + def sendRepliesToClients(self, committedTxns, ppTime): + for txn in committedTxns: + # TODO: Send txn and state proof to the client + txn[TXN_TIME] = ppTime + self.sendReplyToClient(Reply(txn), (txn[f.IDENTIFIER.nm], + txn[f.REQ_ID.nm])) def sendReplyToClient(self, reply, reqKey): if self.isProcessingReq(*reqKey): + logger.debug('{} sending reply for {} to client'.format(self, reqKey)) self.transmitToClient(reply, self.requestSender[reqKey]) self.doneProcessingReq(*reqKey) - @staticmethod - def genTxnId(identifier, reqId): - return sha256("{}{}".format(identifier, reqId).encode()).hexdigest() - - def generateReply(self, ppTime: float, req: Request) -> Reply: - """ - Return a new clientReply created using the viewNo, request and the - computed txnId of the request - - :param ppTime: the time at which PRE-PREPARE was sent - :param req: the REQUEST - :return: a Reply generated from the request - """ - logger.debug("{} generating reply for {}".format(self, req)) - txnId = self.genTxnId(req.identifier, req.reqId) - result = { - f.IDENTIFIER.nm: req.identifier, - f.REQ_ID.nm: req.reqId, - TXN_ID: txnId, - TXN_TIME: int(ppTime) - } - result.update(req.operation) - for processor in 
self.reqProcessors: - result.update(processor.process(req)) - - return Reply(result) - def addNewRole(self, txn): """ Adds a new client or steward to this node based on transaction type. @@ -1772,54 +1982,52 @@ def addNewRole(self, txn): if identifier not in self.clientAuthNr.clients: role = txn.get(ROLE) if role not in (STEWARD, None): - logger.error("Role if present must be {}".format(Roles.STEWARD.name)) + logger.error("Role if present must be {}". + format(Roles.STEWARD.name)) return - self.clientAuthNr.addClient(identifier, verkey=v.verkey, - role=role) - - def initDomainLedger(self): - # If the domain ledger file is not present initialize it by copying - # from genesis transactions - if not self.hasFile(self.config.domainTransactionsFile): - defaultTxnFile = os.path.join(self.basedirpath, - self.config.domainTransactionsFile) - if os.path.isfile(defaultTxnFile): - shutil.copy(defaultTxnFile, self.dataLocation) + self.clientAuthNr.addIdr(identifier, + verkey=v.verkey, + role=role) + + @staticmethod + def initStateFromLedger(state: State, ledger: Ledger, reqHandler): + # If the trie is empty then initialize it by applying + # txns from ledger + if state.isEmpty: + txns = [_ for _ in ledger.getAllTxn().values()] + reqHandler.updateState(txns, isCommitted=True) + state.commit(rootHash=state.headHash) + + def initDomainState(self): + self.initStateFromLedger(self.states[DOMAIN_LEDGER_ID], + self.domainLedger, self.reqHandler) def addGenesisNyms(self): for _, txn in self.domainLedger.getAllTxn().items(): if txn.get(TXN_TYPE) == NYM: self.addNewRole(txn) - def authErrorWhileAddingSteward(self, request): - origin = request.identifier - if not self.secondaryStorage.isSteward(origin): - return "{} is not a steward so cannot add a new steward". 
\ - format(origin) - if self.stewardThresholdExceeded(): - return "New stewards cannot be added by other stewards as "\ - "there are already {} stewards in the system".format( - self.config.stewardThreshold) - - def stewardThresholdExceeded(self) -> bool: - """We allow at most `stewardThreshold` number of stewards to be added - by other stewards""" - return self.secondaryStorage.countStewards() > \ - self.config.stewardThreshold - def defaultAuthNr(self): - return SimpleAuthNr() - - def getReplyFor(self, request): - result = self.secondaryStorage.getReply(request.identifier, - request.reqId) - return Reply(result) if result else None + state = self.getState(DOMAIN_LEDGER_ID) + return SimpleAuthNr(state=state) def processStashedOrderedReqs(self): i = 0 while self.stashedOrderedReqs: msg = self.stashedOrderedReqs.popleft() + if msg.ppSeqNo <= self.ledgerManager.lastCaughtUpPpSeqNo: + logger.debug('{} ignoring stashed ordered msg {} since ledger ' + 'manager has lastCaughtUpPpSeqNo as {}'. + format(self, msg, + self.ledgerManager.lastCaughtUpPpSeqNo)) + continue if not self.gotInCatchupReplies(msg): + if msg.instId == 0: + logger.debug('{} applying stashed Ordered msg {}'. 
+ format(self, msg)) + for reqKey in msg.reqIdr: + req = self.requests[reqKey].finalised + self.applyReq(req) self.processOrdered(msg) i += 1 logger.debug("{} processed {} stashed ordered requests".format(self, i)) @@ -1829,8 +2037,8 @@ def processStashedOrderedReqs(self): return i def gotInCatchupReplies(self, msg): - key = (getattr(msg, f.IDENTIFIER.nm), getattr(msg, f.REQ_ID.nm)) - return key in self.reqsFromCatchupReplies + reqIdr = getattr(msg, f.REQ_IDR.nm) + return set(reqIdr).intersection(self.reqsFromCatchupReplies) def sync3PhaseState(self): for replica in self.replicas: @@ -1845,6 +2053,12 @@ def ensureKeysAreSetup(self): if not areKeysSetup(name, baseDir, self.config): raise REx(REx.reason.format(name) + self.keygenScript) + @staticmethod + def reasonForClientFromException(ex: Exception): + friendly = friendlyEx(ex) + reason = "client request invalid: {}".format(friendly) + return reason + def reportSuspiciousNodeEx(self, ex: SuspiciousNode): """ Report suspicion on a node on the basis of an exception @@ -1874,15 +2088,22 @@ def reportSuspiciousNode(self, # reason=InvalidSignature.reason, # code=InvalidSignature.code) - if code in self.suspicions: - # TODO: Reconsider tolerating some suspicions, and if you tolerate, - # why are they suspicions? - if code not in (Suspicions.DUPLICATE_PPR_SENT, - Suspicions.DUPLICATE_PR_SENT, - Suspicions.DUPLICATE_CM_SENT): - self.blacklistNode(nodeName, - reason=self.suspicions[code], - code=code) + # TODO: Consider blacklisting nodes again. 
+ # if code in self.suspicions: + # self.blacklistNode(nodeName, + # reason=self.suspicions[code], + # code=code) + + if code in (s.code for s in (Suspicions.PPR_DIGEST_WRONG, + Suspicions.PPR_REJECT_WRONG, + Suspicions.PPR_TXN_WRONG, + Suspicions.PPR_STATE_WRONG)): + self.sendInstanceChange(self.viewNo + 1, Suspicions.get_by_code(code)) + logger.info('{} sent instance change since suspicion code {}' + .format(self, code)) + + if not self.do_view_change_if_possible(self.viewNo + 1): + logger.trace("{} cannot initiate a view change".format(self)) if offendingMsg: self.discard(offendingMsg, reason, logger.warning) @@ -1960,13 +2181,33 @@ def send(self, msg: Any, *rids: Iterable[int], signer: Signer = None): .format(self, msg, recipientsNum, remoteNames)) self.nodestack.send(msg, *rids, signer=signer) - @staticmethod - def getReplyFromLedger(ledger, request): - txn = ledger.get(identifier=request.identifier, reqId=request.reqId) + def getReplyFromLedger(self, ledger, request): + # DoS attack vector, client requesting already processed request id + # results in iterating over ledger (or its subset) + seqNo = self.seqNoDB.get(request.identifier, request.reqId) + if seqNo: + txn = ledger.getBySeqNo(int(seqNo)) + else: + txn = ledger.get(identifier=request.identifier, reqId=request.reqId) if txn: txn.update(ledger.merkleInfo(txn.get(F.seqNo.name))) + txn = self.update_txn_with_extra_data(txn) return Reply(txn) + def update_txn_with_extra_data(self, txn): + """ + All the data of the transaction might not be stored in ledger so the + extra data that is omitted from ledger needs to be fetched from the + appropriate data store + :param txn: + :return: + """ + # All the data of any transaction is stored in the ledger + return txn + + def transform_txn_for_ledger(self, txn): + return self.reqHandler.transform_txn_for_ledger(txn) + def __enter__(self): return self diff --git a/plenum/server/pool_manager.py b/plenum/server/pool_manager.py index 42902f675d..d67d7a0d76 100644 --- 
a/plenum/server/pool_manager.py +++ b/plenum/server/pool_manager.py @@ -1,27 +1,31 @@ +import ipaddress + +import os +import base58 from typing import Dict, Tuple from functools import lru_cache from copy import deepcopy -from ledger.util import F -from plenum.common.txn_util import updateGenesisPoolTxnFile +from typing import Dict, Tuple, List +from plenum.common.constants import TXN_TYPE, NODE, TARGET_NYM, DATA, ALIAS, \ + NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT, VERKEY, SERVICES, \ + VALIDATOR, CLIENT_STACK_SUFFIX, POOL_LEDGER_ID, DOMAIN_LEDGER_ID from plenum.common.exceptions import UnsupportedOperation, \ - UnauthorizedClientRequest - + InvalidClientRequest +from plenum.common.request import Request from plenum.common.stack_manager import TxnStackManager +from plenum.common.types import NodeDetail +from plenum.persistence.storage import initKeyValueStorage +from plenum.persistence.util import txnsWithMerkleInfo +from plenum.server.pool_req_handler import PoolRequestHandler +from plenum.server.suspicion_codes import Suspicions +from state.pruning_state import PruningState +from stp_core.common.log import getlogger from stp_core.network.auth_mode import AuthMode from stp_core.network.exceptions import RemoteNotFound from stp_core.types import HA -from plenum.common.types import f -from plenum.common.constants import TXN_TYPE, NODE, TARGET_NYM, DATA, ALIAS, \ - POOL_TXN_TYPES, NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT, VERKEY, SERVICES, \ - VALIDATOR, CLIENT_STACK_SUFFIX -from stp_core.common.log import getlogger - -from plenum.common.types import NodeDetail - - logger = getlogger() @@ -48,9 +52,7 @@ def __init__(self, nodeRegistry=None, ha=None, cliname=None, cliha=None): if not nodeRegistry: self.poolManager = TxnPoolManager(self, ha=ha, cliname=cliname, cliha=cliha) - for types in POOL_TXN_TYPES: - self.requestExecuter[types] = \ - self.poolManager.executePoolTxnRequest + self.requestExecuter[POOL_LEDGER_ID] = self.poolManager.executePoolTxnBatch else: 
self.poolManager = RegistryPoolManager(self.name, self.basedirpath, nodeRegistry, ha, cliname, @@ -65,9 +67,36 @@ def __init__(self, node, ha=None, cliname=None, cliha=None): self.basedirpath = node.basedirpath self._ledger = None TxnStackManager.__init__(self, self.name, self.basedirpath, isNode=True) + self.state = self.loadState() + self.reqHandler = self.getPoolReqHandler() + self.initPoolState() self.nstack, self.cstack, self.nodeReg, self.cliNodeReg = \ self.getStackParamsAndNodeReg(self.name, self.basedirpath, ha=ha, cliname=cliname, cliha=cliha) + self._dataFieldsValidators = ( + (NODE_IP, self._isIpAddressValid), + (CLIENT_IP, self._isIpAddressValid), + (NODE_PORT, self._isPortValid), + (CLIENT_PORT, self._isPortValid), + ) + + def __repr__(self): + return self.node.name + + def getPoolReqHandler(self): + return PoolRequestHandler(self.ledger, self.state, + self.node.states[DOMAIN_LEDGER_ID]) + + def loadState(self): + return PruningState( + initKeyValueStorage( + self.config.poolStateStorage, + self.node.dataLocation, + self.config.poolStateDbName) + ) + + def initPoolState(self): + self.node.initStateFromLedger(self.state, self.ledger, self.reqHandler) @property def hasLedger(self): @@ -112,26 +141,21 @@ def getStackParamsAndNodeReg(self, name, basedirpath, nodeRegistry=None, return nstack, cstack, nodeReg, cliNodeReg - def executePoolTxnRequest(self, ppTime, req): + def executePoolTxnBatch(self, ppTime, reqs, stateRoot, txnRoot) -> List: """ Execute a transaction that involves consensus pool management, like adding a node, client or a steward. 
:param ppTime: PrePrepare request time - :param req: request + :param reqs: request """ - reply = self.node.generateReply(ppTime, req) - op = req.operation - reply.result.update(op) - merkleProof = self.node.appendResultToLedger(reply.result) - txn = deepcopy(reply.result) - txn[F.seqNo.name] = merkleProof[F.seqNo.name] - self.onPoolMembershipChange(txn) - reply.result.update(merkleProof) - self.node.sendReplyToClient(reply, req.key) - - def getReplyFor(self, request): - return self.node.getReplyFromLedger(self.ledger, request) + committedTxns = self.reqHandler.commit(len(reqs), stateRoot, txnRoot) + self.node.updateSeqNoMap(committedTxns) + for txn in committedTxns: + self.onPoolMembershipChange(deepcopy(txn)) + committedTxns = txnsWithMerkleInfo(self.reqHandler.ledger, committedTxns) + self.node.sendRepliesToClients(committedTxns, ppTime) + return committedTxns def onPoolMembershipChange(self, txn): if txn[TXN_TYPE] == NODE: @@ -164,9 +188,6 @@ def _updateNode(txn): _updateNode(txn) self.node.sendPoolInfoToClients(txn) - if self.config.UpdateGenesisPoolTxnFile: - updateGenesisPoolTxnFile(self.config.baseDir, - self.config.poolTransactionsFile, txn) def addNewNodeAndConnect(self, txn): nodeName = txn[DATA][ALIAS] @@ -177,6 +198,11 @@ def addNewNodeAndConnect(self, txn): self.connectNewRemote(txn, nodeName, self.node) self.node.newNodeJoined(txn) + def node_about_to_be_disconnected(self, nodeName): + if self.node.master_primary == nodeName: + self.node.sendInstanceChange(self.node.viewNo + 1, + Suspicions.PRIMARY_ABOUT_TO_BE_DISCONNECTED) + def nodeHaChanged(self, txn): nodeNym = txn[TARGET_NYM] nodeName = self.getNodeName(nodeNym) @@ -190,7 +216,7 @@ def nodeHaChanged(self, txn): if rid: self.node.nodestack.outBoxes.pop(rid, None) # self.node.sendPoolInfoToClients(txn) - self.node.startViewChangeIfPrimaryWentOffline([nodeName]) + self.node_about_to_be_disconnected(nodeName) def nodeKeysChanged(self, txn): # TODO: if the node whose keys are being changed is primary 
for any @@ -212,7 +238,7 @@ def nodeKeysChanged(self, txn): if rid: self.node.nodestack.outBoxes.pop(rid, None) # self.node.sendPoolInfoToClients(txn) - self.node.startViewChangeIfPrimaryWentOffline([nodeName]) + self.node_about_to_be_disconnected(nodeName) def nodeServicesChanged(self, txn): nodeNym = txn[TARGET_NYM] @@ -244,89 +270,34 @@ def nodeServicesChanged(self, txn): format(self, nodeName)) self.node.nodeLeft(txn) - self.node.startViewChangeIfPrimaryWentOffline([nodeName]) + self.node_about_to_be_disconnected(nodeName) def getNodeName(self, nym): # Assuming ALIAS does not change _, nodeTxn = self.getNodeInfoFromLedger(nym) return nodeTxn[DATA][ALIAS] - def checkValidOperation(self, operation): - checks = [] + def doStaticValidation(self, identifier, reqId, operation): if operation[TXN_TYPE] == NODE: - checks.append(DATA in operation and isinstance(operation[DATA], dict)) - return all(checks) - - def checkRequestAuthorized(self, request): - typ = request.operation.get(TXN_TYPE) - error = None - if typ == NODE: - nodeNym = request.operation.get(TARGET_NYM) - if self.nodeExistsInLedger(nodeNym): - error = self.authErrorWhileUpdatingNode(request) - else: - error = self.authErrorWhileAddingNode(request) - if error: - raise UnauthorizedClientRequest(request.identifier, request.reqId, - error) - - def authErrorWhileAddingNode(self, request): - origin = request.identifier - operation = request.operation - isSteward = self.node.secondaryStorage.isSteward(origin) - data = operation.get(DATA, {}) - invalidData = self._validateNodeData(data) - if invalidData: - return invalidData - - if not isSteward: - return "{} is not a steward so cannot add a new node".format(origin) - - for txn in self.ledger.getAllTxn().values(): - if txn[TXN_TYPE] == NODE: - if txn[f.IDENTIFIER.nm] == origin: - return "{} already has a node with name {}". 
\ - format(origin, txn[DATA][ALIAS]) - - if self.isNodeDataConflicting(data, operation.get(TARGET_NYM)): - return "existing data has conflicts with " \ - "request data {}".format(operation.get(DATA)) - - @lru_cache(maxsize=64) - def isStewardOfNode(self, stewardNym, nodeNym): - for txn in self.ledger.getAllTxn().values(): - if txn[TXN_TYPE] == NODE and \ - txn[TARGET_NYM] == nodeNym and \ - txn[f.IDENTIFIER.nm] == stewardNym: - return True - return False - - @staticmethod - def _validateNodeData(data): - if data.get(NODE_IP, "nodeip") == data.get(CLIENT_IP, "clientip") and \ - data.get(NODE_PORT, "nodeport") == data.get(CLIENT_PORT, "clientport"): - return "node and client ha can't be same" - - def authErrorWhileUpdatingNode(self, request): - origin = request.identifier - operation = request.operation - isSteward = self.node.secondaryStorage.isSteward(origin) - data = operation.get(DATA, {}) - invalidData = self._validateNodeData(data) - if invalidData: - return invalidData - if not isSteward: - return "{} is not a steward so cannot update a node".format(origin) - nodeNym = operation.get(TARGET_NYM) - if not self.isStewardOfNode(origin, nodeNym): - return "{} is not a steward of node {}".format(origin, nodeNym) - for txn in self.ledger.getAllTxn().values(): - if txn[TXN_TYPE] == NODE and nodeNym == txn[TARGET_NYM]: - if txn[DATA] == operation.get(DATA, {}): - return "node already has the same data as requested" - if self.isNodeDataConflicting(data, nodeNym): - return "existing data has conflicts with " \ - "request data {}".format(operation.get(DATA)) + if not (DATA in operation and isinstance(operation[DATA], dict)): + error = "'{}' is missed or not a dict".format(DATA) + raise InvalidClientRequest(identifier, reqId, error) + # VerKey must be base58 + if len(set(operation[TARGET_NYM]) - set(base58.alphabet)) != 0: + error = "'{}' is not a base58 string".format(TARGET_NYM) + raise InvalidClientRequest(identifier, reqId, error) + + data = operation[DATA] + for fn, 
validator in self._dataFieldsValidators: + if fn in data and not validator(data[fn]): + error = "'{}' ('{}') is invalid".format(fn, data[fn]) + raise InvalidClientRequest(identifier, reqId, error) + + def doDynamicValidation(self, request: Request): + self.reqHandler.validate(request) + + def applyReq(self, request: Request): + return self.reqHandler.apply(request) @property def merkleRootHash(self): @@ -340,35 +311,23 @@ def getNodeData(self, nym): _, nodeTxn = self.getNodeInfoFromLedger(nym) return nodeTxn[DATA] - def _checkAgainstOtherNodePoolTxns(self, data, existingNodeTxn): - otherNodeData = existingNodeTxn[DATA] - for (ip, port) in [(NODE_IP, NODE_PORT), - (CLIENT_IP, CLIENT_PORT)]: - if (otherNodeData.get(ip), otherNodeData.get(port)) == ( - data.get(ip), data.get(port)): - return True - - if otherNodeData.get(ALIAS) == data.get(ALIAS): - return True - - def _checkAgainstSameNodePoolTxns(self, data, existingNodeTxn): - sameNodeData = existingNodeTxn[DATA] - if sameNodeData.get(ALIAS) != data.get(ALIAS): - return True - - def isNodeDataConflicting(self, data, nodeNym=None): - for existingNodeTxn in [t for t in self.ledger.getAllTxn().values() - if t[TXN_TYPE] == NODE]: - if not nodeNym or nodeNym != existingNodeTxn[TARGET_NYM]: - conflictFound = self._checkAgainstOtherNodePoolTxns(data, existingNodeTxn) - if conflictFound: - return conflictFound - if nodeNym and nodeNym == existingNodeTxn[TARGET_NYM]: - conflictFound = self._checkAgainstSameNodePoolTxns(data, existingNodeTxn) - if conflictFound: - return conflictFound + @staticmethod + def _isIpAddressValid(ipAddress): + try: + ipaddress.ip_address(ipAddress) + except ValueError: + return False + else: + return ipAddress != '0.0.0.0' + + @staticmethod + def _isPortValid(port): + return isinstance(port, int) and 0 < port <= 65535 + class RegistryPoolManager(PoolManager): + # This is the old way of managing the pool nodes information and + # should be deprecated. 
def __init__(self, name, basedirpath, nodeRegistry, ha, cliname, cliha): self.nstack, self.cstack, self.nodeReg, self.cliNodeReg = \ diff --git a/plenum/server/pool_req_handler.py b/plenum/server/pool_req_handler.py new file mode 100644 index 0000000000..29adb7a587 --- /dev/null +++ b/plenum/server/pool_req_handler.py @@ -0,0 +1,196 @@ +import json +from functools import lru_cache + +from ledger.serializers.json_serializer import JsonSerializer +from plenum.common.constants import TXN_TYPE, NODE, TARGET_NYM, DATA, ALIAS, NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT, \ + SERVICES +from plenum.common.exceptions import UnauthorizedClientRequest +from plenum.common.ledger import Ledger +from plenum.common.request import Request +from plenum.common.txn_util import reqToTxn +from plenum.common.types import f +from plenum.persistence.util import txnsWithSeqNo +from plenum.server.domain_req_handler import DomainRequestHandler +from plenum.server.req_handler import RequestHandler +from state.state import State +from stp_core.common.log import getlogger + +logger = getlogger() + + +class PoolRequestHandler(RequestHandler): + + def __init__(self, ledger: Ledger, state: State, + domainState: State): + super().__init__(ledger, state) + self.domainState = domainState + self.stateSerializer = JsonSerializer() + + def validate(self, req: Request, config=None): + typ = req.operation.get(TXN_TYPE) + error = None + if typ == NODE: + nodeNym = req.operation.get(TARGET_NYM) + if self.getNodeData(nodeNym, isCommitted=False): + error = self.authErrorWhileUpdatingNode(req) + else: + error = self.authErrorWhileAddingNode(req) + if error: + raise UnauthorizedClientRequest(req.identifier, req.reqId, + error) + + def apply(self, req: Request): + typ = req.operation.get(TXN_TYPE) + if typ == NODE: + txn = reqToTxn(req) + (start, end), _ = self.ledger.appendTxns([txn]) + self.updateState(txnsWithSeqNo(start, end, [txn])) + return txn + else: + logger.debug('Cannot apply request of type {} to 
state'.format(typ)) + return None + + def updateState(self, txns, isCommitted=False): + for txn in txns: + nodeNym = txn.get(TARGET_NYM) + data = txn.get(DATA, {}) + existingData = self.getNodeData(nodeNym, isCommitted=isCommitted) + # Node data did not exist in state, so this is a new node txn, + # hence store the author of the txn (steward of node) + if not existingData: + existingData[f.IDENTIFIER.nm] = txn.get(f.IDENTIFIER.nm) + existingData.update(data) + self.updateNodeData(nodeNym, existingData) + + def authErrorWhileAddingNode(self, request): + origin = request.identifier + operation = request.operation + data = operation.get(DATA, {}) + error = self.dataErrorWhileValidating(data, skipKeys=False) + if error: + return error + + # SERVICES is required for add node and optional for update node txn + # it is the cause why the check is here + # this is not a good place for the check, + # should be moved in some other place + if SERVICES not in data: + return 'field {} is required for adding node'.format(SERVICES) + + isSteward = self.isSteward(origin, isCommitted=False) + if not isSteward: + return "{} is not a steward so cannot add a new node".format(origin) + if self.stewardHasNode(origin): + return "{} already has a node".format(origin) + if self.isNodeDataConflicting(operation.get(DATA, {})): + return "existing data has conflicts with " \ + "request data {}".format(operation.get(DATA)) + + def authErrorWhileUpdatingNode(self, request): + # Check if steward of the node is updating it and its data does not + # conflict with any existing node's data + origin = request.identifier + operation = request.operation + isSteward = self.isSteward(origin, isCommitted=False) + if not isSteward: + return "{} is not a steward so cannot update a node".format(origin) + + nodeNym = operation.get(TARGET_NYM) + if not self.isStewardOfNode(origin, nodeNym, isCommitted=False): + return "{} is not a steward of node {}".format(origin, nodeNym) + + data = operation.get(DATA, {}) + 
return self.dataErrorWhileValidatingUpdate(data, nodeNym) + + def getNodeData(self, nym, isCommitted: bool = True): + key = nym.encode() + data = self.state.get(key, isCommitted) + return json.loads(data.decode()) if data else {} + + def updateNodeData(self, nym, data): + key = nym.encode() + val = self.stateSerializer.serialize(data) + self.state.set(key, val) + + def isSteward(self, nym, isCommitted: bool = True): + return DomainRequestHandler.isSteward(self.domainState, nym, isCommitted) + + @lru_cache(maxsize=64) + def isStewardOfNode(self, stewardNym, nodeNym, isCommitted=True): + nodeData = self.getNodeData(nodeNym, isCommitted=isCommitted) + return nodeData and nodeData[f.IDENTIFIER.nm] == stewardNym + + def stewardHasNode(self, stewardNym) -> bool: + # Cannot use lru_cache since a steward might have a node in future and + # unfortunately lru_cache does not allow single entries to be cleared + # TODO: Modify lru_cache to clear certain entities + for nodeNym, nodeData in self.state.as_dict.items(): + nodeData = json.loads(nodeData.decode()) + if nodeData.get(f.IDENTIFIER.nm) == stewardNym: + return True + return False + + @staticmethod + def dataErrorWhileValidating(data, skipKeys): + reqKeys = {NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT, ALIAS} + if not skipKeys and not reqKeys.issubset(set(data.keys())): + return 'Missing some of {}'.format(reqKeys) + + nip = data.get(NODE_IP, 'nip') + np = data.get(NODE_PORT, 'np') + cip = data.get(CLIENT_IP, 'cip') + cp = data.get(CLIENT_PORT, 'cp') + if (nip, np) == (cip, cp): + return 'node and client ha cannot be same' + + def isNodeDataSame(self, nodeNym, newData, isCommitted=True): + nodeInfo = self.getNodeData(nodeNym, isCommitted=isCommitted) + nodeInfo.pop(f.IDENTIFIER.nm, None) + return nodeInfo == newData + + def isNodeDataConflicting(self, data, nodeNym=None): + # Check if node's ALIAS or IPs or ports conflicts with other nodes, + # also, the node is not allowed to change its alias. 
+ + # Check ALIAS change + nodeData = {} + if nodeNym: + nodeData = self.getNodeData(nodeNym, isCommitted=False) + if nodeData.get(ALIAS) != data.get(ALIAS): + return True + else: + # Preparing node data for check coming next + nodeData.pop(f.IDENTIFIER.nm, None) + nodeData.pop(SERVICES, None) + nodeData.update(data) + + for otherNode, otherNodeData in self.state.as_dict.items(): + otherNode = otherNode.decode() + otherNodeData = json.loads(otherNodeData.decode()) + otherNodeData.pop(f.IDENTIFIER.nm, None) + otherNodeData.pop(SERVICES, None) + if not nodeNym or otherNode != nodeNym: + # The node's ip, port and alias shuuld be unique + bag = set() + for d in (nodeData, otherNodeData): + bag.add(d.get(ALIAS)) + bag.add((d.get(NODE_IP), d.get(NODE_PORT))) + bag.add((d.get(CLIENT_IP), d.get(CLIENT_PORT))) + + list(map(lambda x: bag.remove(x) if x in bag else None, + (None, (None, None)))) + + if (not nodeData and len(bag) != 3) or (nodeData and len(bag) != 6): + return True + + def dataErrorWhileValidatingUpdate(self, data, nodeNym): + error = self.dataErrorWhileValidating(data, skipKeys=True) + if error: + return error + + if self.isNodeDataSame(nodeNym, data, isCommitted=False): + return "node already has the same data as requested" + + if self.isNodeDataConflicting(data, nodeNym): + return "existing data has conflicts with " \ + "request data {}".format(data) \ No newline at end of file diff --git a/plenum/server/primary_elector.py b/plenum/server/primary_elector.py index 516b166f22..b4538134e3 100644 --- a/plenum/server/primary_elector.py +++ b/plenum/server/primary_elector.py @@ -58,8 +58,6 @@ def __init__(self, node): (Reelection, self.processReelection)] self.inBoxRouter = Router(*routerArgs) - self.pendingMsgsForViews = {} # Dict[int, deque] - # Keeps track of duplicate messages received. Used to blacklist if # nodes send more than 1 duplicate messages. Useful to blacklist # nodes. This number `1` is configurable. 
The reason 1 duplicate @@ -67,6 +65,13 @@ def __init__(self, node): # to an already lagged node, an extra NOMINATE or PRIMARY might be sent self.duplicateMsgs = {} # Dict[Tuple, int] + # Need to keep track of who was primary for the master protocol + # instance for previous view, this variable only matters between + # elections, the elector will set it before doing triggering new + # election and will reset it after primary is decided for master + # instance + self.previous_master_primary = None + def __repr__(self): return "{}".format(self.name) @@ -77,6 +82,10 @@ def hasPrimaryReplica(self) -> bool: """ return any([r.isPrimary for r in self.replicas]) + @property + def was_master_primary_in_prev_view(self): + return self.previous_master_primary == self.name + def setDefaults(self, instId: int): """ Set the default values for elections for a replica. @@ -98,17 +107,6 @@ def prepareReplicaForElection(self, replica: 'replica.Replica'): if instId not in self.nominations: self.setDefaults(instId) - def pendMsgForLaterView(self, msg: Any, viewNo: int): - """ - Add a message to the pending queue for a later view. - - :param msg: the message to pend - :param viewNo: the viewNo this message is meant for. 
- """ - if viewNo not in self.pendingMsgsForViews: - self.pendingMsgsForViews[viewNo] = deque() - self.pendingMsgsForViews[viewNo].append(msg) - def filterMsgs(self, wrappedMsgs: deque) -> deque: """ Filters messages by view number so that only the messages that have the @@ -124,11 +122,6 @@ def filterMsgs(self, wrappedMsgs: deque) -> deque: reqViewNo = getattr(msg, f.VIEW_NO.nm) if reqViewNo == self.viewNo: filtered.append(wrappedMsg) - elif reqViewNo > self.viewNo: - logger.debug( - "{}'s elector queueing {} since it is for a later view" - .format(self.name, wrappedMsg)) - self.pendMsgForLaterView((msg, sender), reqViewNo) else: self.discard(wrappedMsg, "its view no {} is less than the elector's {}" @@ -155,7 +148,7 @@ def didReplicaDeclarePrimary(self, instId: int): :param instId: the instance id (used to identify the replica on this node) """ return instId in self.primaryDeclarations and \ - self.replicas[instId].name in self.primaryDeclarations[instId] + self.replicas[instId].name in self.primaryDeclarations[instId] async def serviceQueues(self, limit=None): """ @@ -221,10 +214,8 @@ def nominateRandomReplica(self): .format(self)) return - undecideds = [i for i, r in enumerate(self.replicas) - if r.isPrimary is None] - if undecideds: - chosen = random.choice(undecideds) + undecideds, chosen = self._get_undecided_inst_id() + if chosen is not None: logger.debug("{} does not have a primary, " "replicas {} are undecided, " "choosing {} to nominate". @@ -245,15 +236,28 @@ def nominateReplica(self, instId): """ replica = self.replicas[instId] if not self.didReplicaNominate(instId): - self.nominations[instId][replica.name] = replica.name + self.nominations[instId][replica.name] = (replica.name, + replica.lastOrderedPPSeqNo) logger.info("{} nominating itself for instance {}". 
format(replica, instId), extra={"cli": "PLAIN", "tags": ["node-nomination"]}) - self.sendNomination(replica.name, instId, self.viewNo) + self.sendNomination(replica.name, instId, self.viewNo, replica.lastOrderedPPSeqNo) else: logger.debug( "{} already nominated, so hanging back".format(replica)) + def _get_undecided_inst_id(self): + undecideds = [i for i, r in enumerate(self.replicas) + if r.isPrimary is None] + if 0 in undecideds and self.was_master_primary_in_prev_view: + logger.debug('{} was primary for master in previous view, ' + 'so will not nominate master replica'.format(self)) + undecideds.remove(0) + + if undecideds: + return undecideds, random.choice(undecideds) + return None, None + # noinspection PyAttributeOutsideInit def setElectionDefaults(self, instId): """ @@ -276,13 +280,21 @@ def processNominate(self, nom: Nomination, sender: str): format(self.name, nom)) instId = nom.instId replica = self.replicas[instId] + if instId == 0 and replica.getNodeName(nom.name) == self.previous_master_primary: + self.discard(nom, '{} got Nomination from {} for {} who was primary' + ' of master in previous view too'. + format(self, sender, nom.name), + logMethod=logger.warning) + return + sndrRep = replica.generateName(sender, nom.instId) if not self.didReplicaNominate(instId): if instId not in self.nominations: self.setDefaults(instId) - self.nominations[instId][replica.name] = nom.name - self.sendNomination(nom.name, nom.instId, nom.viewNo) + self.nominations[instId][replica.name] = (nom.name, nom.ordSeqNo) + self.sendNomination(nom.name, nom.instId, nom.viewNo, + nom.ordSeqNo) logger.debug("{} nominating {} for instance {}". 
format(replica, nom.name, nom.instId), extra={"cli": "PLAIN", "tags": ["node-nomination"]}) @@ -292,7 +304,7 @@ def processNominate(self, nom: Nomination, sender: str): # Nodes should not be able to vote more than once if sndrRep not in self.nominations[instId]: - self.nominations[instId][sndrRep] = nom.name + self.nominations[instId][sndrRep] = (nom.name, nom.ordSeqNo) logger.debug("{} attempting to decide primary based on nomination " "request: {} from {}".format(replica, nom, sndrRep)) self._schedule(partial(self.decidePrimary, instId)) @@ -312,7 +324,8 @@ def processNominate(self, nom: Nomination, sender: str): def processPrimary(self, prim: Primary, sender: str) -> None: """ Process a vote from a replica to select a particular replica as primary. - Once 2f + 1 primary declarations have been received, decide on a primary replica. + Once 2f + 1 primary declarations have been received, decide on a + primary replica. :param prim: a vote :param sender: the name of the node from which this message was sent @@ -321,13 +334,21 @@ def processPrimary(self, prim: Primary, sender: str) -> None: .format(self.name, sender, prim)) instId = prim.instId replica = self.replicas[instId] + if instId == 0 and replica.getNodeName(prim.name) == self.previous_master_primary: + self.discard(prim, '{} got Primary from {} for {} who was primary' + ' of master in previous view too'. 
+ format(self, sender, prim.name), + logMethod=logger.warning) + return + sndrRep = replica.generateName(sender, prim.instId) # Nodes should not be able to declare `Primary` winner more than more if instId not in self.primaryDeclarations: self.setDefaults(instId) if sndrRep not in self.primaryDeclarations[instId]: - self.primaryDeclarations[instId][sndrRep] = prim.name + self.primaryDeclarations[instId][sndrRep] = (prim.name, + prim.ordSeqNo) # If got more than 2f+1 primary declarations then in a position to # decide whether it is the primary or not `2f + 1` declarations @@ -349,7 +370,7 @@ def processPrimary(self, prim: Primary, sender: str) -> None: if self.hasPrimaryQuorum(instId): if replica.isPrimary is None: - primary = mostCommonElement( + primary, seqNo = mostCommonElement( self.primaryDeclarations[instId].values()) logger.display("{} selected primary {} for instance {} " "(view {})".format(replica, primary, @@ -363,14 +384,17 @@ def processPrimary(self, prim: Primary, sender: str) -> None: # If the maximum primary declarations are for this node # then make it primary - replica.primaryName = primary + replica.primaryChanged(primary, seqNo) + + if instId == 0: + self.previous_master_primary = None # If this replica has nominated itself and since the # election is over, reset the flag if self.replicaNominatedForItself == instId: self.replicaNominatedForItself = None - self.node.primaryFound() + self.node.primary_found() self.scheduleElection() else: @@ -427,7 +451,8 @@ def processReelection(self, reelection: Reelection, sender: str): return if sndrRep not in self.reElectionProposals[instId]: - self.reElectionProposals[instId][sndrRep] = reelection.tieAmong + self.reElectionProposals[instId][sndrRep] = [tuple(_) for _ in + reelection.tieAmong] # Check if got reelection messages from at least 2f + 1 nodes (1 # more than max faulty nodes). 
Necessary because some nodes may @@ -445,10 +470,10 @@ def processReelection(self, reelection: Reelection, sender: str): self.setElectionDefaults(instId) - if not self.hasPrimaryReplica: + if not self.hasPrimaryReplica and not self.was_master_primary_in_prev_view: # There was a tie among this and some other node(s), so do a # random wait - if replica.name in tieAmong: + if replica.name in [_[0] for _ in tieAmong]: # Try to nominate self after a random delay but dont block # until that delay and because a nominate from another # node might be sent @@ -459,7 +484,8 @@ def processReelection(self, reelection: Reelection, sender: str): self.nominateReplica(instId) else: logger.debug("{} does not have re-election quorum yet. " - "Got only {}".format(replica, len(self.reElectionProposals[instId]))) + "Got only {}".format(replica, + len(self.reElectionProposals[instId]))) else: self.discard(reelection, "already got re-election proposal from {}". @@ -551,13 +577,13 @@ def decidePrimary(self, instId: int): # In case of one clear winner if len(primaryCandidates) == 1: - primaryName, votes = primaryCandidates.pop() + (primaryName, seqNo), votes = primaryCandidates.pop() if self.hasNominationsFromAll(instId) or ( self.scheduledPrimaryDecisions[instId] is not None and self.hasPrimaryDecisionTimerExpired(instId)): logger.debug("{} has nominations from all so sending " "primary".format(replica)) - self.sendPrimary(instId, primaryName) + self.sendPrimary(instId, primaryName, seqNo) else: votesNeeded = math.ceil((self.nodeCount + 1) / 2.0) if votes >= votesNeeded or ( @@ -567,7 +593,7 @@ def decidePrimary(self, instId: int): "all but has {} votes for {} so sending " "primary". format(replica, votes, primaryName)) - self.sendPrimary(instId, primaryName) + self.sendPrimary(instId, primaryName, seqNo) return else: logger.debug("{} has {} nominations for {}, but " @@ -598,7 +624,8 @@ def decidePrimary(self, instId: int): logger.debug("{} has not got nomination quorum yet". 
format(replica)) - def sendNomination(self, name: str, instId: int, viewNo: int): + def sendNomination(self, name: str, instId: int, viewNo: int, + lastOrderedSeqNo: int): """ Broadcast a nomination message with the given parameters. @@ -606,9 +633,10 @@ def sendNomination(self, name: str, instId: int, viewNo: int): :param instId: instance id :param viewNo: view number """ - self.send(Nomination(name, instId, viewNo)) + self.send(Nomination(name, instId, viewNo, lastOrderedSeqNo)) - def sendPrimary(self, instId: int, primaryName: str): + def sendPrimary(self, instId: int, primaryName: str, + lastOrderedSeqNo: int): """ Declare a primary and broadcast the message. @@ -616,11 +644,14 @@ def sendPrimary(self, instId: int, primaryName: str): :param primaryName: the name of the primary replica """ replica = self.replicas[instId] - self.primaryDeclarations[instId][replica.name] = primaryName + self.primaryDeclarations[instId][replica.name] = (primaryName, + lastOrderedSeqNo) self.scheduledPrimaryDecisions[instId] = None logger.debug("{} declaring primary as: {} on the basis of {}". - format(replica, primaryName, self.nominations[instId])) - self.send(Primary(primaryName, instId, self.viewNo)) + format(replica, primaryName, + self.nominations[instId])) + self.send(Primary(primaryName, instId, self.viewNo, + lastOrderedSeqNo)) def sendReelection(self, instId: int, primaryCandidates: Sequence[str] = None) -> None: @@ -702,19 +733,14 @@ def viewChanged(self, viewNo: int): :param viewNo: the new view number. 
""" if viewNo > self.viewNo: + self.previous_master_primary = self.node.master_primary + self.viewNo = viewNo for replica in self.replicas: replica.primaryName = None - # Remove all pending messages which came for earlier views - oldViews = [] - for v in self.pendingMsgsForViews: - if v < viewNo: - oldViews.append(v) - - for v in oldViews: - self.pendingMsgsForViews.pop(v) + self.node._primary_replica_no = None # Reset to defaults values for different data structures as new # elections would begin @@ -722,16 +748,6 @@ def viewChanged(self, viewNo: int): self.setDefaults(r.instId) self.replicaNominatedForItself = None - # Schedule execution of any pending msgs from the new view - if viewNo in self.pendingMsgsForViews: - logger.debug("Pending election messages found for " - "view {}".format(viewNo)) - pendingMsgs = self.pendingMsgsForViews.pop(viewNo) - self.inBox.extendleft(pendingMsgs) - else: - logger.debug("{} found no pending election messages for " - "view {}".format(self.name, viewNo)) - self.nominateRandomReplica() else: logger.warning("Provided view no {} is not greater than the " @@ -747,19 +763,18 @@ def getElectionMsgsForInstance(self, instId: int) -> \ # If a primary for this instance has been selected then send a # primary declaration for the selected primary if replica.isPrimary is not None: - msgs.append(Primary(replica.primaryName, instId, self.viewNo)) + msgs.append(Primary(replica.primaryName, instId, self.viewNo, + replica.lastOrderedPPSeqNo)) else: # If a primary for this instance has not been selected then send # nomination and primary declaration that this node made for the # instance with id `instId` if self.didReplicaNominate(instId): - msgs.append(Nomination(self.nominations[instId][ - replica.name], - instId, self.viewNo)) + nm, seqNo = self.nominations[instId][replica.name] + msgs.append(Nomination(nm, instId, self.viewNo, seqNo)) if self.didReplicaDeclarePrimary(instId): - 
msgs.append(Primary(self.primaryDeclarations[instId][replica.name], - instId, - self.viewNo)) + nm, seqNo = self.primaryDeclarations[instId][replica.name] + msgs.append(Primary(nm, instId, self.viewNo, seqNo)) return msgs def getElectionMsgsForLaggedNodes(self) -> \ diff --git a/plenum/server/propagator.py b/plenum/server/propagator.py index 2f83fe1962..cb614c486e 100644 --- a/plenum/server/propagator.py +++ b/plenum/server/propagator.py @@ -1,4 +1,7 @@ +from collections import OrderedDict +from collections import deque from typing import Dict, Tuple, Union +import weakref from plenum.common.types import Propagate from plenum.common.request import Request @@ -30,7 +33,7 @@ def isFinalised(self, f): return self.finalised -class Requests(Dict[Tuple[str, int], ReqState]): +class Requests(OrderedDict): """ Storing client request object corresponding to each client and its request id. Key of the dictionary is a Tuple2 containing identifier, @@ -82,13 +85,19 @@ def votes(self, req) -> int: votes = 0 return votes - def canForward(self, req: Request, requiredVotes: int) -> bool: + def canForward(self, req: Request, requiredVotes: int) -> (bool, str): """ Check whether the request specified is eligible to be forwarded to the protocol instances. """ state = self[req.key] - return not state.forwarded and state.isFinalised(requiredVotes) + if state.forwarded: + msg = 'already forwarded' + elif not state.isFinalised(requiredVotes): + msg = 'not finalised' + else: + msg = None + return not bool(msg), msg def hasPropagated(self, req: Request, sender: str) -> bool: """ @@ -109,6 +118,11 @@ def digest(self, reqKey: Tuple) -> str: class Propagator: def __init__(self): self.requests = Requests() + # If the node does not have any primary and at least one protocol + # instance is missing a primary then add the request in + # `reqs_stashed_for_primary`. 
Note that this does not prevent the + # request from being processed as its marked as finalised + self.reqs_stashed_for_primary = deque() # noinspection PyUnresolvedReferences def propagate(self, request: Request, clientName): @@ -122,8 +136,8 @@ def propagate(self, request: Request, clientName): else: self.requests.addPropagate(request, self.name) # Only propagate if the node is participating in the consensus - # process - # which happens when the node has completed the catchup process + # process which happens when the node has completed the + # catchup process. QUESTION: WHY? if self.isParticipating: propagate = self.createPropagate(request, clientName) logger.display("{} propagating {} request {} from client {}". @@ -151,7 +165,7 @@ def createPropagate(request: Union[Request, dict], identifier) -> Propagate: return Propagate(request, identifier) # noinspection PyUnresolvedReferences - def canForward(self, request: Request) -> bool: + def canForward(self, request: Request) -> (bool, str): """ Determine whether to forward client REQUESTs to replicas, based on the following logic: @@ -177,10 +191,16 @@ def forward(self, request: Request): :param request: the REQUEST to propagate """ key = request.key - logger.debug("{} forwarding client request {} to its replicas". - format(self, key)) - for repQueue in self.msgsToReplicas: - repQueue.append(self.requests[key].finalised.reqDigest) + fin_req = self.requests[key].finalised + if self.primaryReplicaNo is not None: + self.msgsToReplicas[self.primaryReplicaNo].append(fin_req) + logger.debug("{} forwarding client request {} to replica {}". 
+ format(self, key, self.primaryReplicaNo)) + elif not self.all_instances_have_primary: + logger.debug('{} stashing request {} since at least one replica ' + 'lacks primary'.format(self, key)) + self.reqs_stashed_for_primary.append(fin_req) + self.monitor.requestUnOrdered(*key) self.requests.flagAsForwarded(request, len(self.msgsToReplicas)) @@ -204,11 +224,27 @@ def tryForwarding(self, request: Request): See the method `canForward` for the conditions to check before forwarding a request. """ - if self.canForward(request): + r, msg = self.canForward(request) + if r: # If haven't got the client request(REQUEST) for the corresponding # propagate request(PROPAGATE) but have enough propagate requests # to move ahead self.forward(request) else: - logger.trace("{} cannot yet forward request {} to its replicas". - format(self, request)) + logger.trace("{} not forwarding request {} to its replicas " + "since {}".format(self, request, msg)) + + def process_reqs_stashed_for_primary(self): + if self.reqs_stashed_for_primary: + if self.primaryReplicaNo is not None: + self.msgsToReplicas[self.primaryReplicaNo].extend( + self.reqs_stashed_for_primary) + logger.debug("{} forwarding stashed {} client requests to " + "replica {}". 
+ format(self, len(self.reqs_stashed_for_primary), + self.primaryReplicaNo)) + elif not self.all_instances_have_primary: + return + # Either the stashed requests have been given to a primary or this + # node does not have a primary, so clear the queue + self.reqs_stashed_for_primary.clear() diff --git a/plenum/server/replica.py b/plenum/server/replica.py index f78e06b060..7cf10c0d2e 100644 --- a/plenum/server/replica.py +++ b/plenum/server/replica.py @@ -1,8 +1,10 @@ import time +from binascii import hexlify, unhexlify from collections import deque, OrderedDict from enum import IntEnum from enum import unique -from typing import Dict, Union +from operator import itemgetter +from typing import Dict, List, Union from typing import Optional, Any from typing import Set from typing import Tuple @@ -13,14 +15,16 @@ import plenum.server.node from plenum.common.config_util import getConfig -from plenum.common.exceptions import SuspiciousNode +from plenum.common.exceptions import SuspiciousNode, InvalidClientRequest, \ + InvalidClientMessageException, UnknownIdentifier from plenum.common.signing import serialize +from plenum.common.txn_util import reqToTxn from plenum.common.types import PrePrepare, \ Prepare, Commit, Ordered, ThreePhaseMsg, ThreePhaseKey, ThreePCState, \ - CheckpointState, Checkpoint -from plenum.common.request import ReqDigest -from plenum.common.util import updateNamedTuple + CheckpointState, Checkpoint, Reject, f, InstanceChange +from plenum.common.request import ReqDigest, Request from plenum.common.message_processor import MessageProcessor +from plenum.common.util import updateNamedTuple from stp_core.common.log import getlogger from plenum.server.has_action_queue import HasActionQueue from plenum.server.models import Commits, Prepares @@ -83,7 +87,7 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, self.config = getConfig() - routerArgs = [(ReqDigest, self._preProcessReqDigest)] + routerArgs = [(Request, self.readyFor3PC)] for r in 
[PrePrepare, Prepare, Commit]: routerArgs.append((r, self.processThreePhaseMsg)) @@ -134,12 +138,17 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, self.postElectionMsgs = deque() # PRE-PREPAREs that are waiting to be processed but do not have the - # corresponding request digest. Happens when replica has not been + # corresponding request finalised. Happens when replica has not been # forwarded the request by the node but is getting 3 phase messages. # The value is a list since a malicious entry might send PRE-PREPARE - # with a different digest and since we dont have the request finalised, - # we store all PRE-PPREPARES - self.prePreparesPendingReqDigest = {} # type: Dict[Tuple[str, int], List] + # with a different digest and since we dont have the request finalised + # yet, we store all PRE-PPREPARES + self.prePreparesPendingFinReqs = [] # type: List[Tuple[PrePrepare, str, Set[Tuple[str, int]]]] + + # PrePrepares waiting for previous PrePrepares, key being tuple of view + # number and pre-prepare sequence numbers and value being tuple of + # PrePrepare and sender + self.prePreparesPendingPrevPP = SortedDict(lambda k: k[1]) # PREPAREs that are stored by non primary replica for which it has not # got any PRE-PREPARE. Dictionary that stores a tuple of view no and @@ -158,14 +167,14 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # which it has broadcasted to all other non primary replicas # Key of dictionary is a 2 element tuple with elements viewNo, # pre-prepare seqNo and value is a tuple of Request Digest and time - self.sentPrePrepares = {} - # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]] + self.sentPrePrepares = SortedDict(lambda k: k[1]) + # type: Dict[Tuple[int, int], PrePrepare] # Dictionary of received PRE-PREPAREs. 
Key of dictionary is a 2 # element tuple with elements viewNo, pre-prepare seqNo and value is - # a tuple of Request Digest and time - self.prePrepares = {} - # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]] + # a list of tuples of Request Keys and time + self.prePrepares = SortedDict(lambda k: k[1]) + # type: Dict[Tuple[int, int], PrePrepare] # Dictionary of received Prepare requests. Key of dictionary is a 2 # element tuple with elements viewNo, seqNo and value is a 2 element @@ -185,11 +194,7 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # Dictionary to keep track of the which replica was primary during each # view. Key is the view no and value is the name of the primary # replica during that view - self.primaryNames = {} # type: Dict[int, str] - - # Holds msgs that are for later views - self.threePhaseMsgsForLaterView = deque() - # type: deque[(ThreePhaseMsg, str)] + self.primaryNames = OrderedDict() # type: OrderedDict[int, str] # Holds tuple of view no and prepare seq no of 3-phase messages it # received while it was not participating @@ -215,6 +220,42 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, self.lastPrePrepareSeqNo = self.h # type: int + # Queues used in PRE-PREPARE for each ledger, + self.requestQueues = {} # type: Dict[int, deque] + for ledgerId in self.node.ledgerManager.ledgerRegistry: + self.requestQueues[ledgerId] = deque() + + # Batches with key as ppSeqNo of batch and value as a tuple of number + # of txns and the time as batch was created/received and the state root + # hash for the batch + self.batches = OrderedDict() # type: OrderedDict[int, Tuple[int, float, bytes]] + + # TODO: Need to have a timer for each ledger + self.lastBatchCreated = time.perf_counter() + + self.lastOrderedPPSeqNo = 0 + + def txnRootHash(self, ledgerId, toHex=True): + if not self.isMaster: + return None + ledger = self.node.getLedger(ledgerId) + h = ledger.uncommittedRootHash + # If no uncommittedHash since this 
is the beginning of the tree + # or no transactions affecting the ledger were made after the + # last changes were committed + root = h if h else ledger.tree.root_hash + if toHex: + root = hexlify(root).decode() + return root + + def stateRootHash(self, ledgerId, toHex=True): + if not self.isMaster: + return None + root = self.node.getState(ledgerId).headHash + if toHex: + root = hexlify(root).decode() + return root + @property def h(self) -> int: return self._h @@ -274,15 +315,49 @@ def primaryName(self, value: Optional[str]) -> None: :param value: the value to set isPrimary to """ + self.primaryNames[self.viewNo] = value if not value == self._primaryName: self._primaryName = value - self.primaryNames[self.viewNo] = value logger.debug("{} setting primaryName for view no {} to: {}". format(self, self.viewNo, value)) - logger.debug("{}'s primaryNames for views are: {}". - format(self, self.primaryNames)) + if self.isMaster: + self.removeObsoletePpReqs() self._stateChanged() + def primaryChanged(self, primaryName, lastOrderedPPSeqNo): + if self.lastOrderedPPSeqNo < lastOrderedPPSeqNo: + self.lastOrderedPPSeqNo = lastOrderedPPSeqNo + self.primaryName = primaryName + if primaryName == self.name: + assert self.lastOrderedPPSeqNo >= lastOrderedPPSeqNo + self.lastPrePrepareSeqNo = self.lastOrderedPPSeqNo + else: + for lid in self.requestQueues: + self.requestQueues[lid].clear() + + def removeObsoletePpReqs(self): + # If replica was primary in previous view then remove every sent + # Pre-Prepare with less than f+1 Prepares. 
+ viewNos = self.primaryNames.keys() + if len(viewNos) > 1: + viewNos = list(viewNos) + lastViewNo = viewNos[-2] + if self.primaryNames[lastViewNo] == self.name: + lastViewPPs = [pp for pp in self.sentPrePrepares.values() if + pp.viewNo == lastViewNo] + obs = set() + for pp in lastViewPPs: + if not self.prepares.hasEnoughVotes(pp, self.f): + obs.add((pp.viewNo, pp.ppSeqNo)) + + for key in sorted(list(obs), key=itemgetter(1), reverse=True): + ppReq = self.sentPrePrepares[key] + count, _, prevStateRoot = self.batches[key[1]] + self.batches.pop(key[1]) + self.revert(ppReq.ledgerId, prevStateRoot, count) + self.sentPrePrepares.pop(key) + self.prepares.pop(key, None) + def _stateChanged(self): """ A series of actions to be performed when the state of this replica @@ -292,8 +367,7 @@ def _stateChanged(self): """ self._unstashInBox() if self.isPrimary is not None: - # TODO handle suspicion exceptions here - self.process3PhaseReqsQueue() + # self.process3PhaseReqsQueue() # TODO handle suspicion exceptions here try: self.processPostElectionMsgs() @@ -313,7 +387,8 @@ def _unstashInBox(self): """ Append the inBoxStash to the right of the inBox. """ - self.inBox.extend(self.inBoxStash) + # The stashed values need to go in "front" of the inBox. + self.inBox.extendleft(self.inBoxStash) self.inBoxStash.clear() def __repr__(self): @@ -335,54 +410,6 @@ def viewNo(self): """ return self.node.viewNo - def isPrimaryInView(self, viewNo: int) -> Optional[bool]: - """ - Return whether a primary has been selected for this view number. - """ - return self.primaryNames[viewNo] == self.name - - def isMsgForLaterView(self, msg): - """ - Return whether this request's view number is greater than the current - view number of this replica. - """ - viewNo = getattr(msg, "viewNo", None) - return viewNo > self.viewNo - - def isMsgForCurrentView(self, msg): - """ - Return whether this request's view number is equal to the current view - number of this replica. 
- """ - viewNo = getattr(msg, "viewNo", None) - return viewNo == self.viewNo - - def isMsgForPrevView(self, msg): - """ - Return whether this request's view number is less than the current view - number of this replica. - """ - viewNo = getattr(msg, "viewNo", None) - return viewNo < self.viewNo - - def isPrimaryForMsg(self, msg) -> Optional[bool]: - """ - Return whether this replica is primary if the request's view number is - equal this replica's view number and primary has been selected for - the current view. - Return None otherwise. - - :param msg: message - """ - if self.isMsgForLaterView(msg): - self.discard(msg, - "Cannot get primary status for a request for a later " - "view {}. Request is {}".format(self.viewNo, msg), - logger.error) - else: - return self.isPrimary if self.isMsgForCurrentView(msg) \ - else self.isPrimaryInView(msg.viewNo) - def isMsgFromPrimary(self, msg, sender: str) -> bool: """ Return whether this message was from primary replica @@ -390,27 +417,96 @@ def isMsgFromPrimary(self, msg, sender: str) -> bool: :param sender: :return: """ - if self.isMsgForLaterView(msg): - logger.error("{} cannot get primary for a request for a later " - "view. Request is {}".format(self, msg)) - else: - return self.primaryName == sender if self.isMsgForCurrentView( - msg) else self.primaryNames[msg.viewNo] == sender + return self.primaryName == sender + + def trackBatches(self, pp: PrePrepare, prevStateRootHash): + # pp.discarded indicates the index from where the discarded requests + # starts hence the count of accepted requests, prevStateRoot is + # tracked to revert this PRE-PREPARE + logger.debug('{} tracking batch for {} with state root {}'. 
+ format(self, pp, prevStateRootHash)) + self.batches[pp.ppSeqNo] = [pp.discarded, pp.ppTime, prevStateRootHash] + + def send3PCBatch(self): + r = 0 + for lid, q in self.requestQueues.items(): + if len(q) >= self.config.Max3PCBatchSize or ( + self.lastBatchCreated + + self.config.Max3PCBatchWait < + time.perf_counter() and len(q) > 0): + oldStateRootHash = self.stateRootHash(lid, toHex=False) + ppReq = self.create3PCBatch(lid) + self.sendPrePrepare(ppReq) + self.trackBatches(ppReq, oldStateRootHash) + r += 1 + + if r > 0: + self.lastBatchCreated = time.perf_counter() + return r - def _preProcessReqDigest(self, rd: ReqDigest) -> None: - """ - Process request digest if this replica is not a primary, otherwise stash - the message into the inBox. + @staticmethod + def batchDigest(reqs): + return sha256(b''.join([r.digest.encode() for r in reqs])).hexdigest() - :param rd: the client Request Digest - """ - if self.isPrimary is not None: - self.processReqDigest(rd) + def processReqDuringBatch(self, req: Request, validReqs: List, + inValidReqs: List, rejects: List): + try: + if self.isMaster: + self.node.doDynamicValidation(req) + self.node.applyReq(req) + except (InvalidClientMessageException, UnknownIdentifier) as ex: + logger.warning('{} encountered exception {} while processing {}, ' + 'will reject'.format(self, ex, req)) + rejects.append(Reject(req.identifier, req.reqId, ex)) + inValidReqs.append(req) else: - logger.debug("{} stashing request digest {} since it does not know " - "its primary status". - format(self, (rd.identifier, rd.reqId))) - self._stashInBox(rd) + validReqs.append(req) + + def create3PCBatch(self, ledgerId): + # TODO: If no valid requests then PRE-PREPARE should be sent but rejects + # should be tracked so they can be sent as part of next batch. + ppSeqNo = self.lastPrePrepareSeqNo + 1 + logger.info("{} creating batch {} for ledger {} with state root {}". 
+ format(self, ppSeqNo, ledgerId, + self.stateRootHash(ledgerId, toHex=False))) + tm = time.time() * 1000 + validReqs = [] + inValidReqs = [] + rejects = [] + while len(validReqs)+len(inValidReqs) < self.config.Max3PCBatchSize \ + and self.requestQueues[ledgerId]: + req = self.requestQueues[ledgerId].popleft() + self.processReqDuringBatch(req, validReqs, inValidReqs, rejects) + + reqs = validReqs+inValidReqs + digest = self.batchDigest(reqs) + prePrepareReq = PrePrepare(self.instId, + self.viewNo, + ppSeqNo, + tm, + [(req.identifier, req.reqId) for req in reqs], + len(validReqs), + digest, + ledgerId, + self.stateRootHash(ledgerId), + self.txnRootHash(ledgerId) + ) + logger.debug('{} created a PRE-PREPARE with {} requests for ledger {}' + .format(self, len(validReqs), ledgerId)) + self.lastPrePrepareSeqNo = ppSeqNo + if self.isMaster: + self.outBox.extend(rejects) + self.node.onBatchCreated(ledgerId, + self.stateRootHash(ledgerId, toHex=False)) + return prePrepareReq + + def sendPrePrepare(self, ppReq: PrePrepare): + self.sentPrePrepares[ppReq.viewNo, ppReq.ppSeqNo] = ppReq + self.send(ppReq, TPCStat.PrePrepareSent) + + def readyFor3PC(self, request: Request): + cls = self.node.__class__ + self.requestQueues[cls.ledgerIdForRequest(request)].append(request) def serviceQueues(self, limit=None): """ @@ -420,7 +516,10 @@ def serviceQueues(self, limit=None): :return: the number of messages successfully processed """ # TODO should handle SuspiciousNode here - r = self.inBoxRouter.handleAllSync(self.inBox, limit) + r = self.dequeuePrePrepares() if self.node.isParticipating else 0 + r += self.inBoxRouter.handleAllSync(self.inBox, limit) + r += self.send3PCBatch() if (self.isPrimary and + self.node.isParticipating) else 0 r += self._serviceActions() return r # Messages that can be processed right now needs to be added back to the @@ -436,24 +535,6 @@ def processPostElectionMsgs(self): logger.debug("{} processing pended msg {}".format(self, msg)) 
self.dispatchThreePhaseMsg(*msg) - def process3PhaseReqsQueue(self): - """ - Process the 3 phase requests from the queue whose view number is equal - to the current view number of this replica. - """ - unprocessed = deque() - while self.threePhaseMsgsForLaterView: - request, sender = self.threePhaseMsgsForLaterView.popleft() - logger.debug("{} processing pended 3 phase request: {}" - .format(self, request)) - # If the request is for a later view dont try to process it but add - # it back to the queue. - if self.isMsgForLaterView(request): - unprocessed.append((request, sender)) - else: - self.processThreePhaseMsg(request, sender) - self.threePhaseMsgsForLaterView = unprocessed - @property def quorum(self) -> int: r""" @@ -486,19 +567,6 @@ def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: format(self, msg, msg.ppSeqNo, self.h, self.H)) self.stashOutsideWatermarks((msg, sender)) - def processReqDigest(self, rd: ReqDigest): - """ - Process a request digest. Works only if this replica has decided its - primary status. - - :param rd: the client request digest to process - """ - self.stats.inc(TPCStat.ReqDigestRcvd) - if self.isPrimary is False: - self.dequeuePrePrepare(rd.identifier, rd.reqId) - else: - self.doPrePrepare(rd) - def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str): """ Process a 3-phase (pre-prepare, prepare and commit) request. @@ -509,18 +577,20 @@ def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str): COMMIT :param sender: name of the node that sent this message """ - # Can only proceed further if it knows whether its primary or not - if self.isMsgForLaterView(msg): - self.threePhaseMsgsForLaterView.append((msg, sender)) - logger.debug("{} pended received 3 phase request for a later view: " - "{}".format(self, msg)) - else: - if self.isPrimary is None: - self.postElectionMsgs.append((msg, sender)) - logger.debug("Replica {} pended request {} from {}". 
- format(self, msg, sender)) - else: - self.dispatchThreePhaseMsg(msg, sender) + # If COMMIT or PREPARE corresponding to which a PRE-PREPARE is + # received then proceed otherwise only proceed further if primary + # is known + if msg.viewNo < self.viewNo: + self.discard(msg, + "its a previous view message", + logger.debug) + return + if self.isPrimary is None: + self.postElectionMsgs.append((msg, sender)) + logger.debug("Replica {} pended request {} from {}". + format(self, msg, sender)) + return + self.dispatchThreePhaseMsg(msg, sender) def processPrePrepare(self, pp: PrePrepare, sender: str): """ @@ -531,24 +601,36 @@ def processPrePrepare(self, pp: PrePrepare, sender: str): :param sender: name of the node that sent this message """ key = (pp.viewNo, pp.ppSeqNo) - logger.debug("{} Receiving PRE-PREPARE{} at {} from {}". - format(self, key, time.perf_counter(), sender)) + logger.debug("{} received PRE-PREPARE{} from {} at {}". + format(self, key, sender, time.perf_counter())) + pp = updateNamedTuple(pp, **{f.REQ_IDR.nm: [(i, r) + for i, r in pp.reqIdr]}) + oldStateRoot = self.stateRootHash(pp.ledgerId, toHex=False) if self.canProcessPrePrepare(pp, sender): + self.addToPrePrepares(pp) if not self.node.isParticipating: self.stashingWhileCatchingUp.add(key) - self.addToPrePrepares(pp) - logger.info("{} processed incoming PRE-PREPARE{}".format(self, key), - extra={"tags": ["processing"]}) + logger.debug('{} stashing PRE-PREPARE{}'.format(self, key)) + return + + if self.isMaster: + self.node.onBatchCreated(pp.ledgerId, + self.stateRootHash(pp.ledgerId, + toHex=False)) + self.trackBatches(pp, oldStateRoot) + logger.debug("{} processed incoming PRE-PREPARE{}".format(self, key), + extra={"tags": ["processing"]}) def tryPrepare(self, pp: PrePrepare): """ Try to send the Prepare message if the PrePrepare message is ready to be passed into the Prepare phase. 
""" - if self.canSendPrepare(pp): + rv, msg = self.canPrepare(pp) + if rv: self.doPrepare(pp) else: - logger.debug("{} cannot send PREPARE".format(self)) + logger.debug("{} cannot send PREPARE since {}".format(self, msg)) def processPrepare(self, prepare: Prepare, sender: str) -> None: """ @@ -567,7 +649,7 @@ def processPrepare(self, prepare: Prepare, sender: str) -> None: logger.debug) return try: - if self.isValidPrepare(prepare, sender): + if self.validatePrepare(prepare, sender): self.addToPrepares(prepare, sender) self.stats.inc(TPCStat.PrepareRcvd) logger.debug("{} processed incoming PREPARE {}". @@ -575,7 +657,7 @@ def processPrepare(self, prepare: Prepare, sender: str) -> None: else: # TODO let's have isValidPrepare throw an exception that gets # handled and possibly logged higher - logger.warning("{} cannot process incoming PREPARE". + logger.debug("{} cannot process incoming PREPARE". format(self)) except SuspiciousNode as ex: self.node.reportSuspiciousNodeEx(ex) @@ -588,7 +670,7 @@ def processCommit(self, commit: Commit, sender: str) -> None: :param commit: an incoming COMMIT message :param sender: name of the node that sent the COMMIT """ - logger.debug("{} received COMMIT {} from {}". + logger.debug("{} received COMMIT{} from {}". format(self, commit, sender)) if self.isPpSeqNoStable(commit.ppSeqNo): self.discard(commit, @@ -596,7 +678,7 @@ def processCommit(self, commit: Commit, sender: str) -> None: logger.debug) return - if self.isValidCommit(commit, sender): + if self.validateCommit(commit, sender): self.stats.inc(TPCStat.CommitRcvd) self.addToCommits(commit, sender) logger.debug("{} processed incoming COMMIT{}". @@ -607,10 +689,12 @@ def tryCommit(self, prepare: Prepare): Try to commit if the Prepare message is ready to be passed into the commit phase. 
""" - if self.canCommit(prepare): + rv, reason = self.canCommit(prepare) + if rv: self.doCommit(prepare) else: - logger.debug("{} not yet able to send COMMIT".format(self)) + logger.debug("{} cannot send COMMIT since {}". + format(self, reason)) def tryOrder(self, commit: Commit): """ @@ -624,38 +708,6 @@ def tryOrder(self, commit: Commit): logger.trace("{} cannot return request to node: {}". format(self, reason)) - def doPrePrepare(self, reqDigest: ReqDigest) -> None: - """ - Broadcast a PRE-PREPARE to all the replicas. - - :param reqDigest: a tuple with elements identifier, reqId, and digest - """ - if not self.node.isParticipating: - logger.error("Non participating node is attempting PRE-PREPARE. " - "This should not happen.") - return - - if self.lastPrePrepareSeqNo == self.H: - logger.debug("{} stashing PRE-PREPARE {} since outside greater " - "than high water mark {}". - format(self, (self.viewNo, self.lastPrePrepareSeqNo+1), - self.H)) - self.stashOutsideWatermarks(reqDigest) - return - self.lastPrePrepareSeqNo += 1 - tm = time.time()*1000 - logger.debug("{} Sending PRE-PREPARE {} at {}". - format(self, (self.viewNo, self.lastPrePrepareSeqNo), - time.perf_counter())) - prePrepareReq = PrePrepare(self.instId, - self.viewNo, - self.lastPrePrepareSeqNo, - *reqDigest, - tm) - self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = (reqDigest.key, - tm) - self.send(prePrepareReq, TPCStat.PrePrepareSent) - def doPrepare(self, pp: PrePrepare): logger.debug("{} Sending PREPARE {} at {}". 
format(self, (pp.viewNo, pp.ppSeqNo), time.perf_counter())) @@ -663,7 +715,9 @@ def doPrepare(self, pp: PrePrepare): pp.viewNo, pp.ppSeqNo, pp.digest, - pp.ppTime) + pp.stateRootHash, + pp.txnRootHash + ) self.send(prepare, TPCStat.PrepareSent) self.addToPrepares(prepare, self.name) @@ -677,12 +731,86 @@ def doCommit(self, p: Prepare): format(self, (p.viewNo, p.ppSeqNo), time.perf_counter())) commit = Commit(self.instId, p.viewNo, - p.ppSeqNo, - p.digest, - p.ppTime) + p.ppSeqNo) self.send(commit, TPCStat.CommitSent) self.addToCommits(commit, self.name) + def nonFinalisedReqs(self, reqKeys: List[Tuple[str, int]]): + """ + Check if there are any requests which are not finalised, i.e for + which there are not enough PROPAGATEs + """ + return {key for key in reqKeys if not self.requests.isFinalised(key)} + + def isNextPrePrepare(self, ppSeqNo: int): + lastPp = self.lastPrePrepare + if lastPp: + # TODO: Is it possible that lastPp.ppSeqNo is less than + # self.lastOrderedPPSeqNo? Maybe if the node does not disconnect + # but does no work for some time or is missing PRE-PREPARES + lastPpSeqNo = lastPp.ppSeqNo if lastPp.ppSeqNo > \ + self.lastOrderedPPSeqNo \ + else self.lastOrderedPPSeqNo + else: + lastPpSeqNo = self.lastOrderedPPSeqNo + + if ppSeqNo - lastPpSeqNo != 1: + return False + return True + + def revert(self, ledgerId, stateRootHash, reqCount): + ledger = self.node.getLedger(ledgerId) + state = self.node.getState(ledgerId) + logger.info('{} reverting {} txns and state root from {} to {} for' + ' ledger {}'.format(self, reqCount, state.headHash, + stateRootHash, ledgerId)) + state.revertToHead(stateRootHash) + ledger.discardTxns(reqCount) + self.node.onBatchRejected(ledgerId) + + def validatePrePrepare(self, pp: PrePrepare, sender: str): + """ + This will apply the requests part of the PrePrepare to the ledger + and state. 
It will not commit though (the ledger on disk will not + change, neither the committed state root hash will change) + """ + validReqs = [] + inValidReqs = [] + rejects = [] + if self.isMaster: + # If this PRE-PREPARE is not valid then state and ledger should be + # reverted + oldStateRoot = self.stateRootHash(pp.ledgerId, toHex=False) + logger.debug('{} state root before processing {} is {}'. + format(self, pp, oldStateRoot)) + + for reqKey in pp.reqIdr: + req = self.node.requests[reqKey].finalised + self.processReqDuringBatch(req, validReqs, inValidReqs, rejects) + + if len(validReqs) != pp.discarded: + raise SuspiciousNode(sender, Suspicions.PPR_REJECT_WRONG, pp) + + reqs = validReqs + inValidReqs + digest = self.batchDigest(reqs) + + # A PRE-PREPARE is sent that does not match request digest + if digest != pp.digest: + if self.isMaster: + self.revert(pp.ledgerId, oldStateRoot, len(validReqs)) + raise SuspiciousNode(sender, Suspicions.PPR_DIGEST_WRONG, pp) + + if self.isMaster: + if pp.stateRootHash != self.stateRootHash(pp.ledgerId): + self.revert(pp.ledgerId, oldStateRoot, len(validReqs)) + raise SuspiciousNode(sender, Suspicions.PPR_STATE_WRONG, pp) + + if pp.txnRootHash != self.txnRootHash(pp.ledgerId): + self.revert(pp.ledgerId, oldStateRoot, len(validReqs)) + raise SuspiciousNode(sender, Suspicions.PPR_TXN_WRONG, pp) + + self.outBox.extend(rejects) + def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: """ Decide whether this replica is eligible to process a PRE-PREPARE, @@ -702,21 +830,31 @@ def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp) # A PRE-PREPARE is being sent to primary - if self.isPrimaryForMsg(pp) is True: + if self.isPrimary is True: raise SuspiciousNode(sender, Suspicions.PPR_TO_PRIMARY, pp) # A PRE-PREPARE is sent that has already been received if (pp.viewNo, pp.ppSeqNo) in self.prePrepares: raise SuspiciousNode(sender, 
Suspicions.DUPLICATE_PPR_SENT, pp) - key = (pp.identifier, pp.reqId) - if not self.requests.isFinalised(key): + if not self.node.isParticipating: + # Let the node stash the pre-prepare + # TODO: The next processed pre-prepare needs to take consider if + # the last pre-prepare was stashed or not since stashed requests + # do not make change to state or ledger + return True + + nonFinReqs = self.nonFinalisedReqs(pp.reqIdr) + + if nonFinReqs: + self.enqueuePrePrepare(pp, sender, nonFinReqs) + return False + + if not self.isNextPrePrepare(pp.ppSeqNo): self.enqueuePrePrepare(pp, sender) return False - # A PRE-PREPARE is sent that does not match request digest - if self.requests.digest(key) != pp.digest: - raise SuspiciousNode(sender, Suspicions.PPR_DIGEST_WRONG, pp) + self.validatePrePrepare(pp, sender) return True @@ -728,8 +866,7 @@ def addToPrePrepares(self, pp: PrePrepare) -> None: :param pp: the PRE-PREPARE to add to the list """ key = (pp.viewNo, pp.ppSeqNo) - self.prePrepares[key] = \ - ((pp.identifier, pp.reqId), pp.ppTime) + self.prePrepares[key] = pp self.dequeuePrepares(*key) self.dequeueCommits(*key) self.stats.inc(TPCStat.PrePrepareRcvd) @@ -738,19 +875,22 @@ def addToPrePrepares(self, pp: PrePrepare) -> None: def hasPrepared(self, request) -> bool: return self.prepares.hasPrepareFrom(request, self.name) - def canSendPrepare(self, request) -> bool: + def canPrepare(self, ppReq) -> (bool, str): """ - Return whether the request identified by (identifier, requestId) can - proceed to the Prepare step. + Return whether the batch of requests in the PRE-PREPARE can + proceed to the PREPARE step. 
- :param request: any object with identifier and requestId attributes + :param ppReq: any object with identifier and requestId attributes """ - return self.shouldParticipate(request.viewNo, request.ppSeqNo) \ - and not self.hasPrepared(request) \ - and self.requests.isFinalised((request.identifier, - request.reqId)) + if not self.shouldParticipate(ppReq.viewNo, ppReq.ppSeqNo): + return False, 'should not participate in consensus for {}'.format(ppReq) + if self.hasPrepared(ppReq): + return False, 'has already sent PREPARE for {}'.format(ppReq) + return True, '' + # and self.requests.isFinalised((ppReq.identifier, + # ppReq.reqId)) - def isValidPrepare(self, prepare: Prepare, sender: str) -> bool: + def validatePrepare(self, prepare: Prepare, sender: str) -> bool: """ Return whether the PREPARE specified is valid. @@ -759,9 +899,11 @@ def isValidPrepare(self, prepare: Prepare, sender: str) -> bool: :return: True if PREPARE is valid, False otherwise """ key = (prepare.viewNo, prepare.ppSeqNo) - primaryStatus = self.isPrimaryForMsg(prepare) + # primaryStatus = self.isPrimaryForMsg(prepare) + primaryStatus = self.isPrimary - ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares + # ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares + ppReq = self.getPrePrepare(*key) # If a non primary replica and receiving a PREPARE request before a # PRE-PREPARE request, then proceed @@ -775,41 +917,65 @@ def isValidPrepare(self, prepare: Prepare, sender: str) -> bool: if self.prepares.hasPrepareFrom(prepare, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE not received for the PREPARE, might be slow network - if key not in ppReqs: + if not ppReq: self.enqueuePrepare(prepare, sender) return False - elif prepare.digest != self.requests.digest(ppReqs[key][0]): - raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) - elif prepare.ppTime != ppReqs[key][1]: - raise SuspiciousNode(sender, 
Suspicions.PR_TIME_WRONG, - prepare) - else: - return True # If primary replica - else: + if primaryStatus is True: if self.prepares.hasPrepareFrom(prepare, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE was not sent for this PREPARE, certainly # malicious behavior - if key not in ppReqs: + elif not ppReq: raise SuspiciousNode(sender, Suspicions.UNKNOWN_PR_SENT, prepare) - elif prepare.digest != self.requests.digest(ppReqs[key][0]): - raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) - elif prepare.ppTime != ppReqs[key][1]: - raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, - prepare) - else: - return True + + if primaryStatus is None and not ppReq: + self.enqueuePrepare(prepare, sender) + return False + + if prepare.digest != ppReq.digest: + raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare) + + elif prepare.stateRootHash != ppReq.stateRootHash: + raise SuspiciousNode(sender, Suspicions.PR_STATE_WRONG, + prepare) + elif prepare.txnRootHash != ppReq.txnRootHash: + raise SuspiciousNode(sender, Suspicions.PR_TXN_WRONG, + prepare) + else: + return True def addToPrepares(self, prepare: Prepare, sender: str): self.prepares.addVote(prepare, sender) self.tryCommit(prepare) + def getPrePrepare(self, viewNo, ppSeqNo): + key = (viewNo, ppSeqNo) + if key in self.sentPrePrepares: + return self.sentPrePrepares[key] + if key in self.prePrepares: + return self.prePrepares[key] + + @property + def lastPrePrepare(self): + lastSeqNo = 0 + lastPp = None + if self.sentPrePrepares: + (_, s), pp = self.peekitem(self.sentPrePrepares, -1) + lastSeqNo = s + lastPp = pp + if self.prePrepares: + (_, s), pp = self.peekitem(self.prePrepares, -1) + if s > lastSeqNo: + lastSeqNo = s + lastPp = pp + return lastPp + def hasCommitted(self, request) -> bool: return self.commits.hasCommitFrom(ThreePhaseKey( request.viewNo, request.ppSeqNo), self.name) - def canCommit(self, prepare: Prepare) -> bool: + def 
canCommit(self, prepare: Prepare) -> (bool, str): """ Return whether the specified PREPARE can proceed to the Commit step. @@ -823,21 +989,24 @@ def canCommit(self, prepare: Prepare) -> bool: :param prepare: the PREPARE """ - return self.shouldParticipate(prepare.viewNo, prepare.ppSeqNo) and \ - self.prepares.hasQuorum(prepare, self.f) and \ - not self.hasCommitted(prepare) + if not self.shouldParticipate(prepare.viewNo, prepare.ppSeqNo): + return False, 'should not participate in consensus for {}'.format(prepare) + if not self.prepares.hasQuorum(prepare, self.f): + return False, 'does not have prepare quorum for {}'.format(prepare) + if self.hasCommitted(prepare): + return False, 'has already sent COMMIT for {}'.format(prepare) + return True, '' - def isValidCommit(self, commit: Commit, sender: str) -> bool: + def validateCommit(self, commit: Commit, sender: str) -> bool: """ Return whether the COMMIT specified is valid. :param commit: the COMMIT to validate :return: True if `request` is valid, False otherwise """ - primaryStatus = self.isPrimaryForMsg(commit) - ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares key = (commit.viewNo, commit.ppSeqNo) - if key not in ppReqs: + ppReq = self.getPrePrepare(*key) + if not ppReq: self.enqueueCommit(commit, sender) return False @@ -849,12 +1018,6 @@ def isValidCommit(self, commit: Commit, sender: str) -> bool: return False elif self.commits.hasCommitFrom(commit, sender): raise SuspiciousNode(sender, Suspicions.DUPLICATE_CM_SENT, commit) - elif commit.digest != self.getDigestFor3PhaseKey(ThreePhaseKey(*key)): - - raise SuspiciousNode(sender, Suspicions.CM_DIGEST_WRONG, commit) - elif key in ppReqs and commit.ppTime != ppReqs[key][1]: - raise SuspiciousNode(sender, Suspicions.CM_TIME_WRONG, - commit) else: return True @@ -935,8 +1098,6 @@ def isNextInOrdering(self, commit: Commit): return True def orderStashedCommits(self): - # TODO: What if the first few commits were out of order and stashed? 
- # `self.ordered` would be empty logger.debug('{} trying to order from stashed commits. {} {}'. format(self, self.ordered, self.stashedCommitsForOrdering)) if self.ordered: @@ -988,30 +1149,37 @@ def tryOrdering(self, commit: Commit) -> bool: """ key = (commit.viewNo, commit.ppSeqNo) logger.debug("{} trying to order COMMIT{}".format(self, key)) - reqKey = self.getReqKeyFrom3PhaseKey(key) # type: Tuple - digest = self.getDigestFor3PhaseKey(key) - if not digest: - logger.error("{} did not find digest for {}, request key {}". - format(self, key, reqKey)) - return False - self.doOrder(*key, *reqKey, digest, commit.ppTime) + ppReq = self.getPrePrepare(*key) + assert ppReq + self.doOrder(ppReq) return True - def doOrder(self, viewNo, ppSeqNo, identifier, reqId, digest, ppTime): - key = (viewNo, ppSeqNo) + def doOrder(self, pp: PrePrepare): + key = (pp.viewNo, pp.ppSeqNo) self.addToOrdered(*key) ordered = Ordered(self.instId, - viewNo, - identifier, - reqId, - ppTime) + pp.viewNo, + pp.reqIdr[:pp.discarded], + pp.ppSeqNo, + pp.ppTime, + pp.ledgerId, + pp.stateRootHash, + pp.txnRootHash) # TODO: Should not order or add to checkpoint while syncing # 3 phase state. 
- self.send(ordered, TPCStat.OrderSent) if key in self.stashingWhileCatchingUp: + if self.isMaster and self.node.isParticipating: + # While this request arrived the node was catching up but the + # node has caught up and applied the stash so apply this request + logger.debug('{} found that 3PC of ppSeqNo {} outlived the ' + 'catchup process'.format(self, pp.ppSeqNo)) + for reqKey in pp.reqIdr[:pp.discarded]: + req = self.requests[reqKey].finalised + self.node.applyReq(req) self.stashingWhileCatchingUp.remove(key) - logger.debug("{} ordered request {}".format(self, (viewNo, ppSeqNo))) - self.addToCheckpoint(ppSeqNo, digest) + self.send(ordered, TPCStat.OrderSent) + logger.debug("{} ordered request {}".format(self, key)) + self.addToCheckpoint(pp.ppSeqNo, pp.digest) def processCheckpoint(self, msg: Checkpoint, sender: str): logger.debug('{} received checkpoint {} from {}'. @@ -1121,14 +1289,16 @@ def gc(self, tillSeqNo): logger.debug("{} cleaning up till {}".format(self, tillSeqNo)) tpcKeys = set() reqKeys = set() - for (v, p), (reqKey, _) in self.sentPrePrepares.items(): + for (v, p), pp in self.sentPrePrepares.items(): if p <= tillSeqNo: tpcKeys.add((v, p)) - reqKeys.add(reqKey) - for (v, p), (reqKey, _) in self.prePrepares.items(): + for reqKey in pp.reqIdr: + reqKeys.add(reqKey) + for (v, p), pp in self.prePrepares.items(): if p <= tillSeqNo: tpcKeys.add((v, p)) - reqKeys.add(reqKey) + for reqKey in pp.reqIdr: + reqKeys.add(reqKey) logger.debug("{} found {} 3 phase keys to clean". 
format(self, len(tpcKeys))) @@ -1164,9 +1334,7 @@ def processStashedMsgsForNewWaterMarks(self): logger.debug("{} processing stashed item {} after new stable " "checkpoint".format(self, item)) - if isinstance(item, ReqDigest): - self.doPrePrepare(item) - elif isinstance(item, tuple) and len(item) == 2: + if isinstance(item, tuple) and len(item) == 2: self.dispatchThreePhaseMsg(*item) else: logger.error("{} cannot process {} " @@ -1215,38 +1383,57 @@ def isPpSeqNoBetweenWaterMarks(self, ppSeqNo: int): def addToOrdered(self, viewNo: int, ppSeqNo: int): self.ordered.add((viewNo, ppSeqNo)) + self.lastOrderedPPSeqNo = ppSeqNo - def enqueuePrePrepare(self, request: PrePrepare, sender: str): - logger.debug("Queueing pre-prepares due to unavailability of finalised " - "Request. Request {} from {}".format(request, sender)) - key = (request.identifier, request.reqId) - if key not in self.prePreparesPendingReqDigest: - self.prePreparesPendingReqDigest[key] = [] - self.prePreparesPendingReqDigest[key].append((request, sender)) - - def dequeuePrePrepare(self, identifier: int, reqId: int): - key = (identifier, reqId) - if key in self.prePreparesPendingReqDigest: - pps = self.prePreparesPendingReqDigest[key] - for (pp, sender) in pps: - logger.debug("{} popping stashed PRE-PREPARE{}". - format(self, key)) - if pp.digest == self.requests.digest(key): - self.prePreparesPendingReqDigest.pop(key) - self.processPrePrepare(pp, sender) - logger.debug( - "{} processed {} PRE-PREPAREs waiting for finalised " - "request for identifier {} and reqId {}". - format(self, pp, identifier, reqId)) - break - - def enqueuePrepare(self, request: Prepare, sender: str): - logger.debug("Queueing prepares due to unavailability of PRE-PREPARE. 
" - "Request {} from {}".format(request, sender)) - key = (request.viewNo, request.ppSeqNo) + def enqueuePrePrepare(self, ppMsg: PrePrepare, sender: str, nonFinReqs: Set=None): + if nonFinReqs: + logger.debug("Queueing pre-prepares due to unavailability of finalised " + "requests. PrePrepare {} from {}".format(ppMsg, sender)) + self.prePreparesPendingFinReqs.append((ppMsg, sender, nonFinReqs)) + else: + # Possible exploit, an malicious party can send an invalid + # pre-prepare and over-write the correct one? + logger.debug( + "Queueing pre-prepares due to unavailability of previous " + "pre-prepares. PrePrepare {} from {}".format(ppMsg, sender)) + self.prePreparesPendingPrevPP[ppMsg.viewNo, ppMsg.ppSeqNo] = (ppMsg, sender) + + def dequeuePrePrepares(self): + ppsReady = [] + for i, (pp, sender, reqIds) in enumerate(self.prePreparesPendingFinReqs): + finalised = set() + for r in reqIds: + if self.requests.isFinalised(r): + finalised.add(r) + diff = reqIds.difference(finalised) + if not diff: + ppsReady.append(i) + self.prePreparesPendingFinReqs[i] = (pp, sender, diff) + + for i in ppsReady: + pp, sender, _ = self.prePreparesPendingFinReqs.pop(i) + self.prePreparesPendingPrevPP[pp.viewNo, pp.ppSeqNo] = (pp, sender) + + r = 0 + while self.prePreparesPendingPrevPP and self.isNextPrePrepare( + self.prePreparesPendingPrevPP.iloc[0][1]): + _, (pp, sender) = self.prePreparesPendingPrevPP.popitem(last=False) + if pp.viewNo < self.viewNo: + self.discard(pp, + "Pre-Prepare from a previous view", + logger.debug) + continue + self.processPrePrepare(pp, sender) + r += 1 + return r + + def enqueuePrepare(self, pMsg: Prepare, sender: str): + logger.debug("Queueing prepare due to unavailability of PRE-PREPARE. 
" + "Prepare {} from {}".format(pMsg, sender)) + key = (pMsg.viewNo, pMsg.ppSeqNo) if key not in self.preparesWaitingForPrePrepare: self.preparesWaitingForPrePrepare[key] = deque() - self.preparesWaitingForPrePrepare[key].append((request, sender)) + self.preparesWaitingForPrePrepare[key].append((pMsg, sender)) def dequeuePrepares(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) diff --git a/plenum/server/req_handler.py b/plenum/server/req_handler.py new file mode 100644 index 0000000000..74dd8c28e5 --- /dev/null +++ b/plenum/server/req_handler.py @@ -0,0 +1,68 @@ +from binascii import unhexlify +from typing import List + +from plenum.common.ledger import Ledger +from plenum.common.request import Request +from plenum.persistence.util import txnsWithSeqNo +from stp_core.common.log import getlogger + +from state.state import State + +logger = getlogger() + + +class RequestHandler: + """ + Base class for request handlers + Declares methods for validation, application of requests and + state control + """ + + def __init__(self, ledger: Ledger, state: State): + self.ledger = ledger + self.state = state + + def validate(self, req: Request, config=None): + """ + Validates request. Raises exception if requiest is invalid. 
+ """ + pass + + def apply(self, req: Request): + """ + Applies request + """ + pass + + def updateState(self, txns, isCommitted=False): + """ + Updates current state with a number of committed or + not committed transactions + """ + pass + + def commit(self, txnCount, stateRoot, txnRoot) -> List: + """ + :param txnCount: The number of requests to commit (The actual requests are + picked up from the uncommitted list from the ledger) + :param stateRoot: The state trie root after the txns are committed + :param txnRoot: The txn merkle root after the txns are committed + + :return: list of committed transactions + """ + + (seqNoStart, seqNoEnd), committedTxns = \ + self.ledger.commitTxns(txnCount) + stateRoot = unhexlify(stateRoot.encode()) + txnRoot = self.ledger.hashToStr(unhexlify(txnRoot.encode())) + # Probably the following assertion fail should trigger catchup + assert self.ledger.root_hash == txnRoot, '{} {}'.format( + self.ledger.root_hash, txnRoot) + self.state.commit(rootHash=stateRoot) + return txnsWithSeqNo(seqNoStart, seqNoEnd, committedTxns) + + def onBatchCreated(self, stateRoot): + pass + + def onBatchRejected(self, stateRoot=None): + pass diff --git a/plenum/server/router.py b/plenum/server/router.py index 2cf9cd103a..a5a7ce2dc9 100644 --- a/plenum/server/router.py +++ b/plenum/server/router.py @@ -1,6 +1,6 @@ from collections import deque, OrderedDict from inspect import isawaitable -from typing import Callable, Any +from typing import Callable, Any, NamedTuple, Union from typing import Tuple @@ -15,7 +15,7 @@ class Router: (2) a function that handles the message """ - def __init__(self, *routes: Tuple[type, Callable]): + def __init__(self, *routes: Tuple[Union[type, NamedTuple], Callable]): """ Create a new router with a list of routes diff --git a/plenum/server/suspicion_codes.py b/plenum/server/suspicion_codes.py index f56b03ae49..d4e63dc7d0 100644 --- a/plenum/server/suspicion_codes.py +++ b/plenum/server/suspicion_codes.py @@ -46,8 +46,31 @@ 
class Suspicions: Suspicion(5, "PREPARE time does not match with PRE-PREPARE") CM_TIME_WRONG = \ Suspicion(5, "COMMIT time does not match with PRE-PREPARE") + PPR_REJECT_WRONG = \ + Suspicion(16, "Pre-Prepare message has incorrect reject") + PPR_STATE_WRONG = \ + Suspicion(17, "Pre-Prepare message has incorrect state trie root") + PPR_TXN_WRONG = \ + Suspicion(18, "Pre-Prepare message has incorrect transaction tree root") + PR_STATE_WRONG = \ + Suspicion(19, "Prepare message has incorrect state trie root") + PR_TXN_WRONG = \ + Suspicion(20, "Prepare message has incorrect transaction tree root") + PRIMARY_DEGRADED = Suspicion(21, 'Primary of master protocol instance ' + 'degraded the performance') + PRIMARY_DISCONNECTED = Suspicion(22, 'Primary of master protocol instance ' + 'disconnected') + PRIMARY_ABOUT_TO_BE_DISCONNECTED = Suspicion(23, 'Primary of master ' + 'protocol instance ' + 'about to be disconnected') @classmethod - def getList(cls): + def get_list(cls): return [member for nm, member in inspect.getmembers(cls) if isinstance( member, Suspicion)] + + @classmethod + def get_by_code(cls, code): + for s in Suspicions.get_list(): + if code == s.code: + return s diff --git a/plenum/test/batching_3pc/__init__.py b/plenum/test/batching_3pc/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/batching_3pc/conftest.py b/plenum/test/batching_3pc/conftest.py new file mode 100644 index 0000000000..58258de369 --- /dev/null +++ b/plenum/test/batching_3pc/conftest.py @@ -0,0 +1,24 @@ +import pytest +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected + + +@pytest.fixture(scope="module") +def tconf(tconf, request): + oldSize = tconf.Max3PCBatchSize + oldTIme = tconf.Max3PCBatchWait + tconf.Max3PCBatchSize = 3 + tconf.Max3PCBatchWait = 5 + + def reset(): + tconf.Max3PCBatchSize = oldSize + tconf.Max3PCBatchWait = oldTIme + + request.addfinalizer(reset) + return tconf + + 
+@pytest.fixture(scope="module") +def client(tconf, looper, txnPoolNodeSet, client1, + client1Connected): + return client1Connected diff --git a/plenum/test/batching_3pc/helper.py b/plenum/test/batching_3pc/helper.py new file mode 100644 index 0000000000..b337ebfa13 --- /dev/null +++ b/plenum/test/batching_3pc/helper.py @@ -0,0 +1,37 @@ +from binascii import hexlify + +from stp_core.loop.eventually import eventually +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test.helper import waitForSufficientRepliesForRequests + + +def checkNodesHaveSameRoots(nodes, checkUnCommitted=True, + checkCommitted=True): + def addRoot(root, collection): + if root: + collection.add(hexlify(root)) + else: + collection.add(root) + + if checkUnCommitted: + stateRoots = set() + txnRoots = set() + for node in nodes: + addRoot(node.getState(DOMAIN_LEDGER_ID).headHash, stateRoots) + addRoot(node.getLedger(DOMAIN_LEDGER_ID).uncommittedRootHash, + txnRoots) + + assert len(stateRoots) == 1 + assert len(txnRoots) == 1 + + if checkCommitted: + stateRoots = set() + txnRoots = set() + for node in nodes: + addRoot(node.getState(DOMAIN_LEDGER_ID).committedHeadHash, + stateRoots) + addRoot(node.getLedger(DOMAIN_LEDGER_ID).tree.root_hash, + txnRoots) + + assert len(stateRoots) == 1 + assert len(txnRoots) == 1 diff --git a/plenum/test/batching_3pc/test_basic_batching.py b/plenum/test/batching_3pc/test_basic_batching.py new file mode 100644 index 0000000000..4e2a83a680 --- /dev/null +++ b/plenum/test/batching_3pc/test_basic_batching.py @@ -0,0 +1,104 @@ +import types + +import pytest + +from stp_core.loop.eventually import eventually +from plenum.common.exceptions import UnauthorizedClientRequest +from plenum.test.batching_3pc.helper import checkNodesHaveSameRoots +from plenum.test.helper import checkReqNackWithReason, sendRandomRequests, \ + checkRejectWithReason, waitForSufficientRepliesForRequests + + +def testRequestStaticValidation(tconf, looper, txnPoolNodeSet, client, + 
wallet1): + """ + Check that for requests which fail static validation, REQNACK is sent + :return: + """ + reqs = [wallet1.signOp((lambda : {'something': 'nothing'})()) for _ in + range(tconf.Max3PCBatchSize)] + client.submitReqs(*reqs) + for node in txnPoolNodeSet: + looper.run(eventually(checkReqNackWithReason, client, '', + node.clientstack.name, retryWait=1)) + + +def test3PCOverBatchWithThresholdReqs(tconf, looper, txnPoolNodeSet, client, + wallet1): + """ + Check that 3 phase commit happens when threshold number of requests are + received and propagated. + :return: + """ + reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) + waitForSufficientRepliesForRequests(looper, client, requests=reqs, + customTimeoutPerReq=tconf.Max3PCBatchWait-1) + + +def test3PCOverBatchWithLessThanThresholdReqs(tconf, looper, txnPoolNodeSet, + client, wallet1): + """ + Check that 3 phase commit happens when threshold number of requests are + not received but threshold time has passed + :return: + """ + reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize - 1) + waitForSufficientRepliesForRequests(looper, client, requests=reqs, + customTimeoutPerReq=tconf.Max3PCBatchWait + 1) + + +def testTreeRootsCorrectAfterEachBatch(tconf, looper, txnPoolNodeSet, + client, wallet1): + """ + Check if both state root and txn tree root are correct and same on each + node after each batch + :return: + """ + # Send 1 batch + reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) + waitForSufficientRepliesForRequests(looper, client, requests=reqs, + customTimeoutPerReq=tconf.Max3PCBatchWait) + checkNodesHaveSameRoots(txnPoolNodeSet) + + # Send 2 batches + reqs = sendRandomRequests(wallet1, client, 2 * tconf.Max3PCBatchSize) + waitForSufficientRepliesForRequests(looper, client, requests=reqs, + customTimeoutPerReq=2*tconf.Max3PCBatchWait) + checkNodesHaveSameRoots(txnPoolNodeSet) + + +def testRequestDynamicValidation(tconf, looper, txnPoolNodeSet, + client, wallet1): 
+ """ + Check that for requests which fail dynamic (state based) validation, + REJECT is sent to the client + :return: + """ + origMethods = [] + names = {node.name: 0 for node in txnPoolNodeSet} + + def rejectingMethod(self, req): + names[self.name] += 1 + # Raise rejection for last request of batch + if tconf.Max3PCBatchSize - names[self.name] == 0: + raise UnauthorizedClientRequest(req.identifier, + req.reqId, + 'Simulated rejection') + + for node in txnPoolNodeSet: + origMethods.append(node.doDynamicValidation) + node.doDynamicValidation = types.MethodType(rejectingMethod, node) + + reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) + waitForSufficientRepliesForRequests(looper, client, requests=reqs[:-1], + customTimeoutPerReq=tconf.Max3PCBatchWait) + with pytest.raises(AssertionError): + waitForSufficientRepliesForRequests(looper, client, requests=reqs[-1:], + customTimeoutPerReq=tconf.Max3PCBatchWait) + for node in txnPoolNodeSet: + looper.run(eventually(checkRejectWithReason, client, + 'Simulated rejection', node.clientstack.name, + retryWait=1)) + + for i, node in enumerate(txnPoolNodeSet): + node.doDynamicValidation = origMethods[i] diff --git a/plenum/test/batching_3pc/test_batch_rejection.py b/plenum/test/batching_3pc/test_batch_rejection.py new file mode 100644 index 0000000000..bc1fbb8346 --- /dev/null +++ b/plenum/test/batching_3pc/test_batch_rejection.py @@ -0,0 +1,89 @@ +import types + +import pytest + +from stp_core.loop.eventually import eventually +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.common.util import updateNamedTuple +from plenum.test.helper import sendRandomRequests, \ + waitForSufficientRepliesForRequests +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica + + +@pytest.fixture(scope="module") +def setup(tconf, looper, txnPoolNodeSet, client, wallet1): + # Patch the 3phase request sending method to send incorrect digest and + pr, otherR = getPrimaryReplica(txnPoolNodeSet, 
instId=0), \ + getNonPrimaryReplicas(txnPoolNodeSet, instId=0) + + reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) + waitForSufficientRepliesForRequests(looper, client, requests=reqs, + customTimeoutPerReq=tconf.Max3PCBatchWait) + stateRoot = pr.stateRootHash(DOMAIN_LEDGER_ID, toHex=False) + + origMethod = pr.create3PCBatch + malignedOnce = None + + def badMethod(self, ledgerId): + nonlocal malignedOnce + pp = origMethod(ledgerId) + if not malignedOnce: + pp = updateNamedTuple(pp, digest=pp.digest + '123') + malignedOnce = True + return pp + + pr.create3PCBatch = types.MethodType(badMethod, pr) + sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) + return pr, otherR, stateRoot + + +@pytest.fixture(scope="module") +def reverted(setup, looper): + pr, otherR, oldStateRoot = setup + + def chkPps(n): + assert len(pr.batches) == n + + def chkStateRoot(root): + for r in [pr]+otherR: + r.stateRootHash(DOMAIN_LEDGER_ID, toHex=False) == root + + looper.run(eventually(chkPps, 1, retryWait=1, timeout=5)) + looper.run(eventually(chkStateRoot, oldStateRoot)) + + +@pytest.fixture(scope="module") +def viewChanged(reverted, looper, txnPoolNodeSet): + def chk(): + for n in txnPoolNodeSet: + assert n.viewNo == 1 + assert all([r.primaryName for r in n.replicas]) + + looper.run(eventually(chk, retryWait=1, timeout=15)) + + +def testTreeStateRevertedAfterBatchRejection(reverted): + """" + After a batch is rejected, all nodes revert their trees to last known + correct state + """ + pass + + +def testViewChangeAfterBatchRejected(viewChanged): + """" + After a batch is rejected and each batch that was created based on the + rejected batch is discarded, the discarded batches are tried again + """ + pass + + +def testMoreBatchesWillBeSentAfterViewChange(reverted, viewChanged, wallet1, + client, tconf, looper): + """ + After retrying discarded batches, new batches are sent + :return: + """ + reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) + 
waitForSufficientRepliesForRequests(looper, client, requests=reqs, + customTimeoutPerReq=tconf.Max3PCBatchWait) diff --git a/plenum/test/batching_3pc/test_batching_scenarios.py b/plenum/test/batching_3pc/test_batching_scenarios.py new file mode 100644 index 0000000000..99795a8c0c --- /dev/null +++ b/plenum/test/batching_3pc/test_batching_scenarios.py @@ -0,0 +1,54 @@ +from stp_core.common.log import getlogger +from plenum.common.types import PrePrepare +from plenum.test.batching_3pc.helper import checkNodesHaveSameRoots +from plenum.test.helper import sendRandomRequests, \ + waitForSufficientRepliesForRequests +from plenum.test.spy_helpers import getAllArgs +from plenum.test.test_node import getPrimaryReplica, getNonPrimaryReplicas + +logger = getlogger() + + +def testPrePrepareProcessedInOrder(tconf, looper, txnPoolNodeSet, wallet1, + client): + """ + A non-primary receives PRE-PREPARE out of order, it receives with ppSeqNo 2 + earlier than it receives the one with ppSeqNo 1 but it stashes the one + with ppSeqNo 2 and only unstashes it for processing once it has + processed PRE-PREPARE with ppSeqNo 1 + :return: + """ + pr, otherR = getPrimaryReplica(txnPoolNodeSet, instId=0), \ + getNonPrimaryReplicas(txnPoolNodeSet, instId=0) + otherNodes = [r.node for r in otherR] + ppsToDelay = 2 + ppDelay = 3 + delayeds = 0 + expectedDelayeds = (len(txnPoolNodeSet) - 1) * ppsToDelay + delayedPpSeqNos = set() + + def specificPrePrepares(wrappedMsg): + nonlocal delayeds + msg, sender = wrappedMsg + if isinstance(msg, PrePrepare) and delayeds < expectedDelayeds: + delayeds += 1 + delayedPpSeqNos.add(msg.ppSeqNo) + logger.debug('ppSeqNo {} would be delayed'.format(msg.ppSeqNo)) + return ppDelay + + for node in otherNodes: + logger.debug('{} would be delaying reception of some pre-prepares'. 
+ format(node)) + node.nodeIbStasher.delay(specificPrePrepares) + + reqs = sendRandomRequests(wallet1, client, + (ppsToDelay+1)*tconf.Max3PCBatchSize) + + waitForSufficientRepliesForRequests(looper, client, requests=reqs, + customTimeoutPerReq=(ppsToDelay + 1) * tconf.Max3PCBatchWait) + checkNodesHaveSameRoots(txnPoolNodeSet) + + for r in otherR: + seqNos = [a['pp'].ppSeqNo for a in getAllArgs(r, r.addToPrePrepares)] + seqNos.reverse() + assert sorted(seqNos) == seqNos diff --git a/plenum/test/batching_3pc/test_client_requests.py b/plenum/test/batching_3pc/test_client_requests.py new file mode 100644 index 0000000000..cbf9bdc402 --- /dev/null +++ b/plenum/test/batching_3pc/test_client_requests.py @@ -0,0 +1,6 @@ +def testClientRequestingStateProof(): + pass + + +def testClientRequestingStateVariableValue(): + pass diff --git a/plenum/test/batching_3pc/test_state_proof.py b/plenum/test/batching_3pc/test_state_proof.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/blacklist/test_blacklist_client.py b/plenum/test/blacklist/test_blacklist_client.py index 98332c8f70..f98ab7a18d 100644 --- a/plenum/test/blacklist/test_blacklist_client.py +++ b/plenum/test/blacklist/test_blacklist_client.py @@ -22,5 +22,5 @@ def chk(): for node in nodeSet: assert not node.isClientBlacklisted(client1.name) - timeout = waits.expectedClientConnectionTimeout(nodeSet.f) + timeout = waits.expectedClientToPoolConnectionTimeout(len(nodeSet)) looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py b/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py index 2192deac12..320fe9051f 100644 --- a/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py +++ b/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py @@ -26,5 +26,5 @@ def chk(): for node in A, C, D: assert node.isNodeBlacklisted(B.name) - timeout = 
waits.expectedNominationTimeout(len(nodeSet.nodes)) + timeout = waits.expectedPoolNominationTimeout(len(nodeSet.nodes)) looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py b/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py index 8ee3b4e576..6af10e63b7 100644 --- a/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py +++ b/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py @@ -30,5 +30,5 @@ def chk(): for node in A, C, D: assert node.isNodeBlacklisted(B.name) - timeout = waits.expectedNominationTimeout(len(nodeSet.nodes)) + timeout = waits.expectedPoolNominationTimeout(len(nodeSet.nodes)) looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/checkpoints/test_message_outside_watermark.py b/plenum/test/checkpoints/test_message_outside_watermark.py index 999a2c0e0a..2bda8ea407 100644 --- a/plenum/test/checkpoints/test_message_outside_watermark.py +++ b/plenum/test/checkpoints/test_message_outside_watermark.py @@ -4,7 +4,7 @@ from plenum.test.delayers import ppDelay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ countDiscarded -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.node_catchup.helper import checkNodeDataForEquality from plenum.test.test_node import getNonPrimaryReplicas, TestReplica from stp_core.loop.eventually import eventually @@ -40,8 +40,8 @@ def discardCounts(replicas, pat): 'achieved stable checkpoint') sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, 1) - timeout =waits.expectedPoolLedgerCheck(len(txnPoolNodeSet)) - looper.run(eventually(checkNodeLedgersForEquality, slowNode, + timeout =waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + looper.run(eventually(checkNodeDataForEquality, slowNode, *[_ for _ in txnPoolNodeSet if _ != slowNode], retryWait=1, 
timeout=timeout)) newStashCount = slowReplica.spylog.count(TestReplica.stashOutsideWatermarks.__name__) @@ -54,5 +54,5 @@ def chk(): for nm, count in counts.items(): assert count > oldDiscardCounts[nm] - timeout = waits.expectedNodeToNodeMessageDeliveryTime() * len(txnPoolNodeSet) + timeout = waits.expectedNodeToNodeMessageDeliveryTime() * len(txnPoolNodeSet) + delay looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/checkpoints/test_message_outside_watermark1.py b/plenum/test/checkpoints/test_message_outside_watermark1.py index ca34741130..537de33517 100644 --- a/plenum/test/checkpoints/test_message_outside_watermark1.py +++ b/plenum/test/checkpoints/test_message_outside_watermark1.py @@ -6,7 +6,7 @@ from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica -def testPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper, +def testPrimaryRecvs3PhaseMessageOutsideWatermarks(tconf, chkFreqPatched, looper, txnPoolNodeSet, client1, wallet1, client1Connected): """ @@ -16,7 +16,7 @@ def testPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper, Eventually this primary will send PRE-PREPARE for all requests and those requests will complete """ - delay = 10 + delay = 5 instId = 1 reqsToSend = 2*chkFreqPatched.LOG_SIZE + 1 npr = getNonPrimaryReplicas(txnPoolNodeSet, instId) @@ -32,5 +32,4 @@ def chk(): print('Sending {} requests'.format(reqsToSend)) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, 1) - # TODO Select or create the timeout from 'waits'. Don't use constant. 
- looper.run(eventually(chk, retryWait=1, timeout=80)) + looper.run(eventually(chk, retryWait=1, timeout=tconf.TestRunningTimeLimitSec)) diff --git a/plenum/test/checkpoints/test_stable_checkpoint.py b/plenum/test/checkpoints/test_stable_checkpoint.py index 429ddafddb..ad5db58ca9 100644 --- a/plenum/test/checkpoints/test_stable_checkpoint.py +++ b/plenum/test/checkpoints/test_stable_checkpoint.py @@ -11,7 +11,6 @@ def checkRequestCounts(nodes, count): for r in node.replicas: assert len(r.commits) == count assert len(r.prepares) == count - # assert len(r.ordered) == count def testRequestOlderThanStableCheckpointRemoved(chkFreqPatched, looper, @@ -20,15 +19,18 @@ def testRequestOlderThanStableCheckpointRemoved(chkFreqPatched, looper, reqs = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ-1, 1) timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) - looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, retryWait=1, timeout=timeout)) + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, retryWait=1, + timeout=timeout)) checkRequestCounts(txnPoolNodeSet, len(reqs)) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1) - looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout)) + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, + timeout=timeout)) checkRequestCounts(txnPoolNodeSet, 0) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, - 3*CHK_FREQ + 1, 1) + 3*CHK_FREQ + 1, 1) - looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1, timeout=timeout)) + looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1, + timeout=timeout)) checkRequestCounts(txnPoolNodeSet, 1) diff --git a/plenum/test/cli/helper.py b/plenum/test/cli/helper.py index 31ad2b6671..34c30d37e4 100644 --- a/plenum/test/cli/helper.py +++ b/plenum/test/cli/helper.py @@ -193,7 +193,7 @@ def checkAllNodesUp(cli): for inst in [0, 1]: rep = node.replicas[inst] 
assert rep - pri = rep.primaryNames[0] + pri = rep.primaryName assert expected.format(nm=nm, pri=pri, inst=inst) in msgs @@ -227,8 +227,7 @@ def waitClientConnected(cli, nodeNames, clientName): Wait for moment when client connected to pool """ - fVal = util.getMaxFailures(len(nodeNames)) - timeout = waits.expectedClientConnectionTimeout(fVal) + timeout = waits.expectedClientToPoolConnectionTimeout(len(nodeNames)) cli.looper.run(eventually(checkClientConnected, cli, nodeNames, clientName, timeout=timeout)) @@ -243,11 +242,6 @@ def createClientAndConnect(cli, nodeNames, clientName): createNewKeyring(clientName, cli) cli.enterCmd("new key clientName{}".format("key")) - from plenum.common import util - - fVal = util.getMaxFailures(len(cli.nodeReg)) - timeout = waits.expectedClientConnectionTimeout(fVal) - waitClientConnected(cli, nodeNames, clientName) @@ -285,8 +279,9 @@ def checkRequest(cli, operation): # txnTimePattern = "'txnTime', \d+\.*\d*" # txnIdPattern = "'txnId', '" + txn['txnId'] + "'" txnTimePattern = "\'txnTime\': \d+\.*\d*" - txnIdPattern = "\'txnId\': '" + txn['txnId'] + "'" - assert re.search(txnIdPattern, printedReply['msg']) + # DEPR + # txnIdPattern = "\'txnId\': '" + txn['txnId'] + "'" + # assert re.search(txnIdPattern, printedReply['msg']) assert re.search(txnTimePattern, printedReply['msg']) assert printedStatus['msg'] == "Status: {}".format(status) return client, wallet diff --git a/plenum/test/cli/test_basic_client_commands.py b/plenum/test/cli/test_basic_client_commands.py index 22872e1730..ce6e12c8c3 100644 --- a/plenum/test/cli/test_basic_client_commands.py +++ b/plenum/test/cli/test_basic_client_commands.py @@ -1,6 +1,7 @@ from plenum.common.util import randomString from plenum.test.cli.helper import waitClientConnected + def testClientNames(cli, validNodeNames, createAllNodes): """ Test adding clients with valid and invalid names(prefixed with node names). 
@@ -28,7 +29,6 @@ def checkClientNotAddedWithNodeName(name): assert cName in cli.clients waitClientConnected(cli, validNodeNames, cName) - # Add clients with name same as a node name or starting with a node name for i, nm in enumerate(validNodeNames): # Adding client with name same as that of a node diff --git a/plenum/test/cli/test_cli_with_auction_req_plugin.py b/plenum/test/cli/test_cli_with_auction_req_plugin.py index e44f4a27c7..1baee318d9 100644 --- a/plenum/test/cli/test_cli_with_auction_req_plugin.py +++ b/plenum/test/cli/test_cli_with_auction_req_plugin.py @@ -3,6 +3,7 @@ import pytest +from plenum.test import waits from stp_core.loop.eventually import eventually from plenum.test.cli.helper import loadPlugin, assertNoClient, \ createClientAndConnect, waitRequestSuccess, waitBalanceChange @@ -29,7 +30,7 @@ def testReqForNonExistentClient(cli, loadAuctionReqPlugin, createAllNodes): # TODO: Have a test for non existent auction id -# @pytest.mark.skipif('sys.platform == "win32"', reason='SOV-457') +@pytest.mark.skipif('sys.platform == "win32"', reason='SOV-457') def testTransactions(cli, loadAuctionReqPlugin, createAllNodes, validNodeNames): nodeCount = len(validNodeNames) auctionId = str(uuid4()) @@ -38,74 +39,75 @@ def testTransactions(cli, loadAuctionReqPlugin, createAllNodes, validNodeNames): createClientAndConnect(cli, validNodeNames, name) jason, tyler, les, john, timothy = names + timeout = waits.expectedTransactionExecutionTime(nodeCount) cli.enterCmd("client {} start auction {}".format(jason, auctionId)) - waitRequestSuccess(cli, nodeCount * 1) + waitRequestSuccess(cli, nodeCount * 1, customTimeout=timeout) cli.enterCmd("client {} place bid 20 on {}".format(tyler, auctionId)) - waitRequestSuccess(cli, nodeCount * 2) + waitRequestSuccess(cli, nodeCount * 2, customTimeout=timeout) cli.enterCmd("client {} balance".format(tyler)) - waitRequestSuccess(cli, nodeCount * 3) + waitRequestSuccess(cli, nodeCount * 3, customTimeout=timeout) - 
waitBalanceChange(cli, nodeCount, 980) + waitBalanceChange(cli, nodeCount, 980, customTimeout=timeout) cli.enterCmd("client {} place bid 40 on {}".format(les, auctionId)) - waitRequestSuccess(cli, nodeCount * 4) + waitRequestSuccess(cli, nodeCount * 4, customTimeout=timeout) cli.enterCmd("client {} balance".format(tyler)) - waitRequestSuccess(cli, nodeCount * 5) - waitBalanceChange(cli, nodeCount, 1000) + waitRequestSuccess(cli, nodeCount * 5, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 1000, customTimeout=timeout) cli.enterCmd("client {} balance".format(les)) - waitRequestSuccess(cli, nodeCount * 6) - waitBalanceChange(cli, nodeCount, 960) + waitRequestSuccess(cli, nodeCount * 6, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 960, customTimeout=timeout) # This bid would fail so `success` would be false and thus success count # wont increase cli.enterCmd("client {} place bid 30 on {}".format(john, auctionId)) - waitRequestSuccess(cli, nodeCount * 6) + waitRequestSuccess(cli, nodeCount * 6, customTimeout=timeout) cli.enterCmd("client {} balance".format(john)) - waitRequestSuccess(cli, nodeCount * 7) - waitBalanceChange(cli, nodeCount * 2, 1000) + waitRequestSuccess(cli, nodeCount * 7, customTimeout=timeout) + waitBalanceChange(cli, nodeCount * 2, 1000, customTimeout=timeout) cli.enterCmd("client {} balance".format(les)) - waitRequestSuccess(cli, nodeCount * 8) - waitBalanceChange(cli, nodeCount * 2, 960) + waitRequestSuccess(cli, nodeCount * 8, customTimeout=timeout) + waitBalanceChange(cli, nodeCount * 2, 960, customTimeout=timeout) cli.enterCmd("client {} place bid 200 on {}".format(timothy, auctionId)) - waitRequestSuccess(cli, nodeCount * 9) + waitRequestSuccess(cli, nodeCount * 9, customTimeout=timeout) cli.enterCmd("client {} balance".format(timothy)) - waitRequestSuccess(cli, nodeCount * 10) - waitBalanceChange(cli, nodeCount, 800) + waitRequestSuccess(cli, nodeCount * 10, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 
800, customTimeout=timeout) cli.enterCmd("client {} balance".format(les)) - waitRequestSuccess(cli, nodeCount * 11) - waitBalanceChange(cli, nodeCount * 3, 1000) + waitRequestSuccess(cli, nodeCount * 11, customTimeout=timeout) + waitBalanceChange(cli, nodeCount * 3, 1000, customTimeout=timeout) cli.enterCmd("client {} end auction {}".format(jason, auctionId)) - waitRequestSuccess(cli, nodeCount * 12) + waitRequestSuccess(cli, nodeCount * 12, customTimeout=timeout) cli.enterCmd("client {} place bid 300 on {}".format(john, auctionId)) - waitRequestSuccess(cli, nodeCount * 12) + waitRequestSuccess(cli, nodeCount * 12, customTimeout=timeout) cli.enterCmd("client {} balance".format(john)) - waitRequestSuccess(cli, nodeCount * 13) - waitBalanceChange(cli, nodeCount * 4, 1000) + waitRequestSuccess(cli, nodeCount * 13, customTimeout=timeout) + waitBalanceChange(cli, nodeCount * 4, 1000, customTimeout=timeout) cli.enterCmd("client {} balance".format(tyler)) - waitRequestSuccess(cli, nodeCount * 14) - waitBalanceChange(cli, nodeCount * 5, 1000) + waitRequestSuccess(cli, nodeCount * 14, customTimeout=timeout) + waitBalanceChange(cli, nodeCount * 5, 1000, customTimeout=timeout) cli.enterCmd("client {} balance".format(john)) - waitRequestSuccess(cli, nodeCount * 15) - waitBalanceChange(cli, nodeCount * 6, 1000) + waitRequestSuccess(cli, nodeCount * 15, customTimeout=timeout) + waitBalanceChange(cli, nodeCount * 6, 1000, customTimeout=timeout) cli.enterCmd("client {} balance".format(les)) - waitRequestSuccess(cli, nodeCount * 16) - waitBalanceChange(cli, nodeCount * 7, 1000) + waitRequestSuccess(cli, nodeCount * 16, customTimeout=timeout) + waitBalanceChange(cli, nodeCount * 7, 1000, customTimeout=timeout) cli.enterCmd("client {} balance".format(timothy)) - waitRequestSuccess(cli, nodeCount * 17) - waitBalanceChange(cli, nodeCount * 2, 800) + waitRequestSuccess(cli, nodeCount * 17, customTimeout=timeout) + waitBalanceChange(cli, nodeCount * 2, 800, customTimeout=timeout) diff 
--git a/plenum/test/cli/test_cli_with_bank_req_plugin.py b/plenum/test/cli/test_cli_with_bank_req_plugin.py index aea3d23a78..e9b48cb710 100644 --- a/plenum/test/cli/test_cli_with_bank_req_plugin.py +++ b/plenum/test/cli/test_cli_with_bank_req_plugin.py @@ -1,5 +1,6 @@ import pytest +from plenum.test import waits from plenum.test.cli.helper import \ waitRequestSuccess, waitBalanceChange, \ assertNoClient, loadPlugin, \ @@ -23,49 +24,51 @@ def testReqForNonExistentClient(cli, loadBankReqPlugin, createAllNodes): # @pytest.mark.skipif('sys.platform == "win32"', reason='SOV-457') def testTransactions(cli, loadBankReqPlugin, createAllNodes, validNodeNames): - numOfNodes = len(validNodeNames) + nodeCount = len(validNodeNames) createClientAndConnect(cli, validNodeNames, "Alice") createClientAndConnect(cli, validNodeNames, "Bob") + timeout = waits.expectedTransactionExecutionTime(nodeCount) + cli.enterCmd("client Alice credit 500 to Bob") - waitRequestSuccess(cli, numOfNodes) + waitRequestSuccess(cli, nodeCount, customTimeout=timeout) cli.enterCmd("client Alice balance") - waitRequestSuccess(cli, numOfNodes * 2) - waitBalanceChange(cli, numOfNodes, 500) + waitRequestSuccess(cli, nodeCount * 2, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 500, customTimeout=timeout) cli.enterCmd("client Bob balance") - waitRequestSuccess(cli, numOfNodes * 3) - waitBalanceChange(cli, numOfNodes, 1500) + waitRequestSuccess(cli, nodeCount * 3, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 1500, customTimeout=timeout) cli.enterCmd("client Bob credit 10 to Alice") - waitRequestSuccess(cli, numOfNodes * 4) + waitRequestSuccess(cli, nodeCount * 4, customTimeout=timeout) cli.enterCmd("client Bob balance") - waitRequestSuccess(cli, numOfNodes * 5) - waitBalanceChange(cli, numOfNodes, 1490) + waitRequestSuccess(cli, nodeCount * 5, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 1490, customTimeout=timeout) cli.enterCmd("client Bob credit 100 to Alice") - 
waitRequestSuccess(cli, numOfNodes * 6) + waitRequestSuccess(cli, nodeCount * 6, customTimeout=timeout) cli.enterCmd("client Alice balance") - waitRequestSuccess(cli, numOfNodes * 7) - waitBalanceChange(cli, numOfNodes, 610) + waitRequestSuccess(cli, nodeCount * 7, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 610, customTimeout=timeout) cli.enterCmd("client Bob balance") - waitRequestSuccess(cli, numOfNodes * 8) - waitBalanceChange(cli, numOfNodes, 1390) + waitRequestSuccess(cli, nodeCount * 8, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 1390, customTimeout=timeout) createClientAndConnect(cli, validNodeNames, "Carol") cli.enterCmd("client Carol credit 50 to Bob") - waitRequestSuccess(cli, numOfNodes * 9) + waitRequestSuccess(cli, nodeCount * 9, customTimeout=timeout) cli.enterCmd("client Bob balance") - waitRequestSuccess(cli, numOfNodes * 10) - waitBalanceChange(cli, numOfNodes, 1440) + waitRequestSuccess(cli, nodeCount * 10, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 1440, customTimeout=timeout) cli.enterCmd("client Carol balance") - waitRequestSuccess(cli, numOfNodes * 11) - waitBalanceChange(cli, numOfNodes, 950) + waitRequestSuccess(cli, nodeCount * 11, customTimeout=timeout) + waitBalanceChange(cli, nodeCount, 950, customTimeout=timeout) diff --git a/plenum/test/cli/test_status_command.py b/plenum/test/cli/test_status_command.py index bee02c7073..96d5c80c30 100644 --- a/plenum/test/cli/test_status_command.py +++ b/plenum/test/cli/test_status_command.py @@ -118,8 +118,8 @@ def testStatusAfterClientAdded(cli, validNodeNames, createAllNodes): clientName = "Joe" cli.enterCmd("new client {}".format(clientName)) - fVal = util.getMaxFailures(len(validNodeNames)) - connectionTimeout = waits.expectedClientConnectionTimeout(fVal) + connectionTimeout = \ + waits.expectedClientToPoolConnectionTimeout(len(validNodeNames)) waitClientConnected(cli, validNodeNames, clientName) diff --git a/plenum/test/client/test_client.py 
b/plenum/test/client/test_client.py index a0faaee1da..f42a4b5600 100644 --- a/plenum/test/client/test_client.py +++ b/plenum/test/client/test_client.py @@ -61,9 +61,6 @@ def testClientShouldNotBeAbleToConnectToNodesNodeStack(pool): """ async def go(ctx): - # for n in ctx.nodeset: - # n.nodestack.keep.auto = AutoMode.never - nodestacksVersion = {k: v.ha for k, v in ctx.nodeset.nodeReg.items()} client1, _ = genTestClient(nodeReg=nodestacksVersion, tmpdir=ctx.tmpdir) for node in ctx.nodeset: @@ -191,7 +188,7 @@ def testReplyWhenRepliesFromExactlyFPlusOneNodesAreSame(looper, # change two responses to something different for i in range(2): msg = next(replies) - msg[f.RESULT.nm][TXN_ID] = str(i) + "Some random id" + msg[f.RESULT.nm][f.SIG.nm] = str(i) + "Some random id" checkResponseCorrectnessFromNodes(client1.inBox, request.reqId, F) @@ -208,7 +205,8 @@ def testReplyWhenRequestAlreadyExecuted(looper, nodeSet, client1, sent1): originalRequestResponsesLen = nodeCount * 2 duplicateRequestRepliesLen = nodeCount # for a duplicate request we need to - client1.nodestack._enqueueIntoAllRemotes(sent1, None) + serializedPayload = client1.nodestack.signAndSerialize(sent1, None) + client1.nodestack._enqueueIntoAllRemotes(serializedPayload, None) def chk(): assertLength([response for response in client1.inBox diff --git a/plenum/test/client/test_client_authn.py b/plenum/test/client/test_client_authn.py index 46f14db08a..7c6f42851e 100644 --- a/plenum/test/client/test_client_authn.py +++ b/plenum/test/client/test_client_authn.py @@ -16,7 +16,7 @@ def cli(): @pytest.fixture(scope="module") def sa(cli): sa = SimpleAuthNr() - sa.addClient(cli.identifier, cli.verkey) + sa.addIdr(cli.identifier, cli.verkey) return sa @@ -46,7 +46,7 @@ def testMessageModified(sa, cli, msg, sig): def testAnotherAuthenticatorCanAuthenticate(sa, cli, msg, sig): sa2 = SimpleAuthNr() - sa2.addClient(cli.identifier, cli.verkey) + sa2.addIdr(cli.identifier, cli.verkey) sa.authenticate(msg, idr, sig) diff --git 
a/plenum/test/client/test_client_retry.py b/plenum/test/client/test_client_retry.py index 7da11da9a6..b30eed9be6 100644 --- a/plenum/test/client/test_client_retry.py +++ b/plenum/test/client/test_client_retry.py @@ -2,11 +2,12 @@ from functools import partial import pytest +import time from stp_core.loop.eventually import eventually, eventuallyAll from plenum.common.request import Request from plenum.common.types import Reply, RequestNack -from plenum.test.helper import sendRandomRequest, checkReqAck, waitReplyCount +from plenum.test.helper import sendRandomRequest, checkReqAck, wait_for_replies from plenum.test import waits whitelist = ['AlphaC unable to send message', ] @@ -47,13 +48,13 @@ def chkAcks(): timeout = waits.expectedReqAckQuorumTime() looper.run(eventually(chkAcks, retryWait=1, timeout=timeout)) idr, reqId = req.key - waitReplyCount(looper, client1, idr, reqId, 4) + wait_for_replies(looper, client1, idr, reqId, 4) def testClientRetryRequestWhenReplyNotReceived(looper, nodeSet, client1, wallet1, tconf): """ - A node say Alpha sends ACK but doesn't send REPLY. The connect resends the + A node say Alpha sends ACK but doesn't send REPLY. 
The client resends the request and gets REPLY """ @@ -72,10 +73,18 @@ def skipReplyOnce(msg, remoteName): req = sendRandomRequest(wallet1, client1) coros = [partial(checkReqAck, client1, node, *req.key) for node in nodeSet] timeout = waits.expectedReqAckQuorumTime() + start = time.perf_counter() looper.run(eventuallyAll(*coros, retryWait=.5, totalTimeout=timeout)) idr, reqId = req.key - waitReplyCount(looper, client1, idr, reqId, 3) - waitReplyCount(looper, client1, idr, reqId, 4) + # Client should get only 3 replies till the retry timeout since one node + # is not sending any replies + wait_for_replies(looper, client1, idr, reqId, 3, + custom_timeout=tconf.CLIENT_REPLY_TIMEOUT-1) + end = time.perf_counter() + # Client should wait till the retry timeout but after that should + # get the reply from the remaining node + looper.runFor(tconf.CLIENT_REPLY_TIMEOUT-(end-start)) + wait_for_replies(looper, client1, idr, reqId, 4) def testClientNotRetryRequestWhenReqnackReceived(looper, nodeSet, client1, @@ -117,17 +126,32 @@ def onlyTransNack(msg, remoteName): assert client1.spylog.count(client1.resendRequests.__name__) == totalResends idr, reqId = req.key - waitReplyCount(looper, client1, idr, reqId, 3) + wait_for_replies(looper, client1, idr, reqId, 3) alpha.clientMsgRouter.routes[Request] = origProcReq alpha.transmitToClient = origTrans +@pytest.fixture(scope="function") +def withFewerRetryReq(tconf, tdir, request): + oldRetryReplyCount = tconf.CLIENT_MAX_RETRY_REPLY + oldRetryReplyTimeout = tconf.CLIENT_REPLY_TIMEOUT + tconf.CLIENT_MAX_RETRY_REPLY = 3 + tconf.CLIENT_REPLY_TIMEOUT = 5 + + def reset(): + tconf.CLIENT_MAX_RETRY_REPLY = oldRetryReplyCount + tconf.CLIENT_REPLY_TIMEOUT = oldRetryReplyTimeout + + request.addfinalizer(reset) + return tconf + + def testClientNotRetryingRequestAfterMaxTriesDone(looper, nodeSet, client1, wallet1, - tconf): + withFewerRetryReq): """ A client sends Request to a node but the node never responds to client. 
The client resends the request but only the number of times defined in its @@ -148,15 +172,19 @@ def dontTransmitReply(msg, remoteName): req = sendRandomRequest(wallet1, client1) # Wait for more than REPLY timeout - timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + \ - tconf.CLIENT_REQACK_TIMEOUT * tconf.CLIENT_MAX_RETRY_REPLY + # +1 because we have to wait one more retry timeout to make sure what + # client cleaned his buffers (expectingAcksFor, expectingRepliesFor) + retryTime = withFewerRetryReq.CLIENT_REPLY_TIMEOUT * \ + (withFewerRetryReq.CLIENT_MAX_RETRY_REPLY + 1) + timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + retryTime + looper.runFor(timeout) idr, reqId = req.key - waitReplyCount(looper, client1, idr, reqId, 3) + wait_for_replies(looper, client1, idr, reqId, 3) assert client1.spylog.count(client1.resendRequests.__name__) == \ - (totalResends + tconf.CLIENT_MAX_RETRY_REPLY) + (totalResends + withFewerRetryReq.CLIENT_MAX_RETRY_REPLY) assert req.key not in client1.expectingAcksFor assert req.key not in client1.expectingRepliesFor alpha.processRequest = origTrans diff --git a/plenum/test/client/test_client_sends_to_f_plus_one_nodes.py b/plenum/test/client/test_client_sends_to_f_plus_one_nodes.py index bb623dad65..a532a904ca 100644 --- a/plenum/test/client/test_client_sends_to_f_plus_one_nodes.py +++ b/plenum/test/client/test_client_sends_to_f_plus_one_nodes.py @@ -1,8 +1,10 @@ from plenum.test.client.conftest import passThroughReqAcked1 -from plenum.client.client import Client +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies from plenum.test.malicious_behaviors_client import \ genDoesntSendRequestToSomeNodes +from plenum.test.node_catchup.helper import waitNodeDataEquality + nodeCount = 4 clientFault = genDoesntSendRequestToSomeNodes("AlphaC") @@ -10,5 +12,36 @@ def testReplyWhenRequestSentToMoreThanFPlusOneNodes(looper, nodeSet, - fClient: Client, replied1): - pass + fClient, replied1, + wallet1): + """ + 
Alpha would not be sent request but other nodes will be, so Alpha will + just rely on propagates from other nodes + """ + alpha = nodeSet.Alpha + other_nodes = [n for n in nodeSet if n != alpha] + + def chk(req_count=1): + for node in nodeSet: + prc_req = node.processRequest.__name__ + prc_ppg = node.processPropagate.__name__ + if node != alpha: + # All nodes except alpha will receive requests from client + assert node.spylog.count(prc_req) == req_count + else: + # Alpha will not receive requests from client + assert node.spylog.count(prc_req) == 0 + + # All nodes will get propagates from others + assert node.spylog.count(prc_ppg) == req_count*(nodeCount - 1) + + # Ledger is same for all nodes + waitNodeDataEquality(looper, alpha, *other_nodes) + chk(1) + + more_reqs_count = 5 + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, fClient, + more_reqs_count, 1) + # Ledger is same for all nodes + waitNodeDataEquality(looper, alpha, *other_nodes) + chk(6) # Since one request is already sent as part of `replied1` diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index 2fbc854e58..970560f02a 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -9,6 +9,8 @@ from contextlib import ExitStack from copy import copy from functools import partial + +import time from typing import Dict, Any from plenum.test import waits @@ -28,7 +30,7 @@ from plenum.common.config_util import getConfig from stp_core.loop.eventually import eventually, eventuallyAll from plenum.common.exceptions import BlowUp -from stp_core.common.log import getlogger +from stp_core.common.log import getlogger, Logger from stp_core.loop.looper import Looper, Prodable from plenum.common.constants import TXN_TYPE, DATA, NODE, ALIAS, CLIENT_PORT, \ CLIENT_IP, NODE_PORT, NYM, CLIENT_STACK_SUFFIX, PLUGIN_BASE_DIR_PATH @@ -47,6 +49,7 @@ from plenum.test.test_node import TestNode, TestNodeSet, Pool, \ checkNodesConnected, ensureElectionsDone, genNodeReg +Logger.setLogLevel(logging.DEBUG) 
logger = getlogger() config = getConfig() @@ -93,6 +96,25 @@ def keyfunc(_): pytest.fail('\n'.join(to_prints)) +@pytest.fixture(scope="function", autouse=True) +def limitTestRunningTime(tconf): + st = time.time() + yield + runningTime = time.time() - st + if runningTime > tconf.TestRunningTimeLimitSec: + pytest.fail( + 'The running time of each test is limited by {} sec ' + '(actually the test has taken {:2.1f} sec).\n' + 'In order to make the test passed there are two options:\n' + '\t1. Make the test faster (for example: override default ' + 'timeouts ONLY for the tests, do not wait ' + '`with pytest.raises(..)` and so on)\n' + '\t2. Override the `limitTestRunningTime` fixture ' + 'for the test module.\n' + 'Firstly, try to use the option #1.' + ''.format(tconf.TestRunningTimeLimitSec, runningTime)) + + @pytest.fixture(scope="session", autouse=True) def setResourceLimits(): try: @@ -138,8 +160,9 @@ def getValueFromModule(request, name: str, default: Any = None): PLUGIN_BASE_DIR_PATH: testPluginBaseDirPath, PLUGIN_TYPE_STATS_CONSUMER: "stats_consumer" }, - 'UpdateGenesisPoolTxnFile': False, - 'EnsureLedgerDurability': False + 'EnsureLedgerDurability': False, + 'Max3PCBatchSize': 1, + 'DELTA': .8 } @@ -150,8 +173,6 @@ def allPluginsPath(): @pytest.fixture(scope="module") def keySharedNodes(startedNodes): - # for n in startedNodes: - # n.startKeySharing() return startedNodes @@ -191,7 +212,8 @@ def logcapture(request, whitelist, concerningLogLevels): '.+ failed to ping .+ at', 'discarding message (NOMINATE|PRIMARY)', '.+ rid .+ has been removed', - 'last try...' 
+ 'last try...', + 'has uninitialised socket' ] wlfunc = inspect.isfunction(whitelist) @@ -306,7 +328,7 @@ def ensureView(nodeSet, looper, up): @pytest.fixture("module") -def delayedPerf(nodeSet): +def delayed_perf_chk(nodeSet): for node in nodeSet: node.delayCheckPerformance(20) @@ -350,7 +372,7 @@ def reqAcked1(looper, nodeSet, client1, sent1, faultyNodes): numerOfNodes = len(nodeSet) # Wait until request received by all nodes - propTimeout = waits.expectedClientToNodeMessageDeliveryTime(numerOfNodes) + propTimeout = waits.expectedClientToPoolRequestDeliveryTime(numerOfNodes) coros = [partial(checkLastClientReqForNode, node, sent1) for node in nodeSet] looper.run(eventuallyAll(*coros, diff --git a/plenum/test/delayers.py b/plenum/test/delayers.py index 550162be82..8ad4fffdad 100644 --- a/plenum/test/delayers.py +++ b/plenum/test/delayers.py @@ -1,7 +1,8 @@ from typing import Iterable from plenum.common.types import f, Propagate, PrePrepare, \ - Prepare, Commit, InstanceChange, LedgerStatus, ConsistencyProof, CatchupReq + Prepare, Commit, InstanceChange, LedgerStatus, ConsistencyProof, CatchupReq, \ + Nomination, CatchupRep from plenum.common.constants import OP_FIELD_NAME from plenum.common.util import getCallableName from plenum.test.test_client import TestClient @@ -62,6 +63,11 @@ def inner(action_pair): return inner +def nom_delay(delay: float): + # Delayer of NOMINATE requests + return delayerMsgTuple(delay, Nomination) + + def ppgDelay(delay: float): # Delayer of PROPAGATE requests return delayerMsgTuple(delay, Propagate) @@ -97,11 +103,16 @@ def cpDelay(delay: float): return delayerMsgTuple(delay, ConsistencyProof) -def crDelay(delay: float): +def cqDelay(delay: float): # Delayer of CATCHUP_REQ requests return delayerMsgTuple(delay, CatchupReq) +def cr_delay(delay: float): + # Delayer of CATCHUP_REP requests + return delayerMsgTuple(delay, CatchupRep) + + def delay(what, frm, to, howlong): from plenum.test.test_node import TestNode diff --git 
a/plenum/test/helper.py b/plenum/test/helper.py index bea42cdf94..0cde3fba16 100644 --- a/plenum/test/helper.py +++ b/plenum/test/helper.py @@ -4,38 +4,36 @@ import string from _signal import SIGINT from functools import partial -from itertools import permutations +from itertools import permutations, combinations from shutil import copyfile from sys import executable from time import sleep - -from psutil import Popen from typing import Tuple, Iterable, Dict, Optional, NamedTuple, \ List, Any, Sequence from typing import Union +from psutil import Popen + from plenum.client.client import Client from plenum.client.wallet import Wallet -from stp_core.common.log import getlogger -from stp_core.loop.looper import Looper +from plenum.common.constants import REPLY, REQACK, REQNACK, REJECT, OP_FIELD_NAME from plenum.common.request import Request -from plenum.common.constants import REPLY, REQACK, TXN_ID, REQNACK, OP_FIELD_NAME from plenum.common.types import Reply, f, PrePrepare from plenum.common.util import getMaxFailures, \ checkIfMoreThanFSameItems from plenum.config import poolTransactionsFile, domainTransactionsFile -from stp_core.loop.eventually import eventuallyAll, eventually - -from stp_core.network.util import checkPortAvailable from plenum.server.node import Node +from plenum.test import waits from plenum.test.msgs import randomMsg from plenum.test.spy_helpers import getLastClientReqReceivedForNode, getAllArgs, \ getAllReturnVals from plenum.test.test_client import TestClient, genTestClient from plenum.test.test_node import TestNode, TestReplica, TestNodeSet, \ - checkPoolReady, checkNodesConnected, ensureElectionsDone, NodeRef -from plenum.test import waits - + checkNodesConnected, ensureElectionsDone, NodeRef +from stp_core.common.log import getlogger +from stp_core.loop.eventually import eventuallyAll, eventually +from stp_core.loop.looper import Looper +from stp_core.network.util import checkPortAvailable DelayRef = NamedTuple("DelayRef", [ ("op", 
Optional[str]), @@ -109,7 +107,13 @@ def waitForSufficientRepliesForRequests(looper, timeoutPerRequest = customTimeoutPerReq or \ waits.expectedTransactionExecutionTime(nodeCount) - totalTimeout = timeoutPerRequest * len(requestIds) + # here we try to take into account what timeout for execution + # N request - totalTimeout should be in + # timeoutPerRequest < totalTimeout < timeoutPerRequest * N + # we cannot just take (timeoutPerRequest * N) because it is so huge. + # (for timeoutPerRequest=5 and N=10, totalTimeout=50sec) + # lets start with some simple formula: + totalTimeout = (1 + len(requestIds) / 10) * timeoutPerRequest coros = [] for requestId in requestIds: @@ -145,8 +149,8 @@ def checkResponseCorrectnessFromNodes(receivedMsgs: Iterable, reqId: int, """ the client must get at least :math:`2f+1` responses """ - msgs = [(msg[f.RESULT.nm][f.REQ_ID.nm], msg[f.RESULT.nm][TXN_ID]) for msg in - getRepliesFromClientInbox(receivedMsgs, reqId)] + msgs = [(msg[f.RESULT.nm][f.REQ_ID.nm], msg[f.RESULT.nm][f.IDENTIFIER.nm]) + for msg in getRepliesFromClientInbox(receivedMsgs, reqId)] groupedMsgs = {} for tpl in msgs: groupedMsgs[tpl] = groupedMsgs.get(tpl, 0) + 1 @@ -336,7 +340,7 @@ def requestReturnedToNode(node: TestNode, identifier: str, reqId: int, instId: int): params = getAllArgs(node, node.processOrdered) # Skipping the view no and time from each ordered request - recvdOrderedReqs = [p['ordered'][:1] + p['ordered'][2:-1] for p in params] + recvdOrderedReqs = [(p['ordered'].instId, *p['ordered'].reqIdr[0]) for p in params] expected = (instId, identifier, reqId) return expected in recvdOrderedReqs @@ -347,27 +351,28 @@ def checkRequestReturnedToNode(node: TestNode, identifier: str, reqId: int, def checkPrePrepareReqSent(replica: TestReplica, req: Request): - prePreparesSent = getAllArgs(replica, replica.doPrePrepare) - expected = req.reqDigest - assert expected in [p["reqDigest"] for p in prePreparesSent] + prePreparesSent = getAllArgs(replica, 
replica.sendPrePrepare) + expectedDigest = TestReplica.batchDigest([req]) + assert expectedDigest in [p["ppReq"].digest for p in prePreparesSent] + assert [(req.identifier, req.reqId)] in \ + [p["ppReq"].reqIdr for p in prePreparesSent] def checkPrePrepareReqRecvd(replicas: Iterable[TestReplica], expectedRequest: PrePrepare): for replica in replicas: params = getAllArgs(replica, replica.canProcessPrePrepare) - assert expectedRequest[:-1] in [p['pp'][:-1] for p in params] + assert expectedRequest.reqIdr in [p['pp'].reqIdr for p in params] def checkPrepareReqSent(replica: TestReplica, identifier: str, reqId: int): - paramsList = getAllArgs(replica, replica.canSendPrepare) + paramsList = getAllArgs(replica, replica.canPrepare) rv = getAllReturnVals(replica, - replica.canSendPrepare) - for params in paramsList: - req = params['request'] - assert req.identifier == identifier - assert req.reqId == reqId - assert all(rv) + replica.canPrepare) + assert [(identifier, reqId)] in \ + [p["ppReq"].reqIdr for p in paramsList] + idx = [p["ppReq"].reqIdr for p in paramsList].index([(identifier, reqId)]) + assert rv[idx] def checkSufficientPrepareReqRecvd(replica: TestReplica, viewNo: int, @@ -418,12 +423,14 @@ def checkReplyCount(client, idr, reqId, count): senders.add(sdr) assertLength(senders, count) -def waitReplyCount(looper, client, idr, reqId, count): - numOfNodes = len(client.nodeReg) - timeout = waits.expectedTransactionExecutionTime(numOfNodes) + +def wait_for_replies(looper, client, idr, reqId, count, custom_timeout=None): + timeout = custom_timeout or waits.expectedTransactionExecutionTime( + len(client.nodeReg)) looper.run(eventually(checkReplyCount, client, idr, reqId, count, timeout=timeout)) + def checkReqNackWithReason(client, reason: str, sender: str): found = False for msg, sdr in client.inBox: @@ -431,9 +438,58 @@ def checkReqNackWithReason(client, reason: str, sender: str): and sdr == sender: found = True break + assert found, "there is no Nack with reason: 
{}".format(reason) + + +def wait_negative_resp(looper, client, reason, sender, timeout, chk_method): + return looper.run(eventually(chk_method, + client, + reason, + sender, + timeout=timeout)) + + +def waitReqNackWithReason(looper, client, reason: str, sender: str): + timeout = waits.expectedReqNAckQuorumTime() + return wait_negative_resp(looper, client, reason, sender, timeout, + checkReqNackWithReason) + + +def checkRejectWithReason(client, reason: str, sender: str): + found = False + for msg, sdr in client.inBox: + if msg[OP_FIELD_NAME] == REJECT and reason in msg.get(f.REASON.nm, "")\ + and sdr == sender: + found = True + break assert found +def waitRejectWithReason(looper, client, reason: str, sender: str): + timeout = waits.expectedReqRejectQuorumTime() + return wait_negative_resp(looper, client, reason, sender, timeout, + checkRejectWithReason) + + +def ensureRejectsRecvd(looper, nodes, client, reason, timeout=5): + for node in nodes: + looper.run(eventually(checkRejectWithReason, client, reason, + node.clientstack.name, retryWait=1, + timeout=timeout)) + + +def waitReqNackFromPoolWithReason(looper, nodes, client, reason): + for node in nodes: + waitReqNackWithReason(looper, client, reason, + node.clientstack.name) + + +def waitRejectFromPoolWithReason(looper, nodes, client, reason): + for node in nodes: + waitRejectWithReason(looper, client, reason, + node.clientstack.name) + + def checkViewNoForNodes(nodes: Iterable[TestNode], expectedViewNo: int = None): """ Checks if all the given nodes have the expected view no @@ -461,7 +517,7 @@ def waitForViewChange(looper, nodeSet, expectedViewNo=None, customTimeout = None Raises exception when time is out """ - timeout = customTimeout or waits.expectedViewChangeTime(len(nodeSet)) + timeout = customTimeout or waits.expectedPoolElectionTimeout(len(nodeSet)) return looper.run(eventually(checkViewNoForNodes, nodeSet, expectedViewNo, @@ -532,10 +588,22 @@ def checkLedgerEquality(ledger1, ledger2): def 
checkAllLedgersEqual(*ledgers): - for l1, l2 in permutations(ledgers, 2): + for l1, l2 in combinations(ledgers, 2): checkLedgerEquality(l1, l2) +def checkStateEquality(state1, state2): + assertEquality(state1.as_dict, state2.as_dict) + assertEquality(state1.committedHeadHash, state2.committedHeadHash) + assertEquality(state1.committedHead, state2.committedHead) + + +def check_seqno_db_equality(db1, db2): + assert db1.size == db2.size + assert {bytes(k): bytes(v) for k, v in db1._keyValueStorage.iter()} == \ + {bytes(k): bytes(v) for k, v in db2._keyValueStorage.iter()} + + def randomText(size): return ''.join(random.choice(string.ascii_letters) for _ in range(size)) @@ -599,6 +667,7 @@ def run_script(script, *args): p.wait(timeout=1) assert p.poll() == 0, 'script failed' + def viewNoForNodes(nodes): viewNos = {node.viewNo for node in nodes} assert 1 == len(viewNos) diff --git a/plenum/test/input_validation/__init__.py b/plenum/test/input_validation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/input_validation/conftest.py b/plenum/test/input_validation/conftest.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/input_validation/fields.py b/plenum/test/input_validation/fields.py new file mode 100644 index 0000000000..0908fa053d --- /dev/null +++ b/plenum/test/input_validation/fields.py @@ -0,0 +1,147 @@ +from plenum.common.request import SafeRequest +from plenum.test.input_validation.helper import * + +name_field = NonEmptyStringField('name') + +view_no_field = PositiveNumberField('viewNo') + +inst_id_field = PositiveNumberField('instId') + +ord_seq_no_field = PositiveNumberField('ordSeqNo') + +round_field = PositiveNumberField('round') + +tie_among_field = ListField('tieAmong', TieAmongField()) + +req_idr_field = RequestIdrField("reqIdr") + +pp_seq_no_field = PositiveNumberField('ppSeqNo') + +pp_time_field = TimestampField('ppTime') + +ledger_id_field = LedgerIdFiled("ledgerId") + +state_root_field 
= MerkleRootField('stateRootHash') + +txn_root_hash_field = MerkleRootField("txnRootHash") + +sender_client_field = NonEmptyStringField('senderClient') + +discarded_field = PositiveNumberField('discarded') + +digest_field = NonEmptyStringField('digest') + +reason_field = PositiveNumberField('reason') + +ord_seq_nos_field = ListField('ordSeqNos', PositiveNumberField()) + +seq_no_start_field = PositiveNumberField('seqNoStart') + +seq_no_stop_field = PositiveNumberField('seqNoEnd') + +txn_seq_no_field = PositiveNumberField('txnSeqNo') + +merkle_root_field = MerkleRootField('merkleRoot') + +old_merkle_root_field = MerkleRootField('oldMerkleRoot') + +new_merkle_root_field = MerkleRootField('newMerkleRoot') + +hashes_field = ListField('hashes', HexString64Field()) + +catchup_till_field = PositiveNumberField('catchupTill') + +cons_proof_field = ListField('consProof', HexString64Field()) + +identifier_field = IdentifierField('identifier') + +req_id_field = PositiveNumberField('reqId') + +signature_field = SignatureField('signature') + +node_port_field = NetworkPortField('node_port') + +client_port_field = NetworkPortField('client_port') + +node_ip_field = NetworkIpAddressField('node_ip') + +client_ip_field = NetworkIpAddressField('client_ip') + +alias_field = NonEmptyStringField('alias') + +services_field = ServicesNodeOperation('services') + +dest_field = IdentifierField('dest') + +verkey_field = VerkeyField('verkey') + +role_field = RoleField('role') + + +# creates node operation field +client_node_op_data = MessageDescriptor( + dict, + fields=[ + node_port_field, + client_port_field, + node_ip_field, + client_ip_field, + alias_field, + services_field, + ], + name='data', + optional_fields=(node_port_field, client_port_field, node_ip_field, + client_ip_field, alias_field, services_field), + ) + + +def create_nym_op(name=None): + return MessageDescriptor( + dict, + fields=[ + ConstantField('type', '1'), + alias_field, + verkey_field, + dest_field, + role_field, + ], + 
name=name, + ) + + +def create_node_op(name=None): + return MessageDescriptor( + dict, + fields=[ + ConstantField('type', '0'), + dest_field, + client_node_op_data, + ], + name=name + ) + + +def build_client_request_message(op_field, name=None): + return MessageDescriptor( + klass=SafeRequest, + fields=[ + identifier_field, + req_id_field, + op_field, + signature_field, + ], + optional_fields=(signature_field,), + name=name + ) + + +# check complex field using NODE op +node_operation_field = create_node_op('operation') + +nym_operation_field = create_nym_op('operation') + +client_request_field = build_client_request_message(create_node_op('operation'), 'request') + +tnxs_field = ListField('txns', build_client_request_message(create_node_op('operation'))) + +messages_field = ListField('messages', build_client_request_message(create_node_op('operation'))) diff --git a/plenum/test/input_validation/helper.py b/plenum/test/input_validation/helper.py new file mode 100644 index 0000000000..bd44bf0e2b --- /dev/null +++ b/plenum/test/input_validation/helper.py @@ -0,0 +1,319 @@ +from collections import namedtuple +from copy import deepcopy + +import itertools + + +class TestCases: + + @property + def positive_test_cases(self): + raise NotImplementedError + + @property + def negative_test_cases(self): + raise NotImplementedError + + +class TestFieldBase(TestCases): + + def __init__(self, name=None): + self.name = name + + @property + def field_type(self): + raise NotImplementedError + + +class ConstantField(TestFieldBase): + field_types = None + + def __init__(self, name, value): + self.value = value + super().__init__(name) + + @property + def negative_test_cases(self): + return [] + + @property + def positive_test_cases(self): + return [self.value] + + +class PositiveNumberField(TestFieldBase): + negative_test_cases = (-1,) + positive_test_cases = (0, 1) + field_types = (int,) + + +class NonEmptyStringField(TestFieldBase): + negative_test_cases = ('',) + 
positive_test_cases = ('foo',) + field_types = (str,) + + +class HexString64Field(TestFieldBase): + # TODO implement + negative_test_cases = ( + #'', + #'fba333c13994f63edd900cdc625b88d0dcee6dda7df2c6e9b5bcd5c1072c04f', # 63 characters + #'77fba333c13994f63edd900cdc625b88d0dcee6dda7df2c6e9b5bcd5c1072c04f', # 65 characters + #'xfba333c13994f63edd900cdc625b88d0dcee6dda7df2c6e9b5bcd5c1072c04f', # first char is 'x' + ) + positive_test_cases = ( + '7fba333c13994f63edd900cdc625b88d0dcee6dda7df2c6e9b5bcd5c1072c04f', # lower case + '7FBA333C13994F63EDD900CDC625B88D0DCEE6DDA7DF2C6E9B5BCD5C1072C04F' # upper case + ) + field_types = (str,) + + +class SignatureField(TestFieldBase): + # TODO implement + negative_test_cases = () + positive_test_cases = ( + '2JAVzLWFWxAC6anXKaBZAsKDCoJn7y6z8Q3AjxsrDn7' + 'U2NRRcjiCVpexhh6urx2Uc8HCmdW7U1pmiCLNjePEBMkR' + ) + field_types = (str,) + + +class MerkleRootField(TestFieldBase): + negative_test_cases = () + positive_test_cases = ('47DEQpj8HBSa+\\/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=',) + field_types = (str,) + pass + # TODO implement + # negative_test_cases = ( + # '', + # '7DEQpj8HBSa+\\/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=', # 43 characters + # ) + # positive_test_cases = ( + # '47DEQpj8HBSa+\\/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=' + # ) + # field_types = str + + +class TimestampField(TestFieldBase): + negative_test_cases = (-1,) + positive_test_cases = (0, 1492619799822.973,) + field_types = (float, int) + + +class ListField(TestFieldBase): + field_types = (list,) + + def __init__(self, name, inner_field): + super().__init__(name) + self.inner_field = inner_field + + @property + def negative_test_cases(self): + values = [] + for val in self.inner_field.negative_test_cases: + values.append(list(self.inner_field.positive_test_cases) + [val]) + return values + + @property + def positive_test_cases(self): + return [self.inner_field.positive_test_cases] + + +class LedgerIdFiled(TestFieldBase): + negative_test_cases = (-1, 2, 3) + positive_test_cases = (0, 1) 
+ field_types = (int,) + + +class IdrField(NonEmptyStringField): + # TODO Only non empty string? + pass + + +class RequestIdrField(TestFieldBase): + field_types = (list,) + idr_field = IdrField() + ts_field = TimestampField() + + @property + def negative_test_cases(self): + return [ + [[self.idr_field.positive_test_cases[0], self.ts_field.negative_test_cases[0]]], + [[self.idr_field.negative_test_cases[0], self.ts_field.positive_test_cases[0]]], + ] + + @property + def positive_test_cases(self): + return [ + [[self.idr_field.positive_test_cases[0], self.ts_field.positive_test_cases[0]]], + ] + + +class TieAmongField(TestFieldBase): + field_types = (list,) + name_field = NonEmptyStringField() + ts_field = TimestampField() + + @property + def negative_test_cases(self): + return [ + [self.name_field.positive_test_cases[0], self.ts_field.negative_test_cases[0]], + [self.name_field.negative_test_cases[0], self.ts_field.positive_test_cases[0]], + ] + + @property + def positive_test_cases(self): + return [ + [self.name_field.positive_test_cases[0], self.ts_field.positive_test_cases[0]], + ] + + +class IdentifierField(NonEmptyStringField): + # TODO NonEmptyStringField definitely not enough + pass + + +class NetworkPortField(TestFieldBase): + field_types = (int,) + + @property + def negative_test_cases(self): + return -1, 65535 + 1 + + @property + def positive_test_cases(self): + return 0, 9700, 65535 + + +class NetworkIpAddressField(TestFieldBase): + field_types = (str,) + + @property + def negative_test_cases(self): + return 'x', '0.0.0.0', '127.0.0', '256.0.0.1', 'x001:db8:85a3::8a2e:370:7334' + + @property + def positive_test_cases(self): + return '8.8.8.8', '127.0.0.1', '2001:db8:85a3::8a2e:370:7334' + + +class ServicesNodeOperation(TestFieldBase): + field_types = (list,) + VALIDATOR = 'VALIDATOR' + OBSERVER = 'OBSERVER' + + @property + def negative_test_cases(self): + return [ + ['foo'], + [self.VALIDATOR, 'foo'], + ] + + @property + def positive_test_cases(self): + 
return [ + [], + [self.VALIDATOR], + [self.VALIDATOR, self.VALIDATOR], + ] + + +class VerkeyField(NonEmptyStringField): + # TODO implement + pass + + +class RoleField(TestFieldBase): + field_types = (str,) + roles = ('0', '2') + + @property + def negative_test_cases(self): + return ['', 'foo', '3'] + + @property + def positive_test_cases(self): + return self.roles + + +TestCase = namedtuple('TestCase', ['case', 'description']) + + +class MessageDescriptor(TestFieldBase): + field_types = (dict, ) + + def __init__(self, klass, fields, optional_fields=None, name=None): + self.klass = klass + self.fields = fields + self.optional_fields = optional_fields + super().__init__(name) + + @property + def positive_test_cases(self): + return list(itertools.chain( + self.positive_test_cases_valid_message, + self.positive_test_cases_missed_optional_field, + )) + + @property + def negative_test_cases(self): + return list(itertools.chain( + self.negative_test_cases_invalid_value, + self.negative_test_cases_missed_required_field, + self.negative_test_cases_extra_field, + self.negative_test_cases_wrong_type, + )) + + @property + def positive_test_cases_valid_message(self): + for field in self.fields: + m = deepcopy(self._any_positive_case_copy) + for v in field.positive_test_cases: + m[field.name] = v + yield m + + @property + def positive_test_cases_missed_optional_field(self): + for field in self.fields: + if self.optional_fields and field in self.optional_fields: + m = self._any_positive_case_copy + del m[field.name] + yield m + + @property + def negative_test_cases_invalid_value(self): + for field in self.fields: + for val in field.negative_test_cases: + m = self._any_positive_case_copy + m[field.name] = val + yield m + + @property + def negative_test_cases_missed_required_field(self): + for field in self.fields: + if not self.optional_fields or field not in self.optional_fields: + m = self._any_positive_case_copy + del m[field.name] + yield m + + @property + def 
negative_test_cases_extra_field(self): + m = self._any_positive_case_copy + m.update(foo='bar') + yield m + + @property + def negative_test_cases_wrong_type(self): + for field in self.fields: + m = self._any_positive_case_copy + for test_type in self._types_list: + if field.field_types is None or test_type in field.field_types: + continue + m[field.name] = test_type() + yield m + + _types_list = (str, int, dict, list, float, bytes, bytearray) + + @property + def _any_positive_case_copy(self): + return deepcopy({field.name: field.positive_test_cases[0] for field in self.fields}) diff --git a/plenum/test/input_validation/messages.py b/plenum/test/input_validation/messages.py new file mode 100644 index 0000000000..849b6321ae --- /dev/null +++ b/plenum/test/input_validation/messages.py @@ -0,0 +1,189 @@ +from plenum.common.request import Request +from plenum.common.types import Nomination, Reelection, Primary, Ordered, Propagate, PrePrepare, Prepare, Commit, \ + InstanceChange, Checkpoint, ThreePCState, LedgerStatus, ConsistencyProof, ConsProofRequest, CatchupReq, CatchupRep +from plenum.test.input_validation.fields import * +from plenum.test.input_validation.helper import * + +messages = ( + + # 3phase messages + MessageDescriptor( + klass=Propagate, + fields=[ + client_request_field, + sender_client_field, + ], + ), + + MessageDescriptor( + klass=PrePrepare, + fields=[ + inst_id_field, + view_no_field, + pp_seq_no_field, + pp_time_field, + req_idr_field, + discarded_field, + digest_field, + ledger_id_field, + state_root_field, + txn_root_hash_field, + ], + ), + + MessageDescriptor( + klass=Prepare, + fields=[ + inst_id_field, + view_no_field, + pp_seq_no_field, + digest_field, + state_root_field, + txn_root_hash_field, + ], + ), + + MessageDescriptor( + klass=Commit, + fields=[ + inst_id_field, + view_no_field, + pp_seq_no_field, + ], + ), + + MessageDescriptor( + klass=Ordered, + fields=[ + inst_id_field, + view_no_field, + req_idr_field, + pp_seq_no_field, + 
pp_time_field, + ledger_id_field, + state_root_field, + txn_root_hash_field, + ], + ), + + # Election + MessageDescriptor( + klass=Nomination, + fields=[ + name_field, + view_no_field, + inst_id_field, + ord_seq_no_field, + ], + ), + + MessageDescriptor( + klass=Reelection, + fields=[ + inst_id_field, + round_field, + tie_among_field, + view_no_field, + ], + ), + + MessageDescriptor( + klass=Primary, + fields=[ + name_field, + inst_id_field, + view_no_field, + ord_seq_no_field, + ], + ), + + MessageDescriptor( + klass=InstanceChange, + fields=[ + view_no_field, + reason_field, + ord_seq_nos_field, + ], + ), + + + MessageDescriptor( + klass=Checkpoint, + fields=[ + inst_id_field, + view_no_field, + seq_no_start_field, + seq_no_stop_field, + digest_field, + ], + ), + + MessageDescriptor( + klass=ThreePCState, + fields=[ + inst_id_field, + messages_field, + ], + ), + + # Ledger status + + MessageDescriptor( + klass=LedgerStatus, + fields=[ + ledger_id_field, + txn_seq_no_field, + merkle_root_field, + ], + ), + + MessageDescriptor( + klass=ConsistencyProof, + fields=[ + ledger_id_field, + seq_no_start_field, + seq_no_stop_field, + pp_seq_no_field, + old_merkle_root_field, + new_merkle_root_field, + hashes_field, + ], + ), + + MessageDescriptor( + klass=ConsProofRequest, + fields=[ + ledger_id_field, + seq_no_start_field, + seq_no_stop_field, + ], + ), + + MessageDescriptor( + klass=CatchupReq, + fields=[ + ledger_id_field, + seq_no_start_field, + seq_no_stop_field, + catchup_till_field, + ], + ), + + MessageDescriptor( + klass=CatchupRep, + fields=[ + ledger_id_field, + tnxs_field, + cons_proof_field, + ], + ), + + # client NODE request + build_client_request_message(node_operation_field), + + # client NYM request + build_client_request_message(nym_operation_field), + +) + +messages_names_shortcut = list(map(lambda x: x.klass.__name__, messages)) diff --git a/plenum/test/input_validation/test_client_node_op.py b/plenum/test/input_validation/test_client_node_op.py new 
file mode 100644 index 0000000000..b9543239eb --- /dev/null +++ b/plenum/test/input_validation/test_client_node_op.py @@ -0,0 +1,47 @@ +import pytest + +from plenum.common.messages.client_request import ClientNodeOperationData, ALIAS, SERVICES, NODE_IP, CLIENT_IP, \ + NODE_PORT, CLIENT_PORT + +op = ClientNodeOperationData() + + +def test_only_alias_passes(): + op.validate({ALIAS: 'aNode'}) + + +def test_empty_alias_fails(): + with pytest.raises(TypeError) as ex_info: + op.validate({ALIAS: ''}) + ex_info.match('validation error: empty string') + + +def test_missed_alias_fails(): + with pytest.raises(TypeError) as ex_info: + op.validate({SERVICES: []}) + ex_info.match('validation error: missed fields alias') + + +def test_missed_a_ha_field_fails(): + with pytest.raises(TypeError) as ex_info: + op.validate({ + ALIAS: 'aNode', + NODE_PORT: 9700, + CLIENT_IP: '8.8.8.8', + CLIENT_PORT: 9701, + }) + ex_info.match('validation error: missed fields node_ip') + + +def test_update_services_passes(): + op.validate({ALIAS: 'aNode', SERVICES: []}) + + +def test_update_ha_passes(): + op.validate({ + ALIAS: 'aNode', + NODE_IP: '8.8.8.8', + NODE_PORT: 9700, + CLIENT_IP: '8.8.8.8', + CLIENT_PORT: 9701, + }) diff --git a/plenum/test/input_validation/test_common_checks.py b/plenum/test/input_validation/test_common_checks.py new file mode 100644 index 0000000000..a2277b7893 --- /dev/null +++ b/plenum/test/input_validation/test_common_checks.py @@ -0,0 +1,56 @@ +import pytest + +from plenum.test.input_validation.messages import messages, messages_names_shortcut + + +# TODO: check error messages + + +@pytest.mark.skip('INDY-78. Roll away new validation logic') +# @pytest.mark.parametrize('descriptor', argvalues=messages, ids=messages_names_shortcut) +def test_message_valid(descriptor): + for m in descriptor.positive_test_cases_valid_message: + assert descriptor.klass(**m), 'Correct msg passes: {}'.format(m) + + +@pytest.mark.skip('INDY-78. 
Roll away new validation logic') +# @pytest.mark.parametrize('descriptor', argvalues=messages, ids=messages_names_shortcut) +def test_message_missed_optional_field_pass(descriptor): + for m in descriptor.positive_test_cases_missed_optional_field: + assert descriptor.klass(**m), 'Correct msg passes: {}'.format(m) + + +@pytest.mark.skip('INDY-78. Roll away new validation logic') +# @pytest.mark.parametrize('descriptor', argvalues=messages, ids=messages_names_shortcut) +def test_message_invalid_value_fail(descriptor): + for m in descriptor.negative_test_cases_invalid_value: + with pytest.raises(TypeError, message='did not raise {}'.format(m)) as exc_info: + descriptor.klass(**m) + assert exc_info.match(r'validation error: .*') + + +@pytest.mark.skip('INDY-78. Roll away new validation logic') +# @pytest.mark.parametrize('descriptor', argvalues=messages, ids=messages_names_shortcut) +def test_message_missed_required_field_fail(descriptor): + for m in descriptor.negative_test_cases_missed_required_field: + with pytest.raises(TypeError, message='did not raise {}'.format(m)) as exc_info: + descriptor.klass(**m) + assert exc_info.match(r'validation error: missed fields .*') + + +@pytest.mark.skip('INDY-78. Roll away new validation logic') +# @pytest.mark.parametrize('descriptor', argvalues=messages, ids=messages_names_shortcut) +def test_message_extra_field_fail(descriptor): + for m in descriptor.negative_test_cases_extra_field: + with pytest.raises(TypeError, message='did not raise {}'.format(m)) as exc_info: + descriptor.klass(**m) + assert exc_info.match(r'validation error: unknown field .*') + + +@pytest.mark.skip('INDY-78. 
Roll away new validation logic') +# @pytest.mark.parametrize('descriptor', argvalues=messages, ids=messages_names_shortcut) +def test_message_wrong_type_fail(descriptor): + for m in descriptor.negative_test_cases_wrong_type: + with pytest.raises(TypeError, message='did not raise {}'.format(m)) as exc_info: + descriptor.klass(**m) + assert exc_info.match(r'validation error: .*') diff --git a/plenum/test/input_validation/test_handle_one_node_message.py b/plenum/test/input_validation/test_handle_one_node_message.py new file mode 100644 index 0000000000..dab197def2 --- /dev/null +++ b/plenum/test/input_validation/test_handle_one_node_message.py @@ -0,0 +1,20 @@ +import pytest + + +@pytest.mark.skip('INDY-79. Implement') +def test_empty_args_fail(testNode): + before_msg = len(testNode.nodeInBox) + while pytest.raises(AssertionError): + testNode.handleOneNodeMsg(()) + assert before_msg == len(testNode.nodeInBox), \ + 'nodeInBox has not got a message' + + +@pytest.mark.skip('INDY-79. Implement') +def test_too_many_args_fail(testNode): + before_msg = len(testNode.nodeInBox) + testNode.handleOneNodeMsg(({}, 'otherNone', 'extra_arg')) + while pytest.raises(AssertionError): + testNode.handleOneNodeMsg(()) + assert before_msg == len(testNode.nodeInBox), \ + 'nodeInBox has not got a message' diff --git a/plenum/test/instances/helper.py b/plenum/test/instances/helper.py index c376ab43f9..d4dee5c0dd 100644 --- a/plenum/test/instances/helper.py +++ b/plenum/test/instances/helper.py @@ -6,6 +6,10 @@ def recvdPrePrepare(replica: TestReplica): return getAllArgs(replica, TestReplica.processPrePrepare) +def processedPrePrepare(replica: TestReplica): + return getAllArgs(replica, TestReplica.addToPrePrepares) + + def sentPrepare(replica: TestReplica, viewNo: int = None, ppSeqNo: int = None): params = getAllArgs(replica, TestReplica.doPrepare) diff --git a/plenum/test/instances/test_commit_digest.py b/plenum/test/instances/test_commit_digest.py deleted file mode 100644 index 
e4d45fe8ca..0000000000 --- a/plenum/test/instances/test_commit_digest.py +++ /dev/null @@ -1,55 +0,0 @@ -from functools import partial - -import pytest - -from stp_core.loop.eventually import eventually -from plenum.common.types import Commit -from plenum.common.util import adict -from plenum.server.suspicion_codes import Suspicions -from plenum.test.helper import getNodeSuspicions -from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ - send3PhaseMsgWithIncorrectDigest -from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica -from plenum.test import waits - - -whitelist = [Suspicions.CM_DIGEST_WRONG.reason, - 'cannot process incoming COMMIT'] - - -@pytest.fixture("module") -def setup(nodeSet, up): - primaryRep = getPrimaryReplica(nodeSet, 0) - nonPrimaryReps = getNonPrimaryReplicas(nodeSet, 0) - - faultyRep = nonPrimaryReps[0] - makeNodeFaulty(faultyRep.node, partial(send3PhaseMsgWithIncorrectDigest, - msgType=Commit, instId=0)) - - return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps, - faultyRep=faultyRep) - - -# noinspection PyIncorrectDocstring -def testCommitDigest(setup, looper, sent1): - """ - A replica COMMIT messages with incorrect digests to all other replicas. 
- Other replicas should raise suspicion for the COMMIT seen - """ - primaryRep = setup.primaryRep - nonPrimaryReps = setup.nonPrimaryReps - faultyRep = setup.faultyRep - - def chkSusp(): - for r in (primaryRep, *nonPrimaryReps): - if r.name != faultyRep.name: - # Every node except the one from which COMMIT with incorrect - # digest was sent should raise suspicion for the COMMIT - # message - susps = getNodeSuspicions(r.node, - Suspicions.CM_DIGEST_WRONG.code) - assert len(susps) == 1 - - numOfNodes = len(primaryRep.node.nodeReg) - timeout = waits.expectedTransactionExecutionTime(numOfNodes) - looper.run(eventually(chkSusp, retryWait=1, timeout=timeout)) diff --git a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py index 2328fa93bf..cf7bbb840b 100644 --- a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py +++ b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py @@ -19,7 +19,6 @@ # noinspection PyIncorrectDocstring -@pytest.mark.skip(reason="SOV-940") def testProtocolInstanceCannotBecomeActiveWithLessThanFourServers( tdir_for_func): """ @@ -27,17 +26,14 @@ def testProtocolInstanceCannotBecomeActiveWithLessThanFourServers( The status of the nodes will change from starting to started only after the addition of the fourth node to the system. 
""" - nodeCount = 16 - f = 5 - minimumNodesToBeUp = 16 - f + nodeCount = 13 + f = 4 + minimumNodesToBeUp = nodeCount - f nodeNames = genNodeNames(nodeCount) with TestNodeSet(names=nodeNames, tmpdir=tdir_for_func) as nodeSet: with Looper(nodeSet) as looper: - # for n in nodeSet: - # n.startKeySharing() - # helpers def genExpectedStates(connecteds: Iterable[str]): @@ -58,14 +54,12 @@ def addNodeBackAndCheck(nodeIdx: int, expectedStatus: Status): addNodeBack(nodeSet, looper, nodeNames[nodeIdx]) timeout = waits.expectedNodeStartUpTimeout() + \ - waits.expectedNodeInterconnectionTime(len(nodeSet)) + waits.expectedPoolInterconnectionTime(len(nodeSet)) looper.run(eventually(checkNodeStatusRemotesAndF, expectedStatus, nodeIdx, retryWait=1, timeout=timeout)) - # tests - logger.debug("Sharing keys") looper.run(checkNodesConnected(nodeSet)) diff --git a/plenum/test/instances/test_multiple_instance_change_msgs.py b/plenum/test/instances/test_multiple_instance_change_msgs.py index 8aae93d9e2..0b5c148377 100644 --- a/plenum/test/instances/test_multiple_instance_change_msgs.py +++ b/plenum/test/instances/test_multiple_instance_change_msgs.py @@ -13,11 +13,11 @@ nodeCount = 7 -@pytest.mark.skip(reason="Not yet implemented") +@pytest.mark.skip(reason="INDY-80. 
Not yet implemented") def testMultipleInstanceChangeMsgsMarkNodeAsSuspicious(looper, nodeSet, up): maliciousNode = nodeSet.Alpha for i in range(0, 5): - maliciousNode.send(InstanceChange(i)) + maliciousNode.send(maliciousNode._create_instance_change_msg(i, 0)) def chk(instId): for node in nodeSet: @@ -28,7 +28,7 @@ def chk(instId): assert arg['frm'] == maliciousNode.name numOfNodes = len(nodeSet) - instanceChangeTimeout = waits.expectedViewChangeTime(numOfNodes) + instanceChangeTimeout = waits.expectedPoolViewChangeStartedTimeout(numOfNodes) for i in range(0, 5): looper.run(eventually(chk, i, retryWait=1, timeout=instanceChangeTimeout)) diff --git a/plenum/test/malicious_behaviors_client.py b/plenum/test/malicious_behaviors_client.py index 6fc4892b9c..b4e9a0531b 100644 --- a/plenum/test/malicious_behaviors_client.py +++ b/plenum/test/malicious_behaviors_client.py @@ -36,8 +36,7 @@ def evilSend(self, msg, *rids, signer=None) -> None: logger.debug("EVIL: sending to less nodes {}, ignoring passed " "rids {} and sending to {} instead.". 
format(msg, rids, ovrdRids)) - for r in ovrdRids: - self._enqueue(msg, r, signer) + self.nodestack.send(msg, *ovrdRids, signer=signer) client.send = types.MethodType(evilSend, client) return client diff --git a/plenum/test/malicious_behaviors_node.py b/plenum/test/malicious_behaviors_node.py index dd18358fee..c47d679e17 100644 --- a/plenum/test/malicious_behaviors_node.py +++ b/plenum/test/malicious_behaviors_node.py @@ -6,7 +6,7 @@ import plenum.common.error from plenum.common.types import Propagate, PrePrepare, Prepare, ThreePhaseMsg, \ - Commit, Reply + Commit, Reply, f from plenum.common.request import Request, ReqDigest from plenum.common import util @@ -14,7 +14,7 @@ from stp_core.common.log import getlogger from plenum.server.replica import TPCStat from plenum.test.helper import TestReplica -from plenum.test.test_node import TestNode, TestReplica +from plenum.test.test_node import TestNode, TestReplica, getPrimaryReplica from plenum.test.delayers import ppDelay logger = getlogger() @@ -48,21 +48,22 @@ def delaysPrePrepareProcessing(node, delay: float=30, instId: int=None): # instance id but this looks more useful as a complete node can be malicious def sendDuplicate3PhaseMsg(node: TestNode, msgType: ThreePhaseMsg, count: int=2, instId=None): - def evilSendPrePrepareRequest(self, reqDigest: ReqDigest): - tm = time.time() - prePrepare = PrePrepare(self.instId, self.viewNo, - self.lastPrePrepareSeqNo+1, *reqDigest, tm) - logger.debug("EVIL: Creating pre-prepare message for request {}: {}". - format(reqDigest, prePrepare)) - self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = (reqDigest, tm) - sendDup(self, prePrepare, TPCStat.PrePrepareSent, count) + def evilSendPrePrepareRequest(self, ppReq: PrePrepare): + # tm = time.time() + # prePrepare = PrePrepare(self.instId, self.viewNo, + # self.lastPrePrepareSeqNo+1, tm, *reqDigest) + logger.debug("EVIL: Sending duplicate pre-prepare message: {}". 
+ format(ppReq)) + self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = ppReq + sendDup(self, ppReq, TPCStat.PrePrepareSent, count) def evilSendPrepare(self, request): prepare = Prepare(self.instId, request.viewNo, request.ppSeqNo, request.digest, - request.ppTime) + request.stateRootHash, + request.txnRootHash) logger.debug("EVIL: Creating prepare message for request {}: {}". format(request, prepare)) self.addToPrepares(prepare, self.name) @@ -71,9 +72,7 @@ def evilSendPrepare(self, request): def evilSendCommit(self, request): commit = Commit(self.instId, request.viewNo, - request.ppSeqNo, - request.digest, - request.ppTime) + request.ppSeqNo) logger.debug("EVIL: Creating commit message for request {}: {}". format(request, commit)) self.addToCommits(commit, self.name) @@ -101,7 +100,7 @@ def malign3PhaseSendingMethod(replica: TestReplica, msgType: ThreePhaseMsg, evilMethod = types.MethodType(evilMethod, replica) if msgType == PrePrepare: - replica.doPrePrepare = evilMethod + replica.sendPrePrepare = evilMethod elif msgType == Prepare: replica.doPrepare = evilMethod elif msgType == Commit: @@ -122,25 +121,27 @@ def malignInstancesOfNode(node: TestNode, malignMethod, instId: int=None): def send3PhaseMsgWithIncorrectDigest(node: TestNode, msgType: ThreePhaseMsg, instId: int=None): - def evilSendPrePrepareRequest(self, reqDigest: ReqDigest): - reqDigest = ReqDigest(reqDigest.identifier, reqDigest.reqId, "random") - tm = time.time() - prePrepare = PrePrepare(self.instId, self.viewNo, - self.lastPrePrepareSeqNo+1, *reqDigest, tm) - logger.debug("EVIL: Creating pre-prepare message for request {}: {}". 
- format(reqDigest, prePrepare)) - self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = (reqDigest, tm) - self.send(prePrepare, TPCStat.PrePrepareSent) - - def evilSendPrepare(self, request): + def evilSendPrePrepareRequest(self, ppReq: PrePrepare): + # reqDigest = ReqDigest(reqDigest.identifier, reqDigest.reqId, "random") + # tm = time.time() + # prePrepare = PrePrepare(self.instId, self.viewNo, + # self.lastPrePrepareSeqNo+1, *reqDigest, tm) + logger.debug("EVIL: Creating pre-prepare message for request : {}". + format(ppReq)) + ppReq = updateNamedTuple(ppReq, digest=ppReq.digest+'random') + self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = ppReq + self.send(ppReq, TPCStat.PrePrepareSent) + + def evilSendPrepare(self, ppReq): digest = "random" prepare = Prepare(self.instId, - request.viewNo, - request.ppSeqNo, + ppReq.viewNo, + ppReq.ppSeqNo, digest, - request.ppTime) + ppReq.stateRootHash, + ppReq.txnRootHash) logger.debug("EVIL: Creating prepare message for request {}: {}". - format(request, prepare)) + format(ppReq, prepare)) self.addToPrepares(prepare, self.name) self.send(prepare, TPCStat.PrepareSent) @@ -148,15 +149,12 @@ def evilSendCommit(self, request): digest = "random" commit = Commit(self.instId, request.viewNo, - request.ppSeqNo, - digest, - request.ppTime) + request.ppSeqNo) logger.debug("EVIL: Creating commit message for request {}: {}". format(request, commit)) self.send(commit, TPCStat.CommitSent) self.addToCommits(commit, self.name) - methodMap = { PrePrepare: evilSendPrePrepareRequest, Prepare: evilSendPrepare, @@ -177,7 +175,18 @@ def faultyReply(node): def newGenerateReply(self, viewNo: int, req: Request) -> Reply: reply = oldGenerateReply(viewNo, req) - reply.result["txnId"] = "For great justice." + reply.result[f.SIG.nm] = "incorrect signature" reply.result["declaration"] = "All your base are belong to us." 
return reply node.generateReply = types.MethodType(newGenerateReply, node) + + +def slow_primary(nodes, instId=0, delay=5): + # make primary replica slow to send PRE-PREPAREs + def ifPrePrepare(msg): + if isinstance(msg, PrePrepare): + return delay + + pr = getPrimaryReplica(nodes, instId) + pr.outBoxTestStasher.delay(ifPrePrepare) + return pr diff --git a/plenum/test/monitoring/conftest.py b/plenum/test/monitoring/conftest.py index a6abd51536..a9acbc59b5 100644 --- a/plenum/test/monitoring/conftest.py +++ b/plenum/test/monitoring/conftest.py @@ -13,3 +13,21 @@ def requests(looper, wallet1, client1): requests=[req], fVal=1) requests.append(req) return requests + + +@pytest.fixture +def decreasedMonitoringTimeouts(conf, request): + oldThroughputWindowSize = conf.ThroughputWindowSize + oldDashboardUpdateFreq = conf.DashboardUpdateFreq + oldLatencyWindowSize = conf.LatencyWindowSize + conf.ThroughputWindowSize = 5 + conf.LatencyWindowSize = 5 + conf.DashboardUpdateFreq = 1 + + def reset(): + conf.ThroughputWindowSize = oldThroughputWindowSize + conf.LatencyWindowSize = oldLatencyWindowSize + conf.DashboardUpdateFreq = oldDashboardUpdateFreq + + request.addfinalizer(reset) + return conf \ No newline at end of file diff --git a/plenum/test/monitoring/test_instance_change_with_Delta.py b/plenum/test/monitoring/test_instance_change_with_Delta.py index 7bc80697d2..49a8b6f1b8 100644 --- a/plenum/test/monitoring/test_instance_change_with_Delta.py +++ b/plenum/test/monitoring/test_instance_change_with_Delta.py @@ -2,16 +2,17 @@ import pytest -from stp_core.loop.eventually import eventually -from stp_core.common.log import getlogger from plenum.common.types import PrePrepare from plenum.common.util import adict from plenum.server.node import Node from plenum.test import waits -from plenum.test.helper import waitForViewChange, \ - sendReqsToNodesAndVerifySuffReplies, sendRandomRequests, \ - checkViewNoForNodes +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies 
+from plenum.test.malicious_behaviors_node import slow_primary from plenum.test.test_node import getPrimaryReplica +from plenum.test.view_change.helper import provoke_and_wait_for_view_change +from plenum.test.helper import waitForViewChange +from stp_core.common.log import getlogger +from stp_core.loop.eventually import eventually nodeCount = 7 @@ -51,7 +52,7 @@ def ensureAnotherPerfCheck(): assert cur[c].endtime > previousPerfChecks[c].endtime return cur - timeout = waits.expectedNextPerfCheck(nodes) + timeout = waits.expectedPoolNextPerfCheck(nodes) newPerfChecks = looper.run(eventually(ensureAnotherPerfCheck, retryWait=1, timeout=timeout)) @@ -102,32 +103,18 @@ def step2(step1, looper): @pytest.fixture(scope="module") def step3(step2): - # make P (primary replica on master) faulty, i.e., slow to send PRE-PREPAREs - def ifPrePrepare(msg): - if isinstance(msg, PrePrepare): - return 5 - - step2.P.outBoxTestStasher.delay(ifPrePrepare) - # send requests to client + slow_primary(step2.nodes, 0, 5) return step2 -# This test fails when the whole package is run. 
+@pytest.mark.skip(reason="SOV-1123 - fails intermittently") def testInstChangeWithLowerRatioThanDelta(looper, step3, wallet1, client1): - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 10) + # from plenum.test.test_node import ensureElectionsDone + # ensureElectionsDone(looper, []) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 9) # wait for every node to run another checkPerformance waitForNextPerfCheck(looper, step3.nodes, step3.perfChecks) + provoke_and_wait_for_view_change(looper, step3.nodes, 1, wallet1, client1) - # verify all nodes have undergone an instance change - for i in range(20): - try: - waitForViewChange(looper, step3.nodes, expectedViewNo=1) - except AssertionError as ex: - # send additional request and check view change - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) - else: - break - else: - assert False, ex diff --git a/plenum/test/monitoring/test_instance_change_with_req_Lambda.py b/plenum/test/monitoring/test_instance_change_with_req_Lambda.py index 20c9644145..aff6d4cff2 100644 --- a/plenum/test/monitoring/test_instance_change_with_req_Lambda.py +++ b/plenum/test/monitoring/test_instance_change_with_req_Lambda.py @@ -26,8 +26,8 @@ """ -@pytest.fixture(scope="module") -def setup(looper, startedNodes, up, wallet1, client1): +@pytest.fixture('module') +def setup(looper, tconf, startedNodes, up, wallet1, client1): # Get the master replica of the master protocol instance P = getPrimaryReplica(startedNodes) @@ -35,23 +35,28 @@ def setup(looper, startedNodes, up, wallet1, client1): for node in startedNodes: node.monitor.Delta = .001 - slowRequest = None + # set LAMBDA not so huge like it set in the production config + testLambda = 30 + for node in startedNodes: + node.monitor.Lambda = testLambda + + slowed_request = False # make P (primary replica on master) faulty, i.e., slow to send # PRE-PREPARE for a specific client request only - def by65SpecificPrePrepare(msg): - nonlocal slowRequest - if 
isinstance(msg, PrePrepare) and slowRequest is None: - slowRequest = getattr(msg, f.REQ_ID.nm) - return 65 + def specificPrePrepare(msg): + nonlocal slowed_request + if isinstance(msg, PrePrepare) and slowed_request is False: + slowed_request = True + return testLambda + 5 # just more that LAMBDA - P.outBoxTestStasher.delay(by65SpecificPrePrepare) + P.outBoxTestStasher.delay(specificPrePrepare) # TODO select or create a timeout for this case in 'waits' sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, numReqs=5, - customTimeoutPerReq=80) + customTimeoutPerReq=tconf.TestRunningTimeLimitSec) return adict(nodes=startedNodes) diff --git a/plenum/test/monitoring/test_post_monitoring_stats.py b/plenum/test/monitoring/test_post_monitoring_stats.py index 4e64feb5ea..c52889ea4e 100644 --- a/plenum/test/monitoring/test_post_monitoring_stats.py +++ b/plenum/test/monitoring/test_post_monitoring_stats.py @@ -8,7 +8,9 @@ config = getConfig() -def testPostingThroughput(postingStatsEnabled, looper: Looper, +def testPostingThroughput(postingStatsEnabled, + decreasedMonitoringTimeouts, + looper: Looper, nodeSet: TestNodeSet, wallet1, client1): """ @@ -62,9 +64,11 @@ def chk(): looper.run(eventually(chk, retryWait=1, timeout=timeout)) -def testPostingLatency(postingStatsEnabled, looper: Looper, - nodeSet: TestNodeSet, - wallet1, client1): +def testPostingLatency(postingStatsEnabled, + decreasedMonitoringTimeouts, + looper: Looper, + nodeSet: TestNodeSet, + wallet1, client1): """ The latencies (master as well as average of backups) after `DashboardUpdateFreq` seconds and before sending any requests should be zero. 
diff --git a/plenum/test/node_catchup/conftest.py b/plenum/test/node_catchup/conftest.py index 0f841b76fb..c5ca1f68a3 100644 --- a/plenum/test/node_catchup/conftest.py +++ b/plenum/test/node_catchup/conftest.py @@ -5,7 +5,7 @@ from plenum.common.util import randomString from plenum.test.conftest import getValueFromModule from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import waitNodeLedgersEquality +from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.pool_transactions.helper import \ addNewStewardAndNode, buildPoolClientAndWallet from plenum.test.pool_transactions.conftest import stewardAndWallet1, \ @@ -63,6 +63,5 @@ def nodeSetWithNodeAddedAfterSomeTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): @pytest.fixture("module") def newNodeCaughtUp(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns): looper, newNode, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns - waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:4]) - + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4]) return newNode diff --git a/plenum/test/node_catchup/helper.py b/plenum/test/node_catchup/helper.py index d1c162df83..2694f70c6e 100644 --- a/plenum/test/node_catchup/helper.py +++ b/plenum/test/node_catchup/helper.py @@ -1,26 +1,45 @@ +from stp_zmq.zstack import KITZStack from typing import Iterable +from plenum.common.constants import POOL_LEDGER_ID, DOMAIN_LEDGER_ID from stp_core.loop.eventually import eventually from stp_core.types import HA -from plenum.test.helper import checkLedgerEquality +from plenum.test.helper import checkLedgerEquality, checkStateEquality, \ + check_seqno_db_equality, assertEquality from plenum.test.test_client import TestClient from plenum.test.test_node import TestNode from plenum.test import waits from plenum.common import util +import pytest + # TODO: This should just take an arbitrary number of nodes and check for their # ledgers to be equal -def 
checkNodeLedgersForEquality(node: TestNode, - *otherNodes: Iterable[TestNode]): + + +def checkNodeDataForEquality(node: TestNode, + *otherNodes: Iterable[TestNode]): + # Checks for node's ledgers and state's to be equal for n in otherNodes: + check_seqno_db_equality(node.seqNoDB, n.seqNoDB) checkLedgerEquality(node.domainLedger, n.domainLedger) - checkLedgerEquality(node.poolLedger, n.poolLedger) + checkStateEquality(node.getState(DOMAIN_LEDGER_ID), n.getState(DOMAIN_LEDGER_ID)) + if n.poolLedger: + checkLedgerEquality(node.poolLedger, n.poolLedger) + checkStateEquality(node.getState(POOL_LEDGER_ID), n.getState(POOL_LEDGER_ID)) -def waitNodeLedgersEquality(looper, - referenceNode: TestNode, - *otherNodes: Iterable[TestNode], - customTimeout = None): +def checkNodeDataForUnequality(node: TestNode, + *otherNodes: Iterable[TestNode]): + # Checks for node's ledgers and state's to be unequal + with pytest.raises(AssertionError): + checkNodeDataForEquality(node, *otherNodes) + + +def waitNodeDataEquality(looper, + referenceNode: TestNode, + *otherNodes: Iterable[TestNode], + customTimeout=None): """ Wait for node ledger to become equal @@ -28,8 +47,26 @@ def waitNodeLedgersEquality(looper, """ numOfNodes = len(otherNodes) + 1 - timeout = customTimeout or waits.expectedPoolLedgerCheck(numOfNodes) - looper.run(eventually(checkNodeLedgersForEquality, + timeout = customTimeout or waits.expectedPoolGetReadyTimeout(numOfNodes) + looper.run(eventually(checkNodeDataForEquality, + referenceNode, + *otherNodes, + retryWait=1, timeout=timeout)) + + +def waitNodeDataUnequality(looper, + referenceNode: TestNode, + *otherNodes: Iterable[TestNode], + customTimeout=None): + """ + Wait for node ledger to become equal + + :param referenceNode: node whose ledger used as a reference + """ + + numOfNodes = len(otherNodes) + 1 + timeout = customTimeout or waits.expectedPoolGetReadyTimeout(numOfNodes) + looper.run(eventually(checkNodeDataForUnequality, referenceNode, *otherNodes, retryWait=1, 
timeout=timeout)) @@ -50,10 +87,14 @@ def checkClientPoolLedgerSameAsNodes(client: TestClient, def ensureClientConnectedToNodesAndPoolLedgerSame(looper, client: TestClient, *nodes:Iterable[TestNode]): - fVal = util.getMaxFailures(len(nodes)) - poolCheckTimeout = waits.expectedPoolLedgerCheck(fVal) + looper.run(client.ensureConnectedToNodes()) + timeout = waits.expectedPoolGetReadyTimeout(len(nodes)) looper.run(eventually(checkClientPoolLedgerSameAsNodes, client, *nodes, - timeout=poolCheckTimeout)) - looper.run(client.ensureConnectedToNodes()) + timeout=timeout)) + + +def check_ledger_state(node, ledger_id, ledger_state): + assertEquality(node.ledgerManager.getLedgerInfoByType(ledger_id).state, + ledger_state) \ No newline at end of file diff --git a/plenum/test/node_catchup/test_catchup_delayed_nodes.py b/plenum/test/node_catchup/test_catchup_delayed_nodes.py index aea887e22b..ae588a7bb7 100644 --- a/plenum/test/node_catchup/test_catchup_delayed_nodes.py +++ b/plenum/test/node_catchup/test_catchup_delayed_nodes.py @@ -6,7 +6,7 @@ from plenum.test import waits from plenum.test.delayers import cpDelay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import waitNodeLedgersEquality +from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.pool_transactions.helper import addNewStewardAndNode from plenum.test.test_node import checkNodesConnected @@ -53,7 +53,7 @@ def testCatchupDelayedNodes(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns, txnPoolNodeSet.append(nodeX) txnPoolNodeSet.append(nodeY) - timeout = waits.expectedCatchupTime(len(txnPoolNodeSet)) + delayX + delayY + timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet)) + delayX + delayY looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=timeout)) logger.debug("Stopping 2 newest nodes, {} and {}".format(nodeX.name, nodeY.name)) @@ -65,5 +65,5 @@ def testCatchupDelayedNodes(txnPoolNodeSet, 
nodeSetWithNodeAddedAfterSomeTxns, nodeY.name)) nodeX.start(looper.loop) nodeY.start(looper.loop) - waitNodeLedgersEquality(looper, nodeX, *txnPoolNodeSet[:5]) - waitNodeLedgersEquality(looper, nodeY, *txnPoolNodeSet[:5]) + waitNodeDataEquality(looper, nodeX, *txnPoolNodeSet[:5]) + waitNodeDataEquality(looper, nodeY, *txnPoolNodeSet[:5]) diff --git a/plenum/test/node_catchup/test_catchup_scenarios.py b/plenum/test/node_catchup/test_catchup_scenarios.py index f4b36a093b..787251c6d8 100644 --- a/plenum/test/node_catchup/test_catchup_scenarios.py +++ b/plenum/test/node_catchup/test_catchup_scenarios.py @@ -3,7 +3,7 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.common.startable import Mode -from plenum.test.delayers import crDelay +from plenum.test.delayers import cqDelay from plenum.test.helper import sendRandomRequests from plenum.test.node_catchup.helper import \ ensureClientConnectedToNodesAndPoolLedgerSame @@ -20,7 +20,7 @@ def nodeStashingOrderedRequests(txnPoolNodeSet, nodeCreatedAfterSomeTxns): looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns for node in txnPoolNodeSet: - node.nodeIbStasher.delay(crDelay(5)) + node.nodeIbStasher.delay(cqDelay(5)) txnPoolNodeSet.append(newNode) ensureClientConnectedToNodesAndPoolLedgerSame(looper, client, *txnPoolNodeSet[:-1]) @@ -32,7 +32,7 @@ def stashing(): assert len(newNode.stashedOrderedReqs) > 0 assert len(newNode.reqsFromCatchupReplies) > 0 - timeout = waits.expectedRequestStashingTime() + timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(stashing, retryWait=1, timeout=timeout)) diff --git a/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py b/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py index dd440ecfbb..92adbc96fc 100644 --- a/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py +++ 
b/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py @@ -1,11 +1,12 @@ import types +from plenum.common.constants import DOMAIN_LEDGER_ID from plenum.common.types import CatchupReq, f from plenum.common.util import randomString -from plenum.test.delayers import crDelay +from plenum.test.delayers import cqDelay from plenum.test.helper import sendRandomRequests, \ sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import checkNodeLedgersForEquality +from plenum.test.node_catchup.helper import checkNodeDataForEquality from plenum.test.pool_transactions.helper import addNewStewardAndNode from plenum.test.test_node import checkNodesConnected, TestNode from stp_core.loop.eventually import eventually @@ -25,16 +26,16 @@ def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet, def chkAfterCall(self, req, frm): r = self.processCatchupReq(req, frm) - typ = getattr(req, f.LEDGER_TYPE.nm) - if typ == 1: + typ = getattr(req, f.LEDGER_ID.nm) + if typ == DOMAIN_LEDGER_ID: ledger = self.getLedgerForMsg(req) - assert req.catchupTill < ledger.size + assert req.catchupTill <= ledger.size return r for node in txnPoolNodeSet: node.nodeMsgRouter.routes[CatchupReq] = types.MethodType( chkAfterCall, node.ledgerManager) - node.nodeIbStasher.delay(crDelay(3)) + node.nodeIbStasher.delay(cqDelay(3)) print('Sending 10 requests') sendRandomRequests(stewardWallet, steward1, 5) @@ -48,6 +49,6 @@ def chkAfterCall(self, req, frm): looper.runFor(2) sendRandomRequests(stewardWallet, steward1, 5) # TODO select or create a timeout for this case in 'waits' - looper.run(eventually(checkNodeLedgersForEquality, newNode, + looper.run(eventually(checkNodeDataForEquality, newNode, *txnPoolNodeSet[:-1], retryWait=1, timeout=80)) assert newNode.spylog.count(TestNode.processStashedOrderedReqs) > 0 diff --git a/plenum/test/node_catchup/test_discard_view_no.py b/plenum/test/node_catchup/test_discard_view_no.py index b34b83859b..ede4dffd4f 100644 --- 
a/plenum/test/node_catchup/test_discard_view_no.py +++ b/plenum/test/node_catchup/test_discard_view_no.py @@ -9,7 +9,7 @@ from plenum.test.delayers import delayNonPrimaries from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ waitForViewChange, checkDiscardMsg -from plenum.test.node_catchup.helper import waitNodeLedgersEquality +from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.pool_transactions.helper import addNewStewardAndNode from plenum.test.test_node import checkNodesConnected, \ checkProtocolInstanceSetup @@ -49,10 +49,11 @@ def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, txnPoolNodeSet.append(nodeTheta) looper.run(checkNodesConnected(txnPoolNodeSet)) looper.run(client.ensureConnectedToNodes()) - - waitNodeLedgersEquality(looper, nodeTheta, *txnPoolNodeSet[:-1]) - checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) - electMsg = Nomination(nodeX.name, 0, viewNo) + waitNodeDataEquality(looper, nodeTheta, *txnPoolNodeSet[:-1]) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1, + timeout=10) + electMsg = Nomination(nodeX.name, 0, viewNo, + nodeX.replicas[0].lastOrderedPPSeqNo) threePMsg = PrePrepare( 0, viewNo, diff --git a/plenum/test/node_catchup/test_new_node_catchup.py b/plenum/test/node_catchup/test_new_node_catchup.py index b0f28fc703..6d5172fb12 100644 --- a/plenum/test/node_catchup/test_new_node_catchup.py +++ b/plenum/test/node_catchup/test_new_node_catchup.py @@ -1,13 +1,21 @@ +from time import perf_counter + import pytest +from plenum.common.constants import DOMAIN_LEDGER_ID, LedgerState +from plenum.common.util import updateNamedTuple +from plenum.test.delayers import cqDelay, cr_delay +from stp_zmq.zstack import KITZStack + from stp_core.loop.eventually import eventually +from plenum.common.types import HA from stp_core.common.log import getlogger from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import 
waitNodeLedgersEquality -from plenum.test.pool_transactions.helper import ensureNodeDisconnectedFromPool +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + check_ledger_state +from plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected from plenum.test.test_ledger_manager import TestLedgerManager -from plenum.test.test_node import checkNodesConnected, ensureElectionsDone, \ - TestNode +from plenum.test.test_node import checkNodesConnected, TestNode from plenum.test import waits # Do not remove the next import @@ -17,7 +25,6 @@ txnCount = 5 -@pytest.mark.skip(reason="SOV-939") def testNewNodeCatchup(newNodeCaughtUp): """ A new node that joins after some transactions should eventually get @@ -29,7 +36,6 @@ def testNewNodeCatchup(newNodeCaughtUp): pass -@pytest.mark.skip(reason="SOV-939") def testPoolLegerCatchupBeforeDomainLedgerCatchup(txnPoolNodeSet, newNodeCaughtUp): """ @@ -48,9 +54,9 @@ def testPoolLegerCatchupBeforeDomainLedgerCatchup(txnPoolNodeSet, startTimes = {} completionTimes = {} for start in starts: - startTimes[start.params.get('ledgerType')] = start.endtime + startTimes[start.params.get('ledgerId')] = start.endtime for comp in completes: - completionTimes[comp.params.get('ledgerType')] = comp.endtime + completionTimes[comp.params.get('ledgerId')] = comp.endtime assert startTimes[0] < completionTimes[0] < \ startTimes[1] < completionTimes[1] @@ -72,10 +78,9 @@ def testDelayedLedgerStatusNotChangingState(): # but its weird since prepares and commits are received which are sent before # and after prepares, respectively. 
Here is the pivotal link # https://www.pivotaltracker.com/story/show/127897273 -@pytest.mark.skip(reason='fails, SOV-928, SOV-939') -def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet, +def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet, tconf, nodeSetWithNodeAddedAfterSomeTxns, - tdirWithPoolTxns, tconf, allPluginsPath): + tdirWithPoolTxns, allPluginsPath): """ A node that restarts after some transactions should eventually get the transactions which happened while it was down @@ -84,7 +89,7 @@ def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet, looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns logger.debug("Stopping node {} with pool ledger size {}". format(newNode, newNode.poolManager.txnSeqNo)) - ensureNodeDisconnectedFromPool(looper, txnPoolNodeSet, newNode) + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, newNode) looper.removeProdable(newNode) # for n in txnPoolNodeSet[:4]: # for r in n.nodestack.remotes.values(): @@ -95,20 +100,62 @@ def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet, # TODO: Check if the node has really stopped processing requests? 
logger.debug("Sending requests") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) - restartedNewNode = TestNode(newNode.name, - basedirpath=tdirWithPoolTxns, - config=tconf, - ha=newNode.nodestack.ha, - cliha=newNode.clientstack.ha, - pluginPaths=allPluginsPath) - logger.debug("Starting the stopped node, {}".format(restartedNewNode)) - looper.add(restartedNewNode) - looper.run(checkNodesConnected(txnPoolNodeSet[:4] + [restartedNewNode])) - waitNodeLedgersEquality(looper, restartedNewNode, *txnPoolNodeSet[:4]) - restartedNewNode.stop() - - -@pytest.mark.skip(reason='fails, SOV-928, SOV-939') + logger.debug("Starting the stopped node, {}".format(newNode)) + nodeHa, nodeCHa = HA(*newNode.nodestack.ha), HA(*newNode.clientstack.ha) + newNode = TestNode(newNode.name, basedirpath=tdirWithPoolTxns, config=tconf, + ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) + looper.add(newNode) + txnPoolNodeSet[-1] = newNode + + # Delay catchup reply processing so LedgerState does not change + delay_catchup_reply = 5 + newNode.nodeIbStasher.delay(cr_delay(delay_catchup_reply)) + looper.run(checkNodesConnected(txnPoolNodeSet)) + + # Make sure ledger starts syncing (sufficient consistency proofs received) + looper.run(eventually(check_ledger_state, newNode, DOMAIN_LEDGER_ID, + LedgerState.syncing, retryWait=.5, timeout=5)) + + confused_node = txnPoolNodeSet[0] + cp = newNode.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID].catchUpTill + start, end = cp.seqNoStart, cp.seqNoEnd + cons_proof = confused_node.ledgerManager._buildConsistencyProof( + DOMAIN_LEDGER_ID, start, end) + + bad_send_time = None + + def chk(): + nonlocal bad_send_time + entries = newNode.ledgerManager.spylog.getAll( + newNode.ledgerManager.canProcessConsistencyProof.__name__) + for entry in entries: + # `canProcessConsistencyProof` should return False after `syncing_time` + if entry.result == False and entry.starttime > bad_send_time: + return + assert False + + def send_and_chk(ledger_state): + 
nonlocal bad_send_time, cons_proof + bad_send_time = perf_counter() + confused_node.ledgerManager.sendTo(cons_proof, newNode.name) + # Check that the ConsistencyProof messages rejected + looper.run(eventually(chk, retryWait=.5, timeout=5)) + check_ledger_state(newNode, DOMAIN_LEDGER_ID, ledger_state) + + send_and_chk(LedgerState.syncing) + + # Not accurate timeout but a conservative one + timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \ + 2*delay_catchup_reply + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4], + customTimeout=timeout) + + send_and_chk(LedgerState.synced) + # cons_proof = updateNamedTuple(cons_proof, seqNoEnd=cons_proof.seqNoStart, + # seqNoStart=cons_proof.seqNoEnd) + # send_and_chk(LedgerState.synced) + + def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns): """ diff --git a/plenum/test/node_catchup/test_node_catchup_after_disconnect.py b/plenum/test/node_catchup/test_node_catchup_after_disconnect.py new file mode 100644 index 0000000000..b5e37d2d69 --- /dev/null +++ b/plenum/test/node_catchup/test_node_catchup_after_disconnect.py @@ -0,0 +1,35 @@ +from stp_core.common.log import getlogger +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import waitNodeDataEquality, waitNodeDataUnequality +from plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected, reconnect_node_and_ensure_connected + +# Do not remove the next import +from plenum.test.node_catchup.conftest import whitelist + +logger = getlogger() +txnCount = 5 + + +# TODO: Refactor tests to minimize module-scoped fixtures.They make tests depend on each other +def testNodeCatchupAfterDisconnect(newNodeCaughtUp, txnPoolNodeSet, + nodeSetWithNodeAddedAfterSomeTxns): + """ + A node that disconnects after some transactions should eventually get the + transactions which happened while it was disconnected + :return: + """ + looper, newNode, 
client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns + logger.debug("Stopping node {} with pool ledger size {}". + format(newNode, newNode.poolManager.txnSeqNo)) + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, newNode, stopNode=False) + looper.removeProdable(newNode) + # TODO: Check if the node has really stopped processing requests? + logger.debug("Sending requests") + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) + # Make sure new node got out of sync + waitNodeDataUnequality(looper, newNode, *txnPoolNodeSet[:-1]) + logger.debug("Starting the stopped node, {}".format(newNode)) + looper.add(newNode) + reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, newNode) + logger.debug("Waiting for the node to catch up, {}".format(newNode)) + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) diff --git a/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py b/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py new file mode 100644 index 0000000000..f74e733c66 --- /dev/null +++ b/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py @@ -0,0 +1,35 @@ +from stp_core.common.log import getlogger +from plenum.test.test_node import ensure_node_disconnected +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import waitNodeDataEquality, waitNodeDataUnequality + +# Do not remove the next import +from plenum.test.node_catchup.conftest import whitelist + +logger = getlogger() +txnCount = 5 + + +# TODO: Refactor tests to minimize module-scoped fixtures.They make tests depend on each other +def testNodeCatchupAfterLostConnection(newNodeCaughtUp, txnPoolNodeSet, + nodeSetWithNodeAddedAfterSomeTxns): + """ + A node that has poor internet connection and got unsynced after some transactions should eventually get the + transactions which happened while it was not accessible + :return: + """ + looper, newNode, client, wallet, _, _ = 
nodeSetWithNodeAddedAfterSomeTxns + logger.debug("Stopping node {} with pool ledger size {}". + format(newNode, newNode.poolManager.txnSeqNo)) + looper.removeProdable(newNode) + # TODO: Check if the node has really stopped processing requests? + logger.debug("Sending requests") + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) + # Make sure new node got out of sync + waitNodeDataUnequality(looper, newNode, *txnPoolNodeSet[:-1]) + logger.debug("Ensure node {} gets disconnected".format(newNode)) + ensure_node_disconnected(looper, newNode, txnPoolNodeSet[:-1]) + logger.debug("Starting the stopped node, {}".format(newNode)) + looper.add(newNode) + logger.debug("Waiting for the node to catch up, {}".format(newNode)) + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) diff --git a/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py b/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py index 7a8db89dc4..1a007980c5 100644 --- a/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py +++ b/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py @@ -1,15 +1,12 @@ import types -from base64 import b64encode -import pytest - -from stp_core.loop.eventually import eventually +from plenum.common.ledger import Ledger from stp_core.common.log import getlogger -from plenum.common.constants import TXN_TYPE +from plenum.common.constants import TXN_TYPE, DOMAIN_LEDGER_ID from plenum.common.types import CatchupReq, f, CatchupRep from plenum.test.helper import sendRandomRequests -from plenum.test.node_catchup.helper import waitNodeLedgersEquality -from plenum.test.test_node import checkNodesConnected +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.test_node import checkNodesConnected, getNonPrimaryReplicas from plenum.test import waits # Do not remove the next import @@ -37,8 +34,8 @@ def testNodeRejectingInvalidTxns(txnPoolNodeSet, 
nodeCreatedAfterSomeTxns): node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node) def sendIncorrectTxns(self, req, frm): - ledgerType = getattr(req, f.LEDGER_TYPE.nm) - if ledgerType == 1: + ledgerId = getattr(req, f.LEDGER_ID.nm) + if ledgerId == DOMAIN_LEDGER_ID: logger.info("{} being malicious and sending incorrect transactions" " for catchup request {} from {}". format(self, req, frm)) @@ -50,22 +47,29 @@ def sendIncorrectTxns(self, req, frm): # Since the type of random request is `buy` if txns[seqNo].get(TXN_TYPE) == "buy": txns[seqNo][TXN_TYPE] = "randomtype" - consProof = [b64encode(p).decode() for p in + consProof = [Ledger.hashToStr(p) for p in ledger.tree.consistency_proof(end, ledger.size)] - self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_TYPE.nm), txns, + self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_ID.nm), txns, consProof), to=frm) else: self.processCatchupReq(req, frm) # One of the node sends incorrect txns in catchup reply. - txnPoolNodeSet[0].nodeMsgRouter.routes[CatchupReq] = types.MethodType( - sendIncorrectTxns, txnPoolNodeSet[0].ledgerManager) + npr = getNonPrimaryReplicas(txnPoolNodeSet, 0) + badReplica = npr[0] + badNode = badReplica.node + badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType( + sendIncorrectTxns, badNode.ledgerManager) logger.debug( - 'Catchup request processor of {} patched'.format(txnPoolNodeSet[0])) + 'Catchup request processor of {} patched'.format(badNode)) sendRandomRequests(wallet, client, 10) looper.run(checkNodesConnected(txnPoolNodeSet)) - waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:-1]) + # Since one of the nodes will send a bad catchup reply, this node will + # request transactions from another node, hence large timeout. + # Dont reduce it. 
+ waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1], + customTimeout=45) - assert newNode.isNodeBlacklisted(txnPoolNodeSet[0].name) + assert newNode.isNodeBlacklisted(badNode.name) diff --git a/plenum/test/node_catchup/test_node_request_consistency_proof.py b/plenum/test/node_catchup/test_node_request_consistency_proof.py index 67209dd14c..40206261c0 100644 --- a/plenum/test/node_catchup/test_node_request_consistency_proof.py +++ b/plenum/test/node_catchup/test_node_request_consistency_proof.py @@ -1,14 +1,14 @@ -import base64 import types from random import randint import pytest +from plenum.common.ledger import Ledger from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.common.types import LedgerStatus from plenum.test.helper import sendRandomRequests -from plenum.test.node_catchup.helper import waitNodeLedgersEquality +from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.test_ledger_manager import TestLedgerManager from plenum.test.test_node import checkNodesConnected from plenum.test import waits @@ -46,8 +46,8 @@ def sendDLStatus(self, name): while newSize in sentSizes: newSize = randint(1, size) print("new size {}".format(newSize)) - newRootHash = base64.b64encode( - self.domainLedger.tree.merkle_tree_hash(0, newSize)).decode() + newRootHash = Ledger.hashToStr( + self.domainLedger.tree.merkle_tree_hash(0, newSize)) ledgerStatus = LedgerStatus(1, newSize, newRootHash) @@ -65,9 +65,12 @@ def sendDLStatus(self, name): # wait more than `ConsistencyProofsTimeout` # TODO: apply configurable timeout here + # `ConsistencyProofsTimeout` is set to 60 sec, so need to wait more than + # 60 sec, hence large timeout. Dont reduce it. + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1], + customTimeout=75) - waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:-1]) - + # Other nodes should have received a `ConsProofRequest` and processed it. 
for node in txnPoolNodeSet[:-1]: assert node.ledgerManager.spylog.count( TestLedgerManager.processConsistencyProofReq.__name__) > 0 diff --git a/plenum/test/node_catchup/test_node_request_missing_transactions.py b/plenum/test/node_catchup/test_node_request_missing_transactions.py index 5769a01e69..8994115e3b 100644 --- a/plenum/test/node_catchup/test_node_request_missing_transactions.py +++ b/plenum/test/node_catchup/test_node_request_missing_transactions.py @@ -1,3 +1,4 @@ +import time import types import pytest @@ -6,8 +7,8 @@ from stp_core.common.log import getlogger from plenum.common.types import CatchupReq from plenum.test.helper import sendRandomRequests -from plenum.test.node_catchup.helper import waitNodeLedgersEquality -from plenum.test.test_node import checkNodesConnected +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.test_node import checkNodesConnected, getNonPrimaryReplicas from plenum.test import waits # Do not remove the next import @@ -17,7 +18,19 @@ logger = getlogger() -@pytest.mark.skip(reason='fails, https://evernym.atlassian.net/browse/SOV-928') +@pytest.fixture(scope="module") +def catchupTimeoutReduced(conf, tdir, request): + defaultCatchupTransactionsTimeout = conf.CatchupTransactionsTimeout + conf.baseDir = tdir + conf.CatchupTransactionsTimeout = 1 + + def reset(): + conf.CatchupTransactionsTimeout = defaultCatchupTransactionsTimeout + + request.addfinalizer(reset) + return conf + + def testNodeRequestingTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): """ A newly joined node is catching up and sends catchup requests to other @@ -31,16 +44,23 @@ def testNodeRequestingTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): for node in txnPoolNodeSet: node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node) - txnPoolNodeSet.append(newNode) - def ignoreCatchupReq(self, req, frm): logger.info("{} being malicious and ignoring catchup request {} " "from {}".format(self, req, frm)) # One of the node does 
not process catchup request. - txnPoolNodeSet[0].nodeMsgRouter.routes[CatchupReq] = types.MethodType( - ignoreCatchupReq, txnPoolNodeSet[0].ledgerManager) + npr = getNonPrimaryReplicas(txnPoolNodeSet, 0) + badReplica = npr[0] + badNode = badReplica.node + txnPoolNodeSet.append(newNode) + + badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType( + ignoreCatchupReq, badNode.ledgerManager) sendRandomRequests(wallet, client, 10) looper.run(checkNodesConnected(txnPoolNodeSet)) - waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:-1]) + # Since one of the nodes does not reply, this new node will experience a + # timeout and retry catchup requests, hence a long test timeout. + # Dont reduce it. + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1], + customTimeout=100) diff --git a/plenum/test/node_request/node_request_helper.py b/plenum/test/node_request/node_request_helper.py index 595e753200..5919d9da74 100644 --- a/plenum/test/node_request/node_request_helper.py +++ b/plenum/test/node_request/node_request_helper.py @@ -3,6 +3,7 @@ from stp_core.loop.eventually import eventuallyAll from plenum.common.types import PrePrepare, OPERATION, f +from plenum.common.constants import DOMAIN_LEDGER_ID from plenum.common.util import getMaxFailures from plenum.server.node import Node from plenum.server.replica import Replica @@ -79,19 +80,22 @@ def nonPrimarySeesCorrectNumberOfPREPREPAREs(): instId, primary.viewNo, primary.lastPrePrepareSeqNo, - propagated1.identifier, - propagated1.reqId, - propagated1.digest, - time.time() - ) + time.time(), + [[propagated1.identifier, propagated1.reqId]], + 1, + Replica.batchDigest([propagated1,]), + DOMAIN_LEDGER_ID, + primary.stateRootHash(DOMAIN_LEDGER_ID), + primary.txnRootHash(DOMAIN_LEDGER_ID), + ) passes = 0 for npr in nonPrimaryReplicas: actualMsgs = len([param for param in getAllArgs(npr, npr.processPrePrepare) - if (param['pp'][:-1], + if (param['pp'][0:3]+param['pp'][4:], param['sender']) == ( - 
expectedPrePrepareRequest[:-1], + expectedPrePrepareRequest[0:3] + expectedPrePrepareRequest[4:], primary.name)]) numOfMsgsWithZFN = 1 @@ -111,13 +115,13 @@ def primarySentsCorrectNumberOfPREPREPAREs(): will be zero and primary must be marked as malicious. """ actualMsgs = len([param for param in - getAllArgs(primary, primary.doPrePrepare) - if (param['reqDigest'].identifier, - param['reqDigest'].reqId, - param['reqDigest'].digest) == + getAllArgs(primary, primary.sendPrePrepare) + if (param['ppReq'].reqIdr[0][0], + param['ppReq'].reqIdr[0][1], + param['ppReq'].digest) == (propagated1.identifier, propagated1.reqId, - propagated1.digest) + primary.batchDigest([propagated1, ])) ]) numOfMsgsWithZFN = 1 @@ -140,12 +144,12 @@ def nonPrimaryReceivesCorrectNumberOfPREPREPAREs(): for npr in nonPrimaryReplicas: l4 = len([param for param in getAllArgs(npr, npr.addToPrePrepares) - if (param['pp'].identifier, - param['pp'].reqId, + if (param['pp'].reqIdr[0][0], + param['pp'].reqIdr[0][1], param['pp'].digest) == ( propagated1.identifier, propagated1.reqId, - propagated1.digest)]) + primary.batchDigest([propagated1, ]))]) numOfMsgsWithZFN = 1 numOfMsgsWithFaults = 0 @@ -220,9 +224,9 @@ def primaryReceivesCorrectNumberOfPREPAREs(): primary.processPrepare) if (param['prepare'].instId, param['prepare'].viewNo, - param['prepare'].ppSeqNo) == (primary.instId, - primary.viewNo, - primary.lastPrePrepareSeqNo) and + param['prepare'].ppSeqNo) == ( + primary.instId, primary.viewNo, + primary.lastPrePrepareSeqNo) and param['sender'] != primary.name]) numOfMsgsWithZFN = nodeCount - 1 @@ -276,8 +280,8 @@ def nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs(): looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) -def checkCommitted(looper, nodeSet, prepared1, instIds, faultyNodes=0, - timeout=60): +def checkCommitted(looper, nodeSet, prepared1, instIds, faultyNodes=0): + timeout = waits.expectedCommittedTime(len(nodeSet)) nodeCount = len((list(nodeSet))) f = 
getMaxFailures(nodeCount) @@ -331,8 +335,7 @@ def replicasReceivesCorrectNumberOfCOMMITs(): for arg in args: assert arg['commit'].viewNo == primaryReplica.viewNo and \ - arg['commit'].ppSeqNo == primaryReplica.lastPrePrepareSeqNo and \ - arg['commit'].digest == prepared1.digest + arg['commit'].ppSeqNo == primaryReplica.lastPrePrepareSeqNo assert r.name != arg['sender'] assert passes >= len(allReplicas) - faultyNodes diff --git a/plenum/test/node_request/test_no_forwarding_without_election.py b/plenum/test/node_request/test_no_forwarding_without_election.py new file mode 100644 index 0000000000..9ebd603dea --- /dev/null +++ b/plenum/test/node_request/test_no_forwarding_without_election.py @@ -0,0 +1,50 @@ +from plenum.test import waits +from plenum.test.delayers import nom_delay +from plenum.test.helper import sendRandomRequests, \ + waitForSufficientRepliesForRequests, sendReqsToNodesAndVerifySuffReplies +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.test_node import ensureElectionsDone +from plenum.test.view_change.helper import ensure_view_change +from stp_core.loop.eventually import eventually + + +def test_node_stashes_requests_if_no_primary(looper, txnPoolNodeSet, client1, + wallet1, client1Connected): + """ + Node stashes requests while no primary is present, but once primary is + determined, the stashed requests are processed + """ + def chk_stashed(stashed): + for node in txnPoolNodeSet: + assert (len(node.reqs_stashed_for_primary) == 0) != stashed + + # No requests are stashed before and after sending any requests + chk_stashed(False) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2) + chk_stashed(False) + + delay = 3 + for node in txnPoolNodeSet: + node.nodeIbStasher.delay(nom_delay(delay)) + + # Ensure view change and soon as view starts, send requests + ensure_view_change(looper, txnPoolNodeSet, client1, wallet1) + + reqs = 
sendRandomRequests(wallet1, client1, 2) + + # The above requests must be stashed + looper.run(eventually(chk_stashed, True, retryWait=.1, + timeout=3)) + + # The elections must complete for the new view, though the election would + # take longer since nominates are delayed. The calculation below is approx. + timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + \ + delay*(len(txnPoolNodeSet)) + ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout) + + # The requests should be successful + waitForSufficientRepliesForRequests(looper, client1, requests=reqs) + + # No requests should be stashed in propagator. + chk_stashed(False) diff --git a/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py b/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py index 7f76575146..8d5426c981 100644 --- a/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py +++ b/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py @@ -22,13 +22,13 @@ def testOrderingWhenPrePrepareNotReceived(looper, nodeSet, up, client1, slowNode.nodeIbStasher.delay(ppDelay(delay, 0)) stash = [] - origMethod = slowRep.processReqDigest + origMethod = slowRep.processPrePrepare - def patched(self, msg): - stash.append(msg) + def patched(self, msg, sender): + stash.append((msg, sender)) patchedMethod = types.MethodType(patched, slowRep) - slowRep.processReqDigest = patchedMethod + slowRep.processPrePrepare = patchedMethod def chk1(): assert len(slowRep.commitsWaitingForPrepare) > 0 @@ -37,8 +37,8 @@ def chk1(): timeout = waits.expectedPrePrepareTime(len(nodeSet)) + delay looper.run(eventually(chk1, timeout=timeout)) - for item in stash: - origMethod(item) + for m, s in stash: + origMethod(m, s) def chk2(): assert len(slowRep.commitsWaitingForPrepare) == 0 diff --git a/plenum/test/node_request/test_order/test_request_ordering_1.py 
b/plenum/test/node_request/test_order/test_request_ordering_1.py index 6a735bb5b2..26398627ec 100644 --- a/plenum/test/node_request/test_order/test_request_ordering_1.py +++ b/plenum/test/node_request/test_order/test_request_ordering_1.py @@ -27,7 +27,7 @@ def doNotProcessReqDigest(self, rd: ReqDigest): pass patchedMethod = types.MethodType(doNotProcessReqDigest, replica) - replica.processReqDigest = patchedMethod + replica.processRequest = patchedMethod def chk(n): assert replica.spylog.count(replica.doOrder.__name__) == n diff --git a/plenum/test/node_request/test_order/test_request_ordering_2.py b/plenum/test/node_request/test_order/test_request_ordering_2.py index 1115014608..306b4048a2 100644 --- a/plenum/test/node_request/test_order/test_request_ordering_2.py +++ b/plenum/test/node_request/test_order/test_request_ordering_2.py @@ -4,6 +4,7 @@ from plenum.test.helper import sendRandomRequests, \ waitForSufficientRepliesForRequests, checkLedgerEquality, checkAllLedgersEqual from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +from plenum.test import waits nodeCount = 7 @@ -42,7 +43,7 @@ def testOrderingCase2(looper, nodeSet, up, client1, wallet1): commitDelay = 3 # delay each COMMIT by this number of seconds delayedPpSeqNos = set() - requestCount = 15 + requestCount = 10 def specificCommits(wrappedMsg): nonlocal node3, node4, node5 @@ -50,8 +51,7 @@ def specificCommits(wrappedMsg): if isinstance(msg, PrePrepare): if len(delayedPpSeqNos) < ppSeqsToDelay: delayedPpSeqNos.add(msg.ppSeqNo) - logger.debug('ppSeqNo {} corresponding to request id {} would ' - 'be delayed'.format(msg.ppSeqNo, msg.reqId)) + logger.debug('ppSeqNo {} be delayed'.format(msg.ppSeqNo)) if isinstance(msg, Commit) and msg.instId == 0 and \ sender in (n.name for n in (node3, node4, node5)) and \ msg.ppSeqNo in delayedPpSeqNos: @@ -69,8 +69,7 @@ def ensureSlowNodesHaveAllTxns(): for node in node1, node2: assert len(node.domainLedger) == requestCount - from plenum.test 
import waits - timeout = waits.expectedCatchupTime(len(nodeSet)) + timeout = waits.expectedPoolGetReadyTimeout(len(nodeSet)) looper.run(eventually(ensureSlowNodesHaveAllTxns, retryWait=1, timeout=timeout)) diff --git a/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py b/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py index d25a7598e4..0b42be3f0d 100644 --- a/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py +++ b/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py @@ -6,7 +6,9 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.common.request import ReqDigest -from plenum.common.types import PrePrepare +from plenum.common.types import PrePrepare, f +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.common.util import compareNamedTuple from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions from plenum.test import waits @@ -31,34 +33,29 @@ def dontSendPrePrepareRequest(self, reqDigest: ReqDigest): pr = getPrimaryReplica(nodeSet, instId) evilMethod = types.MethodType(dontSendPrePrepareRequest, pr) - pr.doPrePrepare = evilMethod + pr.sendPrePrepare = evilMethod def testNonPrimarySendsAPrePrepare(looper, nodeSet, setup, propagated1): - primaryReplica = getPrimaryReplica(nodeSet, instId) nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, instId) firstNpr = nonPrimaryReplicas[0] remainingNpr = nonPrimaryReplicas[1:] - def sendPrePrepareFromNonPrimary(replica): - firstNpr.doPrePrepare(propagated1.reqDigest) + def sendPrePrepareFromNonPrimary(): + firstNpr.requestQueues[DOMAIN_LEDGER_ID].append(propagated1) + ppReq = firstNpr.create3PCBatch(DOMAIN_LEDGER_ID) + firstNpr.sendPrePrepare(ppReq) + return ppReq - return PrePrepare( - replica.instId, - firstNpr.viewNo, - firstNpr.lastPrePrepareSeqNo, - propagated1.identifier, - 
propagated1.reqId, - propagated1.digest, - time.time()) - - ppr = sendPrePrepareFromNonPrimary(firstNpr) + ppr = sendPrePrepareFromNonPrimary() def chk(): - for r in (primaryReplica, *remainingNpr): + for r in remainingNpr: recvdPps = recvdPrePrepare(r) assert len(recvdPps) == 1 - assert recvdPps[0]['pp'][:-1] == ppr[:-1] + assert compareNamedTuple(recvdPps[0]['pp'], ppr, + f.DIGEST.nm, f.STATE_ROOT.nm, + f.TXN_ROOT.nm) nodeSuspicions = len(getNodeSuspicions( r.node, Suspicions.PPR_FRM_NON_PRIMARY.code)) assert nodeSuspicions == 1 diff --git a/plenum/test/plugin/stats_consumer/plugin_stats_consumer.py b/plenum/test/plugin/stats_consumer/plugin_stats_consumer.py index 6ab056a892..15747c3406 100644 --- a/plenum/test/plugin/stats_consumer/plugin_stats_consumer.py +++ b/plenum/test/plugin/stats_consumer/plugin_stats_consumer.py @@ -37,8 +37,11 @@ def _periodicStatsThroughput(self, stats: Dict[str, object]): def _sendStatsOnReqOrdered(self, stats: Dict[str, object]): assert stats.get("created_at") - if stats.get("hasMasterPrimary") == "Y": - assert stats.get("total requests") + # DEPR: Since the master instance does state validations it might + # reject some requests which other instances will not, since they + # do not do state based validation + # if stats.get("hasMasterPrimary") == "Y": + # assert stats.get("total requests") def _sendStatsOnNodeStart(self, stats: Dict[str, object]): assert stats.get("startedAtData") diff --git a/plenum/test/pool_transactions/conftest.py b/plenum/test/pool_transactions/conftest.py index b177fd550d..3dd0c565e3 100644 --- a/plenum/test/pool_transactions/conftest.py +++ b/plenum/test/pool_transactions/conftest.py @@ -6,7 +6,18 @@ from plenum.test.node_catchup.helper import \ ensureClientConnectedToNodesAndPoolLedgerSame from plenum.test.pool_transactions.helper import addNewStewardAndNode, \ - buildPoolClientAndWallet + buildPoolClientAndWallet, addNewSteward + + +@pytest.fixture(scope="module") +def tconf(conf, tdir, request): + 
conf.baseDir = tdir + # Lowering DELTA since some requests will result in validation errors and + # that will decrease master throughput. + # TODO: When monitoring metrics are calibrated, these things + # should be taken care of. + conf.DELTA = .6 + return conf @pytest.yield_fixture(scope="module") @@ -85,3 +96,12 @@ def client1Connected(looper, client1): looper.add(client1) looper.run(client1.ensureConnectedToNodes()) return client1 + + +@pytest.fixture(scope="function") +def newAdHocSteward(looper, tdir, steward1, stewardWallet): + newStewardName = "testClientSteward" + randomString(3) + newSteward, newStewardWallet = addNewSteward(looper, tdir, steward1, + stewardWallet, + newStewardName) + return newSteward, newStewardWallet \ No newline at end of file diff --git a/plenum/test/pool_transactions/helper.py b/plenum/test/pool_transactions/helper.py index cef270f5e5..48e859d80d 100644 --- a/plenum/test/pool_transactions/helper.py +++ b/plenum/test/pool_transactions/helper.py @@ -1,23 +1,23 @@ +from stp_core.types import HA from typing import Iterable, Union -from plenum.common.keygen_utils import initNodeKeysForBothStacks -from stp_core.network.port_dispenser import genHa - from plenum.client.client import Client from plenum.client.wallet import Wallet -from stp_core.loop.eventually import eventually -from plenum.common.signer_simple import SimpleSigner from plenum.common.constants import STEWARD, TXN_TYPE, NYM, ROLE, TARGET_NYM, ALIAS, \ NODE_PORT, CLIENT_IP, NODE_IP, DATA, NODE, CLIENT_PORT, VERKEY, SERVICES, \ VALIDATOR +from plenum.common.keygen_utils import initNodeKeysForBothStacks +from plenum.common.signer_simple import SimpleSigner from plenum.common.util import randomString, hexToFriendly from plenum.test.helper import waitForSufficientRepliesForRequests from plenum.test.test_client import TestClient, genTestClient -from plenum.test.test_node import TestNode +from plenum.test.test_node import TestNode, check_node_disconnected_from, \ + 
ensure_node_disconnected, checkNodesConnected +from stp_core.loop.eventually import eventually +from stp_core.network.port_dispenser import genHa -def addNewClient(role, looper, creatorClient: Client, creatorWallet: Wallet, - name: str): +def sendAddNewClient(role, name, creatorClient, creatorWallet): wallet = Wallet(name) wallet.addIdentifier() idr = wallet.defaultId @@ -34,18 +34,22 @@ def addNewClient(role, looper, creatorClient: Client, creatorWallet: Wallet, req = creatorWallet.signOp(op) creatorClient.submitReqs(req) + return req, wallet + +def addNewClient(role, looper, creatorClient: Client, creatorWallet: Wallet, + name: str): + req, wallet = sendAddNewClient(role, name, creatorClient, creatorWallet) waitForSufficientRepliesForRequests(looper, creatorClient, requests=[req], fVal=1) return wallet -def addNewNode(looper, stewardClient, stewardWallet, newNodeName, tdir, tconf, - allPluginsPath=None, autoStart=True, nodeClass=TestNode): +def sendAddNewNode(newNodeName, stewardClient, stewardWallet, + transformOpFunc=None): sigseed = randomString(32).encode() nodeSigner = SimpleSigner(seed=sigseed) - (nodeIp, nodePort), (clientIp, clientPort) = genHa(2) op = { @@ -60,10 +64,21 @@ def addNewNode(looper, stewardClient, stewardWallet, newNodeName, tdir, tconf, SERVICES: [VALIDATOR, ] } } + if transformOpFunc is not None: + transformOpFunc(op) req = stewardWallet.signOp(op) stewardClient.submitReqs(req) + return req, \ + op[DATA].get(NODE_IP), op[DATA].get(NODE_PORT), \ + op[DATA].get(CLIENT_IP), op[DATA].get(CLIENT_PORT), \ + sigseed + +def addNewNode(looper, stewardClient, stewardWallet, newNodeName, tdir, tconf, + allPluginsPath=None, autoStart=True, nodeClass=TestNode): + req, nodeIp, nodePort, clientIp, clientPort, sigseed \ + = sendAddNewNode(newNodeName, stewardClient, stewardWallet) waitForSufficientRepliesForRequests(looper, stewardClient, requests=[req], fVal=1) @@ -76,10 +91,9 @@ def addNewNode(looper, stewardClient, stewardWallet, newNodeName, tdir, 
tconf, return node -def addNewStewardAndNode(looper, creatorClient, creatorWallet, stewardName, - newNodeName, tdir, tconf, allPluginsPath=None, - autoStart=True, nodeClass=TestNode, - clientClass=TestClient): +def addNewSteward(looper, tdir, + creatorClient, creatorWallet, stewardName, + clientClass=TestClient): newStewardWallet = addNewClient(STEWARD, looper, creatorClient, creatorWallet, stewardName) newSteward = clientClass(name=stewardName, @@ -88,32 +102,41 @@ def addNewStewardAndNode(looper, creatorClient, creatorWallet, stewardName, looper.add(newSteward) looper.run(newSteward.ensureConnectedToNodes()) + return newSteward, newStewardWallet + + +def addNewStewardAndNode(looper, creatorClient, creatorWallet, stewardName, + newNodeName, tdir, tconf, allPluginsPath=None, + autoStart=True, nodeClass=TestNode, + clientClass=TestClient): + + newSteward, newStewardWallet = addNewSteward(looper, tdir, creatorClient, + creatorWallet, stewardName, + clientClass=clientClass) + newNode = addNewNode(looper, newSteward, newStewardWallet, newNodeName, tdir, tconf, allPluginsPath, autoStart=autoStart, nodeClass=nodeClass) return newSteward, newStewardWallet, newNode -def changeNodeHa(looper, stewardClient, stewardWallet, node, nodeHa, clientHa): +def sendUpdateNode(stewardClient, stewardWallet, node, node_data): nodeNym = hexToFriendly(node.nodestack.verhex) - (nodeIp, nodePort), (clientIp, clientPort) = nodeHa, clientHa op = { TXN_TYPE: NODE, TARGET_NYM: nodeNym, - DATA: { - NODE_IP: nodeIp, - NODE_PORT: nodePort, - CLIENT_IP: clientIp, - CLIENT_PORT: clientPort, - ALIAS: node.name - } + DATA: node_data, } req = stewardWallet.signOp(op) stewardClient.submitReqs(req) + return req + + +def updateNodeData(looper, stewardClient, stewardWallet, node, node_data): + req = sendUpdateNode(stewardClient, stewardWallet, node, node_data) waitForSufficientRepliesForRequests(looper, stewardClient, requests=[req], fVal=1) - # TODO: Not needed in ZStack, remove once raet is removed 
node.nodestack.clearLocalKeep() node.nodestack.clearRemoteKeeps() @@ -121,6 +144,36 @@ def changeNodeHa(looper, stewardClient, stewardWallet, node, nodeHa, clientHa): node.clientstack.clearRemoteKeeps() +def updateNodeDataAndReconnect(looper, steward, stewardWallet, node, + node_data, + tdirWithPoolTxns, tconf, txnPoolNodeSet): + updateNodeData(looper, steward, stewardWallet, node, node_data) + # restart the Node with new HA + node.stop() + node_alias = node_data.get(ALIAS, None) or node.name + node_ip = node_data.get(NODE_IP, None) or node.nodestack.ha.host + node_port = node_data.get(NODE_PORT, None) or node.nodestack.ha.port + client_ip = node_data.get(CLIENT_IP, None) or node.clientstack.ha.host + client_port = node_data.get(CLIENT_PORT, None) or node.clientstack.ha.port + looper.removeProdable(name=node.name) + restartedNode = TestNode(node_alias, basedirpath=tdirWithPoolTxns, + config=tconf, + ha=HA(node_ip, node_port), + cliha=HA(client_ip, client_port)) + looper.add(restartedNode) + + # replace node in txnPoolNodeSet + try: + idx = next(i for i, n in enumerate(txnPoolNodeSet) + if n.name == node.name) + except StopIteration: + raise Exception('{} is not the pool'.format(node)) + txnPoolNodeSet[idx] = restartedNode + + looper.run(checkNodesConnected(txnPoolNodeSet)) + return restartedNode + + def changeNodeKeys(looper, stewardClient, stewardWallet, node, verkey): nodeNym = hexToFriendly(node.nodestack.verhex) @@ -191,38 +244,49 @@ def buildPoolClientAndWallet(clientData, tempDir, clientClass=None, return client, w -def disconnectPoolNode(poolNodes: Iterable, disconnect: Union[str, TestNode]): +def disconnectPoolNode(poolNodes: Iterable, disconnect: Union[str, TestNode], stopNode=True): if isinstance(disconnect, TestNode): disconnect = disconnect.name assert isinstance(disconnect, str) for node in poolNodes: - if node.name == disconnect: + if node.name == disconnect and stopNode: node.stop() else: node.nodestack.disconnectByName(disconnect) -def 
checkNodeDisconnectedFrom(needle: str, haystack: Iterable[TestNode]): - """ - Check if the node name given by `needle` is disconnected from nodes in - `haystack` - :param needle: Node name which should be disconnected from nodes from - `haystack` - :param haystack: nodes who should be disconnected from `needle` - :return: - """ - assert all([needle not in node.nodestack.connecteds for node in haystack]) +def reconnectPoolNode(poolNodes: Iterable, connect: Union[str, TestNode], looper): + if isinstance(connect, TestNode): + connect = connect.name + assert isinstance(connect, str) + + for node in poolNodes: + if node.name == connect: + node.start(looper) + else: + node.nodestack.reconnectRemoteWithName(connect) -def ensureNodeDisconnectedFromPool(looper, poolNodes, - disconnect: Union[str, TestNode]): +def disconnect_node_and_ensure_disconnected(looper, poolNodes, + disconnect: Union[str, TestNode], + timeout=None, + stopNode=True): if isinstance(disconnect, TestNode): disconnect = disconnect.name assert isinstance(disconnect, str) - disconnectPoolNode(poolNodes, disconnect) - looper.run(eventually(checkNodeDisconnectedFrom, disconnect, - [n for n in poolNodes - if n.name != disconnect], - retryWait=1, timeout=len(poolNodes)-1)) + disconnectPoolNode(poolNodes, disconnect, stopNode=stopNode) + ensure_node_disconnected(looper, disconnect, poolNodes, + timeout=timeout) + + +def reconnect_node_and_ensure_connected(looper, poolNodes, + connect: Union[str, TestNode], + timeout=None): + if isinstance(connect, TestNode): + connect = connect.name + assert isinstance(connect, str) + + reconnectPoolNode(poolNodes, connect, looper) + looper.run(checkNodesConnected(poolNodes, customTimeout=timeout)) diff --git a/plenum/test/pool_transactions/test_adding_stewards.py b/plenum/test/pool_transactions/test_adding_stewards.py index 891da93de4..41394e7114 100644 --- a/plenum/test/pool_transactions/test_adding_stewards.py +++ b/plenum/test/pool_transactions/test_adding_stewards.py @@ 
-1,7 +1,8 @@ import pytest from plenum.common.constants import STEWARD -from plenum.test.pool_transactions.helper import addNewClient +from plenum.test.helper import waitRejectWithReason +from plenum.test.pool_transactions.helper import addNewClient, sendAddNewClient @pytest.fixture(scope="module") @@ -20,10 +21,14 @@ def reset(): def testOnlyAStewardCanAddAnotherSteward(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnClientData, steward1, stewardWallet, - client1, wallet1): + client1, wallet1, client1Connected): addNewClient(STEWARD, looper, steward1, stewardWallet, "testSteward1") - with pytest.raises(AssertionError): - addNewClient(STEWARD, looper, client1, wallet1, "testSteward2") + + sendAddNewClient(STEWARD, "testSteward2", client1, wallet1) + for node in txnPoolNodeSet: + waitRejectWithReason(looper, client1, + 'Only Steward is allowed to do these transactions', + node.clientstack.name) def testStewardsCanBeAddedOnlyTillAThresholdIsReached(looper, tconf, @@ -32,6 +37,11 @@ def testStewardsCanBeAddedOnlyTillAThresholdIsReached(looper, tconf, poolTxnStewardData, steward1, stewardWallet): addNewClient(STEWARD, looper, steward1, stewardWallet, "testSteward3") - with pytest.raises(AssertionError): - addNewClient(STEWARD, looper, steward1, stewardWallet, "testSteward4") + sendAddNewClient(STEWARD, "testSteward4", steward1, stewardWallet) + for node in txnPoolNodeSet: + waitRejectWithReason(looper, steward1, + 'New stewards cannot be added by other ' + 'stewards as there are already {} ' + 'stewards in the system'.format(tconf.stewardThreshold), + node.clientstack.name) diff --git a/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py b/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py index 2f7f74e75d..a3590d9513 100644 --- a/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py +++ b/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py @@ -1,8 +1,9 @@ +from 
plenum.common.constants import ALIAS, NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger -from plenum.test.node_catchup.helper import waitNodeLedgersEquality, \ +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ ensureClientConnectedToNodesAndPoolLedgerSame -from plenum.test.pool_transactions.helper import changeNodeHa, \ +from plenum.test.pool_transactions.helper import updateNodeData, \ buildPoolClientAndWallet from plenum.test.test_node import TestNode, checkNodesConnected from stp_core.network.port_dispenser import genHa @@ -25,8 +26,15 @@ def testChangeHaPersistsPostNodesRestart(looper, txnPoolNodeSet, clientNewHa)) # Making the change HA txn an confirming its succeeded - changeNodeHa(looper, newSteward, newStewardWallet, newNode, - nodeHa=nodeNewHa, clientHa=clientNewHa) + op = { + ALIAS: newNode.name, + NODE_IP: nodeNewHa.host, + NODE_PORT: nodeNewHa.port, + CLIENT_IP: clientNewHa.host, + CLIENT_PORT: clientNewHa.port, + } + updateNodeData(looper, newSteward, newStewardWallet, newNode, + op) # Stopping existing nodes for node in txnPoolNodeSet: @@ -50,7 +58,7 @@ def testChangeHaPersistsPostNodesRestart(looper, txnPoolNodeSet, restartedNodes.append(node) looper.run(checkNodesConnected(restartedNodes)) - waitNodeLedgersEquality(looper, node, *restartedNodes[:-1]) + waitNodeDataEquality(looper, node, *restartedNodes[:-1]) # Building a new client that reads from the genesis txn file # but is able to connect to all nodes diff --git a/plenum/test/pool_transactions/test_client_with_pool_txns.py b/plenum/test/pool_transactions/test_client_with_pool_txns.py index ff40ac1e2e..6e9dbc4490 100644 --- a/plenum/test/pool_transactions/test_client_with_pool_txns.py +++ b/plenum/test/pool_transactions/test_client_with_pool_txns.py @@ -58,7 +58,6 @@ def testClientConnectToRestartedNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnNodeNames, allPluginsPath): name = 
poolTxnClientNames[-1] - seed = poolTxnData["seeds"][name] newClient, w = genTestClient(tmpdir=tdirWithPoolTxns, nodes=txnPoolNodeSet, name=name, usePoolLedger=True) looper.add(newClient) @@ -83,7 +82,7 @@ def chk(): for node in txnPoolNodeSet: assert node.isParticipating - timeout = waits.expectedCatchupTime(len(txnPoolNodeSet)) + timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) looper.run(eventually(chk, retryWait=1, timeout=timeout)) bootstrapClientKeys(w.defaultId, w.getVerkey(), txnPoolNodeSet) @@ -93,4 +92,4 @@ def chk(): ensureClientConnectedToNodesAndPoolLedgerSame(looper, newClient, *txnPoolNodeSet) - sendReqsToNodesAndVerifySuffReplies(looper, w, newClient, 1, 1) + sendReqsToNodesAndVerifySuffReplies(looper, w, newClient, 3, 1) diff --git a/plenum/test/pool_transactions/test_multiple_clients.py b/plenum/test/pool_transactions/test_multiple_clients.py deleted file mode 100644 index 86e9bab231..0000000000 --- a/plenum/test/pool_transactions/test_multiple_clients.py +++ /dev/null @@ -1,41 +0,0 @@ -import os -import psutil - -import pytest -import zmq - -from stp_core.loop.eventually import eventually -from plenum.common.util import randomString -from plenum.test import waits -from plenum.test.pool_transactions.helper import addNewClient -from plenum.test.test_client import TestClient -from stp_core.network.port_dispenser import genHa - - -@pytest.mark.skip(reason='This is not a test') -def testMultipleClients(looper, txnPoolNodeSet, steward1, stewardWallet, - tdirWithPoolTxns): - n = txnPoolNodeSet[0] - n.nodestack.ctx.set(zmq.MAX_SOCKETS, 4096) - clientNum = 100 - pr = psutil.Process(os.getpid()) - print('Len connections before starting {}'.format(len(pr.connections()))) - for i in range(clientNum): - name = randomString() - wallet = addNewClient(None, looper, steward1, stewardWallet, - name) - - def chk(): - for node in txnPoolNodeSet: - assert wallet.defaultId in node.clientAuthNr.clients - - timeout = 
waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) - looper.run(eventually(chk, retryWait=1, timeout=timeout)) - newSteward = TestClient(name=name, - nodeReg=None, ha=genHa(), - basedirpath=tdirWithPoolTxns) - - looper.add(newSteward) - looper.run(newSteward.ensureConnectedToNodes()) - print('Connected {}'.format(i)) - print('Len connections {}'.format(len(pr.connections()))) diff --git a/plenum/test/pool_transactions/test_nodes_ha_change_back.py b/plenum/test/pool_transactions/test_nodes_ha_change_back.py new file mode 100644 index 0000000000..ea2fe0feb7 --- /dev/null +++ b/plenum/test/pool_transactions/test_nodes_ha_change_back.py @@ -0,0 +1,56 @@ +from plenum.common.constants import ALIAS, NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT +from plenum.test.pool_transactions.helper import updateNodeData +from plenum.test.test_node import TestNode, checkNodesConnected +from stp_core.network.port_dispenser import genHa + + +def testChangeNodeHaBack(looper, txnPoolNodeSet, tdirWithPoolTxns, + tconf, steward1, stewardWallet, nodeThetaAdded): + """ + The case: + The Node HA is updated with some HA (let's name it 'correct' HA). + Then the Steward makes a mistake and sends the NODE txn with other HA + ('wrong' HA). The Steward replaces back 'wrong' HA by 'correct' HA sending + yet another one NODE txn. 
+ """ + + steward, stewardWallet, theta = nodeThetaAdded + clientHa = theta.cliNodeReg['ThetaC'] # use the same client HA + # do all exercises without the Node + theta.stop() + looper.removeProdable(name=theta.name) + + # step 1: set 'correct' HA + correctNodeHa = genHa(1) + op = { + ALIAS: theta.name, + NODE_IP: correctNodeHa.host, + NODE_PORT: correctNodeHa.port, + CLIENT_IP: clientHa.host, + CLIENT_PORT: clientHa.port, + } + updateNodeData(looper, steward, stewardWallet, theta, + op) + + # step 2: set 'wrong' HA + wrongNodeHa = genHa(1) + op.update({NODE_IP: wrongNodeHa.host, NODE_PORT: wrongNodeHa.port}) + updateNodeData(looper, steward, stewardWallet, theta, + op) + + # step 3: set 'correct' HA back + op.update({NODE_IP: correctNodeHa.host, NODE_PORT: correctNodeHa.port}) + updateNodeData(looper, steward, stewardWallet, theta, + op) + + # In order to save the time the pool connection is not maintaining + # during the steps, only the final result is checked. + restartedNode = TestNode(theta.name, basedirpath=tdirWithPoolTxns, + config=tconf, ha=correctNodeHa, cliha=clientHa) + looper.add(restartedNode) + txnPoolNodeSet[-1] = restartedNode + + looper.run(checkNodesConnected(txnPoolNodeSet)) + # check Theta HA + for n in txnPoolNodeSet: + assert n.nodeReg['Theta'] == correctNodeHa diff --git a/plenum/test/pool_transactions/test_nodes_with_pool_txns.py b/plenum/test/pool_transactions/test_nodes_with_pool_txns.py index 87e609a5e9..8b3344289a 100644 --- a/plenum/test/pool_transactions/test_nodes_with_pool_txns.py +++ b/plenum/test/pool_transactions/test_nodes_with_pool_txns.py @@ -1,8 +1,9 @@ +import itertools from copy import copy +import base58 import pytest -from plenum.common import util from plenum.common.keygen_utils import initNodeKeysForBothStacks from stp_core.network.port_dispenser import genHa from stp_core.types import HA @@ -10,15 +11,18 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from 
plenum.common.signer_simple import SimpleSigner -from plenum.common.constants import CLIENT_STACK_SUFFIX +from plenum.common.constants import * from plenum.common.util import getMaxFailures, randomString from plenum.test import waits from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ - checkReqNackWithReason -from plenum.test.node_catchup.helper import waitNodeLedgersEquality, \ + checkRejectWithReason, waitReqNackWithReason, waitRejectWithReason, \ + waitForSufficientRepliesForRequests, waitReqNackFromPoolWithReason, \ + waitRejectFromPoolWithReason +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ ensureClientConnectedToNodesAndPoolLedgerSame from plenum.test.pool_transactions.helper import addNewClient, addNewNode, \ - changeNodeHa, addNewStewardAndNode, changeNodeKeys + updateNodeData, addNewStewardAndNode, changeNodeKeys, sendUpdateNode, \ + sendAddNewNode, updateNodeDataAndReconnect, addNewSteward from plenum.test.test_node import TestNode, checkNodesConnected, \ checkProtocolInstanceSetup @@ -60,25 +64,122 @@ def chk(): looper.run(eventually(chk, retryWait=1, timeout=timeout)) +def testStewardCannotAddNodeWithNonBase58VerKey(looper, tdir, + txnPoolNodeSet, + newAdHocSteward): + """ + The Case: + Steward accidentally sends the NODE txn with a non base58 verkey. + The expected result: + Steward gets NAck response from the pool. 
+ """ + # create a new steward + newSteward, newStewardWallet = newAdHocSteward + + newNodeName = "Epsilon" + + # get hex VerKey + sigseed = randomString(32).encode() + nodeSigner = SimpleSigner(seed=sigseed) + b = base58.b58decode(nodeSigner.identifier) + hexVerKey = bytearray(b).hex() + + def _setHexVerkey(op): + op[TARGET_NYM] = hexVerKey + return op + + sendAddNewNode(newNodeName, newSteward, newStewardWallet, + transformOpFunc=_setHexVerkey) + waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, + 'is not a base58 string') + + +def testStewardCannotAddNodeWithInvalidHa(looper, tdir, + txnPoolNodeSet, + newAdHocSteward): + """ + The case: + Steward accidentally sends the NODE txn with an invalid HA. + The expected result: + Steward gets NAck response from the pool. + """ + newNodeName = "Epsilon" + + newSteward, newStewardWallet = newAdHocSteward + + # a sequence of the test cases for each field + tests = itertools.chain( + itertools.product( + (NODE_IP, CLIENT_IP), ('127.0.0.1 ', '256.0.0.1', '0.0.0.0') + ), + itertools.product( + (NODE_PORT, CLIENT_PORT), ('foo', '9700', 0, 65535 + 1, 4351683546843518184) + ), + ) + + for field, value in tests: + # create a transform function for each test + def _tnf(op): op[DATA].update({field: value}) + sendAddNewNode(newNodeName, newSteward, newStewardWallet, + transformOpFunc=_tnf) + # wait NAcks with exact message. it does not works for just 'is invalid' + # because the 'is invalid' will check only first few cases + waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, + "'{}' ('{}') is invalid".format(field, value)) + + +def testStewardCannotAddNodeWithOutFullFieldsSet(looper, tdir, + txnPoolNodeSet, + newAdHocSteward): + """ + The case: + Steward accidentally sends the NODE txn without full fields set. + The expected result: + Steward gets NAck response from the pool. 
+ """ + newNodeName = "Epsilon" + + newSteward, newStewardWallet = newAdHocSteward + + # case from the ticket + def _renameNodePortField(op): + op[DATA].update({NODE_PORT + ' ': op[DATA][NODE_PORT]}) + del op[DATA][NODE_PORT] + + sendAddNewNode(newNodeName, newSteward, newStewardWallet, + transformOpFunc=_renameNodePortField) + waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, + "unknown field") + + for fn in (NODE_IP, CLIENT_IP, NODE_PORT, CLIENT_PORT): + def _tnf(op): del op[DATA][fn] + sendAddNewNode(newNodeName, newSteward, newStewardWallet, + transformOpFunc=_tnf) + # wait NAcks with exact message. it does not works for just 'is missed' + # because the 'is missed' will check only first few cases + waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, + "unknown field") + + def testStewardCannotAddMoreThanOneNode(looper, txnPoolNodeSet, steward1, stewardWallet, tdirWithPoolTxns, tconf, allPluginsPath): newNodeName = "Epsilon" - with pytest.raises(AssertionError): - addNewNode(looper, steward1, stewardWallet, newNodeName, - tdirWithPoolTxns, tconf, allPluginsPath) + sendAddNewNode(newNodeName, steward1, stewardWallet) + + for node in txnPoolNodeSet: + waitRejectWithReason(looper, steward1, + 'already has a node', + node.clientstack.name) def testNonStewardCannotAddNode(looper, txnPoolNodeSet, client1, wallet1, client1Connected, tdirWithPoolTxns, tconf, allPluginsPath): newNodeName = "Epsilon" - with pytest.raises(AssertionError): - addNewNode(looper, client1, wallet1, newNodeName, - tdirWithPoolTxns, tconf, allPluginsPath) - + sendAddNewNode(newNodeName, client1, wallet1) for node in txnPoolNodeSet: - checkReqNackWithReason(client1, 'is not a steward so cannot add a ' + waitRejectWithReason(looper, client1, 'is not a steward so cannot add a ' 'new node', node.clientstack.name) @@ -103,8 +204,7 @@ def chkNodeRegRecvd(): assert (len(steward1.nodeReg) - len(oldNodeReg)) == 1 assert (newNode.name + CLIENT_STACK_SUFFIX) in steward1.nodeReg 
- fVal = util.getMaxFailures(len(txnPoolNodeSet)) - timeout = waits.expectedClientConnectionTimeout(fVal) + timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet)) looper.run(eventually(chkNodeRegRecvd, retryWait=1, timeout=timeout)) ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, *txnPoolNodeSet) @@ -128,9 +228,9 @@ def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, tconf, allPluginsPath) txnPoolNodeSet.append(newNode) - looper.run(checkNodesConnected(txnPoolNodeSet)) - logger.debug("{} connected to the pool".format(newNode)) - waitNodeLedgersEquality(looper, newNode, *txnPoolNodeSet[:-1]) + looper.run(checkNodesConnected(txnPoolNodeSet)) + logger.debug("{} connected to the pool".format(newNode)) + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) f = getMaxFailures(len(txnPoolNodeSet)) @@ -139,7 +239,7 @@ def checkFValue(): assert node.f == f assert len(node.replicas) == (f + 1) - timeout = waits.expectedClientConnectionTimeout(f) + timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet)) looper.run(eventually(checkFValue, retryWait=1, timeout=timeout)) checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) @@ -149,16 +249,42 @@ def testNodePortCannotBeChangedByAnotherSteward(looper, txnPoolNodeSet, steward1, stewardWallet, nodeThetaAdded): _, _, newNode = nodeThetaAdded - nodeNewHa, clientNewHa = genHa(2) - logger.debug('{} changing HAs to {} {}'.format(newNode, nodeNewHa, - clientNewHa)) - with pytest.raises(AssertionError): - changeNodeHa(looper, steward1, stewardWallet, newNode, - nodeHa=nodeNewHa, clientHa=clientNewHa) + nodeNewHa = genHa(1) + new_port = nodeNewHa.port + node_ha = txnPoolNodeSet[0].nodeReg[newNode.name] + cli_ha = txnPoolNodeSet[0].cliNodeReg[newNode.name + CLIENT_STACK_SUFFIX] + node_data = { + ALIAS: newNode.name, + NODE_PORT: new_port, + NODE_IP: node_ha.host, + CLIENT_PORT: cli_ha.port, + CLIENT_IP: cli_ha.host, + } + + logger.debug('{} 
changing port to {} {}'.format(newNode, new_port, + newNode.nodestack.ha.port)) + sendUpdateNode(steward1, stewardWallet, newNode, + node_data) for node in txnPoolNodeSet: - checkReqNackWithReason(steward1, 'is not a steward of node', - node.clientstack.name) + waitRejectWithReason(looper, steward1, 'is not a steward of node', + node.clientstack.name) + + +def test_node_alias_cannot_be_changed(looper, txnPoolNodeSet, + tdirWithPoolTxns, + tconf, nodeThetaAdded): + """ + The node alias cannot be changed. + """ + newSteward, newStewardWallet, newNode = nodeThetaAdded + node_data = {ALIAS: 'foo'} + sendUpdateNode(newSteward, newStewardWallet, newNode, + node_data) + for node in txnPoolNodeSet: + waitRejectWithReason(looper, newSteward, + 'data has conflicts with request data', + node.clientstack.name) def testNodePortChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, @@ -167,24 +293,26 @@ def testNodePortChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, An running node's port is changed """ newSteward, newStewardWallet, newNode = nodeThetaAdded - nodeNewHa, clientNewHa = genHa(2) - logger.debug("{} changing HAs to {} {}".format(newNode, nodeNewHa, - clientNewHa)) - changeNodeHa(looper, newSteward, newStewardWallet, newNode, - nodeHa=nodeNewHa, clientHa=clientNewHa) - newNode.stop() - looper.removeProdable(name=newNode.name) - logger.debug("{} starting with HAs {} {}".format(newNode, nodeNewHa, - clientNewHa)) - node = TestNode(newNode.name, basedirpath=tdirWithPoolTxns, config=tconf, - ha=nodeNewHa, cliha=clientNewHa) - looper.add(node) - # The last element of `txnPoolNodeSet` is the node Theta that was just - # stopped - txnPoolNodeSet[-1] = node - looper.run(checkNodesConnected(txnPoolNodeSet)) - - waitNodeLedgersEquality(looper, node, *txnPoolNodeSet[:-1]) + nodeNewHa = genHa(1) + new_port = nodeNewHa.port + + node_ha = txnPoolNodeSet[0].nodeReg[newNode.name] + cli_ha = txnPoolNodeSet[0].cliNodeReg[newNode.name + CLIENT_STACK_SUFFIX] + node_data = { + ALIAS: 
newNode.name, + NODE_PORT: new_port, + NODE_IP: node_ha.host, + CLIENT_PORT: cli_ha.port, + CLIENT_IP: cli_ha.host, + } + + node = updateNodeDataAndReconnect(looper, newSteward, + newStewardWallet, newNode, + node_data, + tdirWithPoolTxns, tconf, + txnPoolNodeSet) + + waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1]) ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, *txnPoolNodeSet) @@ -192,7 +320,6 @@ def testNodePortChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, *txnPoolNodeSet) -@pytest.mark.skip(reason="SOV-881") def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, nodeThetaAdded, allPluginsPath=None): @@ -209,7 +336,7 @@ def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, verkey = SimpleSigner(seed=sigseed).naclSigner.verhex.decode() changeNodeKeys(looper, newSteward, newStewardWallet, newNode, verkey) initNodeKeysForBothStacks(newNode.name, tdirWithPoolTxns, sigseed, - override=True) + override=True) logger.debug("{} starting with HAs {} {}".format(newNode, nodeHa, nodeCHa)) node = TestNode(newNode.name, basedirpath=tdirWithPoolTxns, config=tconf, @@ -219,8 +346,10 @@ def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, # stopped txnPoolNodeSet[-1] = node looper.run(checkNodesConnected(txnPoolNodeSet)) - waitNodeLedgersEquality(looper, node, *txnPoolNodeSet[:-1]) + waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1]) ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, *txnPoolNodeSet) ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, *txnPoolNodeSet) + + diff --git a/plenum/test/primary_election/helpers.py b/plenum/test/primary_election/helpers.py index 00eea89aac..4532aebefa 100644 --- a/plenum/test/primary_election/helpers.py +++ b/plenum/test/primary_election/helpers.py @@ -1,3 +1,4 @@ +from plenum.common.types import Nomination, Primary from plenum.server.replica import Replica from plenum.test.test_node import TestNode @@ -6,7 +7,7 @@ def 
checkNomination(node: TestNode, nomineeName: str): matches = [replica.name for instId, replica in enumerate(node.elector.replicas) if node.elector.didReplicaNominate(instId) is True and replica.name in node.elector.nominations[instId] and - node.elector.nominations[instId][replica.name] == + node.elector.nominations[instId][replica.name][0] == Replica.generateName(nomineeName, instId)] assert len(matches) > 0 return matches[0] @@ -21,5 +22,15 @@ def getSelfNominationByNode(node: TestNode) -> int: """ for instId, replica in enumerate(node.elector.replicas): name = Replica.generateName(node.name, instId) - if node.elector.nominations.get(instId, {}).get(name, None) == name: - return instId \ No newline at end of file + if node.elector.nominations.get(instId, {}).get(name, [None, ])[0] == name: + return instId + + +def nominationByNode(name: str, byNode: TestNode, instId: int): + return Nomination(name, instId, byNode.viewNo, + byNode.replicas[instId].lastOrderedPPSeqNo) + + +def primaryByNode(name: str, byNode: TestNode, instId: int): + return Primary(name, instId, byNode.viewNo, + byNode.replicas[instId].lastOrderedPPSeqNo) diff --git a/plenum/test/primary_election/test_primary_election_case1.py b/plenum/test/primary_election/test_primary_election_case1.py index 42d23096cc..7fd4118963 100644 --- a/plenum/test/primary_election/test_primary_election_case1.py +++ b/plenum/test/primary_election/test_primary_election_case1.py @@ -8,7 +8,7 @@ from plenum.test.delayers import delayerMsgTuple from plenum.test.helper import whitelistNode from plenum.test.primary_election.helpers import checkNomination, \ - getSelfNominationByNode + getSelfNominationByNode, nominationByNode from plenum.test.test_node import TestNodeSet, checkNodesConnected, \ ensureElectionsDone from plenum.test import waits @@ -20,6 +20,7 @@ logger = getlogger() +delayOfNomination = 5 @pytest.fixture() def case1Setup(startedNodes: TestNodeSet): @@ -31,7 +32,7 @@ def case1Setup(startedNodes: TestNodeSet): # 
Node B delays self nomination so A's nomination reaches everyone nodeB.delaySelfNomination(10) # Node B delays nomination from all nodes - nodeB.nodeIbStasher.delay(delayerMsgTuple(5, Nomination)) + nodeB.nodeIbStasher.delay(delayerMsgTuple(delayOfNomination, Nomination)) # Add node C and node D nodeC = nodes.getNode(nodeNames[2]) @@ -68,9 +69,9 @@ def testPrimaryElectionCase1(case1Setup, looper, keySharedNodes): # Doesn't matter if nodes reach the ready state or not. Just start them looper.run(checkNodesConnected(nodes)) - # Node B sends multiple NOMINATE msgs for Node D but only after A has + # Node B sends multiple NOMINATE messages for Node D but only after A has # nominated itself - timeout = waits.expectedNominationTimeout(nodeCount=1) + timeout = waits.expectedPoolNominationTimeout(nodeCount=1) looper.run(eventually(checkNomination, nodeA, nodeA.name, retryWait=.25, timeout=timeout)) @@ -78,7 +79,8 @@ def testPrimaryElectionCase1(case1Setup, looper, keySharedNodes): instId = getSelfNominationByNode(nodeA) for i in range(5): - nodeB.send(Nomination(nodeD.name, instId, nodeB.viewNo)) + # nodeB.send(Nomination(nodeD.name, instId, nodeB.viewNo)) + nodeB.send(nominationByNode(nodeD.name, nodeB, instId)) nodeB.nodestack.flushOutBoxes() # No node from node A, node C, node D(node B is malicious anyway so not @@ -86,11 +88,13 @@ def testPrimaryElectionCase1(case1Setup, looper, keySharedNodes): # node D is slow. 
The one nomination for D, that nodes A, C # and D might have would be because of node B for node in [nodeA, nodeC, nodeD]: - assert list(node.elector.nominations[instId].values()).count( + assert [n[0] for n in node.elector.nominations[instId].values()].count( Replica.generateName(nodeD.name, instId)) \ <= 1 - primaryReplicas = ensureElectionsDone(looper=looper, nodes=nodes) + timeout = waits.expectedPoolElectionTimeout(nodeCount) + delayOfNomination + primaryReplicas = ensureElectionsDone(looper=looper, + nodes=nodes, customTimeout=timeout) for node in nodes: logger.debug( diff --git a/plenum/test/primary_election/test_primary_election_case2.py b/plenum/test/primary_election/test_primary_election_case2.py index 0b2162c8c8..ef3f8917bf 100644 --- a/plenum/test/primary_election/test_primary_election_case2.py +++ b/plenum/test/primary_election/test_primary_election_case2.py @@ -6,7 +6,7 @@ from plenum.server.suspicion_codes import Suspicions from plenum.test.delayers import delayerMsgTuple from plenum.test.primary_election.helpers import checkNomination, \ - getSelfNominationByNode + getSelfNominationByNode, nominationByNode from plenum.test.test_node import TestNodeSet, checkNodesConnected, \ ensureElectionsDone from plenum.test import waits @@ -51,7 +51,7 @@ def testPrimaryElectionCase2(case2Setup, looper, keySharedNodes): looper.run(checkNodesConnected(nodeSet)) # Node B sends multiple NOMINATE msgs but only after A has nominated itself - timeout = waits.expectedNominationTimeout(len(nodeSet)) + timeout = waits.expectedPoolNominationTimeout(len(nodeSet)) looper.run(eventually(checkNomination, A, A.name, retryWait=.25, timeout=timeout)) @@ -62,9 +62,11 @@ def testPrimaryElectionCase2(case2Setup, looper, keySharedNodes): DRep = Replica.generateName(D.name, instId) # Node B first sends NOMINATE msgs for Node C to all nodes - B.send(Nomination(CRep, instId, B.viewNo)) + # B.send(Nomination(CRep, instId, B.viewNo)) + B.send(nominationByNode(CRep, B, instId)) # Node B 
sends NOMINATE msgs for Node D to all nodes - B.send(Nomination(DRep, instId, B.viewNo)) + # B.send(Nomination(DRep, instId, B.viewNo)) + B.send(nominationByNode(DRep, B, instId)) # Ensure elections are done ensureElectionsDone(looper=looper, nodes=nodeSet) @@ -73,4 +75,4 @@ def testPrimaryElectionCase2(case2Setup, looper, keySharedNodes): # not considering it) should have nomination for node C from node B since # node B first nominated node C for node in [A, C, D]: - assert node.elector.nominations[instId][BRep] == CRep + assert node.elector.nominations[instId][BRep][0] == CRep diff --git a/plenum/test/primary_election/test_primary_election_case4.py b/plenum/test/primary_election/test_primary_election_case4.py index e52a54fe2f..6a3227723b 100644 --- a/plenum/test/primary_election/test_primary_election_case4.py +++ b/plenum/test/primary_election/test_primary_election_case4.py @@ -4,6 +4,7 @@ from plenum.common.types import Primary from plenum.server.suspicion_codes import Suspicions from plenum.test import waits +from plenum.test.primary_election.helpers import primaryByNode from plenum.test.test_node import TestNodeSet, checkNodesConnected, \ ensureElectionsDone @@ -13,6 +14,8 @@ 'doing nothing for now', 'know how to handle it'] +delaySelfNomination = 5 + @pytest.fixture() def case4Setup(keySharedNodes: TestNodeSet): @@ -22,7 +25,7 @@ def case4Setup(keySharedNodes: TestNodeSet): # Delay each of the nodes A, B and C's self nomination so Node B gets to # declare a primary before a primary is selected for n in (A, B, C): - n.delaySelfNomination(5) + n.delaySelfNomination(delaySelfNomination) # Node D is slow so it nominates itself after long time D.delaySelfNomination(25) @@ -51,21 +54,26 @@ def testPrimaryElectionCase4(case4Setup, looper): # Node B sends multiple declarations of node D's 0th protocol instance as # primary to all nodes for i in range(5): - B.send(Primary(D.name, 0, B.viewNo)) + # B.send(Primary(D.name, 0, B.viewNo)) + B.send(primaryByNode(D.name, B, 
0)) # No node from node A, node C, node D(node B is malicious anyway so not # considering it) should have more than one primary declaration for node # D since node D is slow. The one primary declaration for node D, # that nodes A, C and D might have would be because of node B def x(): - primDecs = list(node.elector.primaryDeclarations[0].values()) + primDecs = [p[0] for p in node.elector.primaryDeclarations[0].values()] assert primDecs.count(D.name) <= 1 - timeout = waits.expectedNominationTimeout(len(allNodes)) + # also have to take into account the catchup procedure + timeout = waits.expectedPoolNominationTimeout(len(allNodes)) + \ + waits.expectedPoolCatchupTime(len(allNodes)) + for node in (A, C, D): looper.run(eventually(x, retryWait=.5, timeout=timeout)) - ensureElectionsDone(looper=looper, nodes=allNodes) + timeout = waits.expectedPoolElectionTimeout(len(allNodes)) + delaySelfNomination + ensureElectionsDone(looper=looper, nodes=allNodes, customTimeout=timeout) # Node D should not have any primary replica assert not D.hasPrimary diff --git a/plenum/test/primary_election/test_primary_election_case5.py b/plenum/test/primary_election/test_primary_election_case5.py index 9a24a9ffac..af71d04fc3 100644 --- a/plenum/test/primary_election/test_primary_election_case5.py +++ b/plenum/test/primary_election/test_primary_election_case5.py @@ -2,10 +2,12 @@ import pytest from plenum.common.types import Primary, Nomination +from plenum.test import waits from stp_core.common.log import getlogger from plenum.server.replica import Replica from plenum.server.suspicion_codes import Suspicions +from plenum.test.primary_election.helpers import primaryByNode from plenum.test.test_node import TestNodeSet, checkNodesConnected, \ ensureElectionsDone from plenum.test.delayers import delayerMsgTuple @@ -19,6 +21,9 @@ logger = getlogger() +# the total delay of election done +delayOfElectionDone = 20 + @pytest.fixture() def case5Setup(startedNodes: TestNodeSet): @@ -37,7 +42,7 @@ def 
case5Setup(startedNodes: TestNodeSet): node.whitelistNode(B.name, Suspicions.DUPLICATE_PRI_SENT.code) for node in [A, C, D]: - B.nodeIbStasher.delay(delayerMsgTuple(30, + B.nodeIbStasher.delay(delayerMsgTuple(delayOfElectionDone, Nomination, senderFilter=node.name, instFilter=0)) @@ -70,12 +75,18 @@ def testPrimaryElectionCase5(case5Setup, looper, keySharedNodes): DRep = Replica.generateName(D.name, 0) # Node B first sends PRIMARY msgs for Node C to all nodes - B.send(Primary(CRep, 0, B.viewNo)) + # B.send(Primary(CRep, 0, B.viewNo)) + B.send(primaryByNode(CRep, B, 0)) # Node B sends PRIMARY msgs for Node D to all nodes - B.send(Primary(DRep, 0, B.viewNo)) + # B.send(Primary(DRep, 0, B.viewNo)) + B.send(primaryByNode(DRep, B, 0)) # Ensure elections are done - ensureElectionsDone(looper=looper, nodes=nodeSet) + # also have to take into account the catchup procedure + timeout = waits.expectedPoolElectionTimeout(len(nodeSet)) + \ + waits.expectedPoolCatchupTime(len(nodeSet)) + \ + delayOfElectionDone + ensureElectionsDone(looper=looper, nodes=nodeSet, customTimeout=timeout) # All nodes from node A, node C, node D(node B is malicious anyway so not # considering it) should have primary declarations for node C from node B @@ -84,4 +95,4 @@ def testPrimaryElectionCase5(case5Setup, looper, keySharedNodes): logger.debug( "node {} should have primary declaration for C from node B" .format(node)) - assert node.elector.primaryDeclarations[0][BRep] == CRep + assert node.elector.primaryDeclarations[0][BRep][0] == CRep diff --git a/plenum/test/primary_election/test_primary_election_contested.py b/plenum/test/primary_election/test_primary_election_contested.py index 6c59af4658..6c9d186730 100644 --- a/plenum/test/primary_election/test_primary_election_contested.py +++ b/plenum/test/primary_election/test_primary_election_contested.py @@ -55,7 +55,7 @@ def testPrimaryElectionContested(electContFixture, looper, keySharedNodes): checkPoolReady(looper, nodeSet) logger.debug("Check 
nomination") - timeout = waits.expectedNominationTimeout(nodeCount) + timeout = waits.expectedPoolNominationTimeout(nodeCount) # Checking whether Node A nominated itself looper.run(eventually(checkNomination, A, A.name, diff --git a/plenum/test/primary_election/test_primary_election_with_clear_winner.py b/plenum/test/primary_election/test_primary_election_with_clear_winner.py index bbba0d4c08..099079758a 100644 --- a/plenum/test/primary_election/test_primary_election_with_clear_winner.py +++ b/plenum/test/primary_election/test_primary_election_with_clear_winner.py @@ -58,10 +58,10 @@ def testPrimaryElectionWithAClearWinner(electContFixture, looper, keySharedNodes checkPoolReady(looper, nodeSet) # Checking whether one of the replicas of Node A nominated itself - timeout = waits.expectedNominationTimeout(len(nodeSet)) + timeout = waits.expectedPoolNominationTimeout(len(nodeSet)) looper.run(eventually(checkNomination, A, A.name, retryWait=1, timeout=timeout)) - timeout = waits.expectedNominationTimeout(len(nodeSet)) + timeout = waits.expectedPoolNominationTimeout(len(nodeSet)) for n in nodesBCD: # Checking whether Node B, C and D nominated Node A looper.run(eventually(checkNomination, n, A.name, diff --git a/plenum/test/primary_election/test_primary_election_with_tie.py b/plenum/test/primary_election/test_primary_election_with_tie.py index 81372fe560..cd0713e044 100644 --- a/plenum/test/primary_election/test_primary_election_with_tie.py +++ b/plenum/test/primary_election/test_primary_election_with_tie.py @@ -68,7 +68,7 @@ def testPrimaryElectionWithTie(electTieFixture, looper, keySharedNodes): format(replica.name, replica.instId, node.elector.nominations.get(instId, {}))) - nominationTimeout = waits.expectedNominationTimeout(len(nodeSet)) + nominationTimeout = waits.expectedPoolNominationTimeout(len(nodeSet)) logger.debug("Check nomination") # Checking whether Node A nominated itself looper.run(eventually(checkNomination, A, A.name, diff --git 
a/plenum/test/primary_selection/test_primary_selection.py b/plenum/test/primary_selection/test_primary_selection.py index 09f796b47c..823f50b43d 100644 --- a/plenum/test/primary_selection/test_primary_selection.py +++ b/plenum/test/primary_selection/test_primary_selection.py @@ -57,7 +57,7 @@ def checkPrimaryPlacement(): assert node.replicas[2].isPrimary # Check if the primary is on the correct node - timeout = waits.expectedElectionTimeout(len(nodeSet)) + timeout = waits.expectedPoolElectionTimeout(len(nodeSet)) looper.run(eventually(checkPrimaryPlacement, retryWait=1, timeout=timeout)) # Check if every protocol instance has one and only one primary and any node # has no more than one primary diff --git a/plenum/test/propagate/test_propagate_recvd_before_request.py b/plenum/test/propagate/test_propagate_recvd_before_request.py index ac834ea3b6..3e9e4b173e 100644 --- a/plenum/test/propagate/test_propagate_recvd_before_request.py +++ b/plenum/test/propagate/test_propagate_recvd_before_request.py @@ -31,7 +31,7 @@ def x(): # A should have sent only one PROPAGATE assert len(sentPropagate(A)) == 1 - timeout = delaySec - 2 + timeout = waits.expectedNodeToNodeMessageDeliveryTime() + delaySec - 2 looper.run(eventually(x, retryWait=.5, timeout=timeout)) def y(): @@ -40,12 +40,12 @@ def y(): # A should still have sent only one PROPAGATE assert len(sentPropagate(A)) == 1 - timeout = delaySec + 2 + timeout = waits.expectedNodeToNodeMessageDeliveryTime() + delaySec + 2 looper.run(eventually(y, retryWait=.5, timeout=timeout)) def chk(): # A should have forwarded the request assertLength(forwardedRequest(A), 1) - timeout = waits.expectedClientRequestPropagationTime(len(nodeSet)) + timeout = waits.expectedClientRequestPropagationTime(len(nodeSet)) + delaySec looper.run(eventually(chk, retryWait=1, timeout=timeout)) diff --git a/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py b/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py 
index f24163bf8b..0601c8b16e 100644 --- a/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py +++ b/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py @@ -16,11 +16,13 @@ def testPrimarySendsAPrepareAndMarkedSuspicious(looper, nodeSet, preprepared1): def sendPrepareFromPrimary(instId): primary = getPrimaryReplica(nodeSet, instId) viewNo, ppSeqNo = next(iter(primary.sentPrePrepares.keys())) + ppReq = primary.sentPrePrepares[viewNo, ppSeqNo] prepare = Prepare(instId, viewNo, ppSeqNo, - preprepared1.digest, - time.time()) + ppReq.digest, + ppReq.stateRootHash, + ppReq.txnRootHash) primary.doPrepare(prepare) def chk(): diff --git a/plenum/test/replica/test_replica_reject_same_pre_prepare.py b/plenum/test/replica/test_replica_reject_same_pre_prepare.py index 68bcb19bcf..110d3b504a 100644 --- a/plenum/test/replica/test_replica_reject_same_pre_prepare.py +++ b/plenum/test/replica/test_replica_reject_same_pre_prepare.py @@ -2,9 +2,11 @@ import pytest +from plenum.test.spy_helpers import getAllArgs, getAllReturnVals from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.common.types import PrePrepare +from plenum.common.constants import DOMAIN_LEDGER_ID from plenum.common.util import getMaxFailures from plenum.test import waits from plenum.test.helper import checkPrePrepareReqSent, \ @@ -52,14 +54,18 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet) logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas)) + reqIdr = [(request2.identifier, request2.reqId)] prePrepareReq = PrePrepare( primaryRepl.instId, primaryRepl.viewNo, primaryRepl.lastPrePrepareSeqNo, - wallet1.defaultId, - request2.reqId, - request2.digest, - time.time() + time.time(), + reqIdr, + 1, + primaryRepl.batchDigest([request2]), + DOMAIN_LEDGER_ID, + primaryRepl.stateRootHash(DOMAIN_LEDGER_ID), + 
primaryRepl.txnRootHash(DOMAIN_LEDGER_ID) ) logger.debug("""Checking whether all the non primary replicas have received @@ -73,12 +79,15 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): logger.debug("""Check that none of the non primary replicas didn't send any prepare message " in response to the pre-prepare message""") - timeout = waits.expectedPrePrepareTime(len(nodeSet)) + timeout = waits.expectedPrepareTime(len(nodeSet)) + looper.runFor(timeout) # expect prepare processing timeout + + # check if prepares have not been sent for npr in nonPrimaryReplicas: with pytest.raises(AssertionError): looper.run(eventually(checkPrepareReqSent, npr, - wallet1.defaultId, + request2.identifier, request2.reqId, retryWait=1, timeout=timeout)) diff --git a/plenum/test/script/helper.py b/plenum/test/script/helper.py index c9952b2a50..db743f6137 100644 --- a/plenum/test/script/helper.py +++ b/plenum/test/script/helper.py @@ -20,32 +20,11 @@ logger = getlogger() -@pytest.fixture(scope="module") -def tconf(tconf, request): - oldVal = tconf.UpdateGenesisPoolTxnFile - tconf.UpdateGenesisPoolTxnFile = True - - def reset(): - tconf.UpdateGenesisPoolTxnFile = oldVal - - request.addfinalizer(reset) - return tconf - - @pytest.yield_fixture(scope="module") def looper(txnPoolNodesLooper): yield txnPoolNodesLooper -def checkIfGenesisPoolTxnFileUpdated(*nodesAndClients): - for item in nodesAndClients: - poolTxnFileName = item.poolManager.ledgerFile if \ - isinstance(item, TestNode) else item.ledgerFile - genFile = os.path.join(item.basedirpath, poolTxnFileName) - ledgerFile = os.path.join(item.dataLocation, poolTxnFileName) - assert filecmp.cmp(genFile, ledgerFile, shallow=False) - - def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnData, poolTxnStewardNames, tconf, shouldBePrimary): @@ -55,12 +34,11 @@ def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, stewardsSeed = None for nodeIndex, n in enumerate(txnPoolNodeSet): - if (shouldBePrimary 
and n.primaryReplicaNo == 0) or \ - (not shouldBePrimary and n.primaryReplicaNo != 0): - subjectedNode = n - stewardName = poolTxnStewardNames[nodeIndex] - stewardsSeed = poolTxnData["seeds"][stewardName].encode() - break + if shouldBePrimary == (n.primaryReplicaNo == 0): + subjectedNode = n + stewardName = poolTxnStewardNames[nodeIndex] + stewardsSeed = poolTxnData["seeds"][stewardName].encode() + break nodeStackNewHA, clientStackNewHA = genHa(2) logger.debug("change HA for node: {} to {}". @@ -84,11 +62,16 @@ def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, config=tconf, ha=nodeStackNewHA, cliha=clientStackNewHA) looper.add(restartedNode) - txnPoolNodeSet[nodeIndex] = restartedNode - looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=70)) - ensureElectionsDone(looper, txnPoolNodeSet, retryWait=1) + + electionTimeout = waits.expectedPoolElectionTimeout( + nodeCount=len(txnPoolNodeSet), + numOfReelections=3) + ensureElectionsDone(looper, + txnPoolNodeSet, + retryWait=1, + customTimeout=electionTimeout) # start client and check the node HA anotherClient, _ = genTestClient(tmpdir=tdirWithPoolTxns, @@ -98,7 +81,3 @@ def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, stewardWallet = Wallet(stewardName) stewardWallet.addIdentifier(signer=SimpleSigner(seed=stewardsSeed)) sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, stewardClient, 8) - timeout = waits.expectedPoolLedgerCheck(len(txnPoolNodeSet) + 1) - looper.run(eventually(checkIfGenesisPoolTxnFileUpdated, *txnPoolNodeSet, - stewardClient, anotherClient, retryWait=1, - timeout=timeout)) diff --git a/plenum/test/script/test_change_non_primary_node_ha.py b/plenum/test/script/test_change_non_primary_node_ha.py index 8d014e58b4..d0e6933fad 100644 --- a/plenum/test/script/test_change_non_primary_node_ha.py +++ b/plenum/test/script/test_change_non_primary_node_ha.py @@ -1,6 +1,6 @@ import pytest -from plenum.test.script.helper import looper, tconf +from plenum.test.script.helper import 
looper from stp_core.common.log import getlogger from plenum.test.script.helper import changeNodeHa @@ -14,8 +14,13 @@ @pytest.mark.skipif('sys.platform == "win32"', reason='SOV-330') -@pytest.mark.skip(reason="SOV-941") def testChangeNodeHaForNonPrimary(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnData, poolTxnStewardNames, tconf): - changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, - poolTxnData, poolTxnStewardNames, tconf, shouldBePrimary=False) + + changeNodeHa(looper, + txnPoolNodeSet, + tdirWithPoolTxns, + poolTxnData, + poolTxnStewardNames, + tconf, + shouldBePrimary=False) diff --git a/plenum/test/script/test_change_primary_node_ha.py b/plenum/test/script/test_change_primary_node_ha.py index 4288a1ddcb..4dbb655b45 100644 --- a/plenum/test/script/test_change_primary_node_ha.py +++ b/plenum/test/script/test_change_primary_node_ha.py @@ -1,6 +1,6 @@ import pytest -from plenum.test.script.helper import looper, tconf +from plenum.test.script.helper import looper from stp_core.common.log import getlogger from plenum.test.script.helper import changeNodeHa @@ -13,8 +13,13 @@ 'got error while verifying message'] -@pytest.mark.skip(reason='SOV-330') +@pytest.mark.skipif('sys.platform == "win32"', reason='SOV-330') def testChangeNodeHaForPrimary(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnData, poolTxnStewardNames, tconf): - changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, - poolTxnData, poolTxnStewardNames, tconf, shouldBePrimary=True) + changeNodeHa(looper, + txnPoolNodeSet, + tdirWithPoolTxns, + poolTxnData, + poolTxnStewardNames, + tconf, + shouldBePrimary=True) diff --git a/plenum/test/storage/helper.py b/plenum/test/storage/helper.py index 9d7f784342..63a26612ab 100644 --- a/plenum/test/storage/helper.py +++ b/plenum/test/storage/helper.py @@ -12,6 +12,6 @@ def chk(node): assert result.get(f.IDENTIFIER.nm) == reply1.identifier assert result.get(TXN_TYPE) == reply1.operation.get(TXN_TYPE) - timeout = waits.expectedPoolLedgerCheck(len(nodes)) + 
timeout = waits.expectedPoolLedgerRepliedMsgPersisted(len(nodes)) for node in nodes: lpr.run(eventually(chk, node, retryWait=1, timeout=timeout)) diff --git a/plenum/test/storage/test_leveldb_hash_store.py b/plenum/test/storage/test_leveldb_hash_store.py new file mode 100644 index 0000000000..a6f0df0549 --- /dev/null +++ b/plenum/test/storage/test_leveldb_hash_store.py @@ -0,0 +1,56 @@ +import pytest + +from ledger.compact_merkle_tree import CompactMerkleTree +from ledger.ledger import Ledger +from ledger.test.test_file_hash_store import nodesLeaves, \ + generateHashes + +from plenum.persistence.leveldb_hash_store import LevelDbHashStore + + +@pytest.yield_fixture(scope="module") +def leveldbHashStore(tdir): + hs = LevelDbHashStore(tdir) + cleanup(hs) + yield hs + hs.close() + + +def cleanup(hs): + hs.reset() + hs.leafCount = 0 + + +def testIndexFrom1(leveldbHashStore): + with pytest.raises(IndexError): + leveldbHashStore.readLeaf(0) + + +def testReadWrite(leveldbHashStore, nodesLeaves): + nodes, leaves = nodesLeaves + for node in nodes: + leveldbHashStore.writeNode(node) + for leaf in leaves: + leveldbHashStore.writeLeaf(leaf) + onebyone = [leveldbHashStore.readLeaf(i + 1) for i in range(10)] + multiple = leveldbHashStore.readLeafs(1, 10) + assert onebyone == leaves + assert onebyone == multiple + + +def testRecoverLedgerFromHashStore(leveldbHashStore, tdir): + cleanup(leveldbHashStore) + tree = CompactMerkleTree(hashStore=leveldbHashStore) + ledger = Ledger(tree=tree, dataDir=tdir) + for d in range(10): + ledger.add(str(d).encode()) + updatedTree = ledger.tree + ledger.stop() + + tree = CompactMerkleTree(hashStore=leveldbHashStore) + restartedLedger = Ledger(tree=tree, dataDir=tdir) + assert restartedLedger.size == ledger.size + assert restartedLedger.root_hash == ledger.root_hash + assert restartedLedger.tree.hashes == updatedTree.hashes + assert restartedLedger.tree.root_hash == updatedTree.root_hash + restartedLedger.stop() diff --git 
a/plenum/test/storage/test_orientdb_hash_store.py b/plenum/test/storage/test_orientdb_hash_store.py deleted file mode 100644 index 555b7ffffc..0000000000 --- a/plenum/test/storage/test_orientdb_hash_store.py +++ /dev/null @@ -1,75 +0,0 @@ -import pyorient -import pytest - -from ledger.compact_merkle_tree import CompactMerkleTree -from ledger.ledger import Ledger -from ledger.test.test_file_hash_store import nodesLeaves, \ - generateHashes - -from plenum.persistence.orientdb_hash_store import OrientDbHashStore -from plenum.persistence.orientdb_store import OrientDbStore - - -@pytest.fixture(scope="module") -def odbhs(): - hs = OrientDbHashStore( - OrientDbStore(user="root", password="password", dbName="test")) - cleanup(hs) - return hs - - -def cleanup(hs): - for cls in [hs.nodeHashClass, hs.leafHashClass]: - if hs.store.classExists(cls): - hs.store.client.command("Truncate class {}".format(cls)) - hs.leafCount = 0 - - -def testOrientDbSetup(odbhs): - store = odbhs.store - # This seems to be a bug in pyorient. Reported. 
Bug #186 - # assert store.client.db_exists("test", pyorient.STORAGE_TYPE_MEMORY) - assert store.classExists(odbhs.leafHashClass) - assert store.classExists(odbhs.nodeHashClass) - - -def testIndexFrom1(odbhs: OrientDbHashStore): - with pytest.raises(IndexError): - odbhs.readLeaf(0) - - -def testReadWrite(odbhs: OrientDbHashStore, nodesLeaves): - nodes, leaves = nodesLeaves - for node in nodes: - odbhs.writeNode(node) - for leaf in leaves: - odbhs.writeLeaf(leaf) - onebyone = [odbhs.readLeaf(i + 1) for i in range(10)] - multiple = odbhs.readLeafs(1, 10) - assert onebyone == leaves - assert onebyone == multiple - - -def testUniqueConstraint(odbhs: OrientDbHashStore): - leafHash = generateHashes(1)[0] - odbhs.writeLeaf(leafHash) - with pytest.raises(pyorient.PyOrientORecordDuplicatedException): - odbhs.writeLeaf(leafHash) - - -def testRecoverLedgerFromHashStore(odbhs, tdir): - cleanup(odbhs) - tree = CompactMerkleTree(hashStore=odbhs) - ledger = Ledger(tree=tree, dataDir=tdir) - for d in range(10): - ledger.add(str(d).encode()) - updatedTree = ledger.tree - ledger.stop() - - tree = CompactMerkleTree(hashStore=odbhs) - restartedLedger = Ledger(tree=tree, dataDir=tdir) - assert restartedLedger.size == ledger.size - assert restartedLedger.root_hash == ledger.root_hash - assert restartedLedger.tree.hashes == updatedTree.hashes - assert restartedLedger.tree.root_hash == updatedTree.root_hash - restartedLedger.stop() diff --git a/plenum/test/storage/test_orientdb_version.py b/plenum/test/storage/test_orientdb_version.py deleted file mode 100644 index 653131aee5..0000000000 --- a/plenum/test/storage/test_orientdb_version.py +++ /dev/null @@ -1,13 +0,0 @@ -from plenum.persistence.orientdb_store import OrientDbStore -from plenum.common.config_util import getConfig - -config = getConfig() - - -def testOrientDbRequiredVersion(): - orientConf = config.OrientDB - db = OrientDbStore(user=orientConf["user"], password=orientConf["password"], - host=orientConf["host"], 
port=orientConf["port"], - dbName="test") - version = db.serverVersion - assert version and version[0] >= 2 and version[1] >= 2 diff --git a/plenum/test/test_delay.py b/plenum/test/test_delay.py index 3557c642bf..7984e0209b 100644 --- a/plenum/test/test_delay.py +++ b/plenum/test/test_delay.py @@ -29,7 +29,7 @@ def testTestNodeDelay(tdir_for_func): # set delay, then send another message # and find that it doesn't arrive - delay = 10 * slowFactor + delay = 5 * waits.expectedNodeToNodeMessageDeliveryTime() nodeB.nodeIbStasher.delay( delayerMsgTuple(delay, TestMsg, nodeA.name) ) @@ -38,8 +38,9 @@ def testTestNodeDelay(tdir_for_func): # but then find that it arrives after the delay # duration has passed + timeout = waits.expectedNodeToNodeMessageDeliveryTime() + delay looper.run(sendMessageAndCheckDelivery(nodes, nodeA, nodeB, - customTimeout=delay)) + customTimeout=timeout)) # reset the delay, and find another message comes quickly nodeB.nodeIbStasher.resetDelays() diff --git a/plenum/test/test_ledger_manager.py b/plenum/test/test_ledger_manager.py index 51eac321c2..63c0dab2af 100644 --- a/plenum/test/test_ledger_manager.py +++ b/plenum/test/test_ledger_manager.py @@ -4,7 +4,8 @@ @spyable(methods=[LedgerManager.startCatchUpProcess, LedgerManager.catchupCompleted, - LedgerManager.processConsistencyProofReq]) + LedgerManager.processConsistencyProofReq, + LedgerManager.canProcessConsistencyProof]) class TestLedgerManager(LedgerManager): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) \ No newline at end of file diff --git a/plenum/test/test_log_rotation.py b/plenum/test/test_log_rotation.py index fb5deefd16..e9badae8b0 100644 --- a/plenum/test/test_log_rotation.py +++ b/plenum/test/test_log_rotation.py @@ -65,4 +65,4 @@ def test_time_and_size_log_rotation(): time.sleep(1) logger.debug("line") - assert len(os.listdir(logDirPath)) == 8 \ No newline at end of file + assert len(os.listdir(logDirPath)) == 8 diff --git a/plenum/test/test_node.py 
b/plenum/test/test_node.py index d4e318d057..faa1c8981c 100644 --- a/plenum/test/test_node.py +++ b/plenum/test/test_node.py @@ -10,6 +10,7 @@ List from plenum.common.stacks import nodeStackClass, clientStackClass +from plenum.server.domain_req_handler import DomainRequestHandler from stp_core.crypto.util import randomSeed from stp_core.network.port_dispenser import genHa @@ -21,10 +22,10 @@ from stp_core.common.log import getlogger from stp_core.loop.looper import Looper from plenum.common.startable import Status -from plenum.common.types import TaggedTuples, NodeDetail -from plenum.common.constants import CLIENT_STACK_SUFFIX +from plenum.common.types import TaggedTuples, NodeDetail, f +from plenum.common.constants import CLIENT_STACK_SUFFIX, TXN_TYPE, \ + DOMAIN_LEDGER_ID from plenum.common.util import Seconds, getMaxFailures, adict -from plenum.persistence import orientdb_store from plenum.server import replica from plenum.server.instances import Instances from plenum.server.monitor import Monitor @@ -45,6 +46,21 @@ logger = getlogger() +class TestDomainRequestHandler(DomainRequestHandler): + def _updateStateWithSingleTxn(self, txn, isCommitted=False): + typ = txn.get(TXN_TYPE) + if typ == 'buy': + idr = txn.get(f.IDENTIFIER.nm) + rId = txn.get(f.REQ_ID.nm) + key = '{}:{}'.format(idr, rId).encode() + val = self.stateSerializer.serialize({TXN_TYPE: typ}) + self.state.set(key, val) + logger.trace('{} after adding to state, headhash is {}'. 
+ format(self, self.state.headHash)) + else: + super()._updateStateWithSingleTxn(txn, isCommitted=isCommitted) + + NodeRef = TypeVar('NodeRef', Node, str) @@ -137,7 +153,7 @@ def whitelistNode(self, nodeName: str, *codes: int): self.whitelistedClients[nodeName] = set() self.whitelistedClients[nodeName].update(codes) logger.debug("{} whitelisting {} for codes {}" - .format(self, nodeName, codes)) + .format(self, nodeName, codes)) def blacklistNode(self, nodeName: str, reason: str=None, code: int=None): if nodeName in self.whitelistedClients: @@ -154,7 +170,7 @@ def whitelistClient(self, clientName: str, *codes: int): self.whitelistedClients[clientName] = set() self.whitelistedClients[clientName].update(codes) logger.debug("{} whitelisting {} for codes {}" - .format(self, clientName, codes)) + .format(self, clientName, codes)) def blacklistClient(self, clientName: str, reason: str=None, code: int=None): if clientName in self.whitelistedClients: @@ -174,7 +190,7 @@ def validateNodeMsg(self, wrappedMsg): async def eatTestMsg(self, msg, frm): logger.debug("{0} received Test message: {1} from {2}". 
- format(self.nodestack.name, msg, frm)) + format(self.nodestack.name, msg, frm)) def serviceReplicaOutBox(self, *args, **kwargs) -> int: for r in self.replicas: # type: TestReplica @@ -184,6 +200,11 @@ def serviceReplicaOutBox(self, *args, **kwargs) -> int: def ensureKeysAreSetup(self): pass + def getDomainReqHandler(self): + return TestDomainRequestHandler(self.domainLedger, + self.states[DOMAIN_LEDGER_ID], + self.reqProcessors) + @spyable(methods=[Node.handleOneNodeMsg, Node.handleInvalidClientMsg, @@ -204,7 +225,9 @@ def ensureKeysAreSetup(self): Node.sendInstanceChange, Node.processInstanceChange, Node.checkPerformance, - Node.processStashedOrderedReqs + Node.processStashedOrderedReqs, + Node.lost_master_primary, + Node.propose_view_change ]) class TestNode(TestNodeCore, Node): @@ -220,10 +243,6 @@ def __init__(self, *args, **kwargs): # Txns of all clients, each txn is a tuple like (from, to, amount) self.txns = [] # type: List[Tuple] - def _getOrientDbStore(self, name, dbType): - return orientdb_store.createOrientDbInMemStore( - self.config, name, dbType) - @property def nodeStackClass(self): return getTestableStack(self.NodeStackClass) @@ -233,9 +252,13 @@ def clientStackClass(self): return getTestableStack(self.ClientStackClass) def getLedgerManager(self): - return TestLedgerManager(self, ownedByNode=True) + return TestLedgerManager(self, ownedByNode=True, + postAllLedgersCaughtUp=self.allLedgersCaughtUp) +@spyable(methods=[ + PrimaryElector.discard + ]) class TestPrimaryElector(PrimaryElector): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -248,10 +271,10 @@ def _serviceActions(self): return super()._serviceActions() -@spyable(methods=[replica.Replica.doPrePrepare, +@spyable(methods=[replica.Replica.sendPrePrepare, replica.Replica.canProcessPrePrepare, - replica.Replica.canSendPrepare, - replica.Replica.isValidPrepare, + replica.Replica.canPrepare, + replica.Replica.validatePrepare, replica.Replica.addToPrePrepares, 
replica.Replica.processPrePrepare, replica.Replica.processPrepare, @@ -260,7 +283,6 @@ def _serviceActions(self): replica.Replica.doOrder, replica.Replica.discard, replica.Replica.stashOutsideWatermarks - # replica.Replica.orderPendingCommit ]) class TestReplica(replica.Replica): def __init__(self, *args, **kwargs): @@ -405,11 +427,12 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.masterReqLatenciesTest = {} - def requestOrdered(self, identifier: str, reqId: int, instId: int, + def requestOrdered(self, reqIdrs: List[Tuple[str, int]], instId: int, byMaster: bool = False): - duration = super().requestOrdered(identifier, reqId, instId, byMaster) - if byMaster and duration is not None: - self.masterReqLatenciesTest[(identifier, reqId)] = duration + durations = super().requestOrdered(reqIdrs, instId, byMaster) + if byMaster and durations: + for (identifier, reqId), duration in durations.items(): + self.masterReqLatenciesTest[identifier, reqId] = duration def reset(self): super().reset() @@ -454,7 +477,7 @@ def checkPoolReady(looper: Looper, Check that pool is in Ready state """ - timeout = customTimeout or waits.expectedPoolGetReadyTimeout(len(nodes)) + timeout = customTimeout or waits.expectedPoolStartUpTimeout(len(nodes)) looper.run( eventually(checkNodesAreReady, nodes, retryWait=.25, @@ -481,8 +504,7 @@ async def checkNodesConnected(stacks: Iterable[Union[TestNode, TestClient]], customTimeout=None): expectedRemoteState = expectedRemoteState if expectedRemoteState else CONNECTED # run for how long we expect all of the connections to take - timeout = customTimeout or \ - (waits.expectedNodeInterconnectionTime(len(stacks)) * len(stacks)) + timeout = customTimeout or waits.expectedPoolInterconnectionTime(len(stacks)) logger.debug("waiting for {} seconds to check connections...".format(timeout)) # verify every node can see every other as a remote funcs = [ @@ -507,7 +529,7 @@ def checkNodeRemotes(node: TestNode, states: Dict[str, 
RemoteState]=None, except Exception as ex: logger.debug("state checking exception is {} and args are {}" "".format(ex, ex.args)) - raise RuntimeError( + raise Exception( "Error with {} checking remote {} in {}".format(node.name, remote.name, states @@ -522,11 +544,11 @@ def checkIfSameReplicaIPrimary(looper: Looper, # on same primary def checkElectionDone(): - unknowns = sum(1 for r in replicas if r.isPrimary is None) - assert unknowns == 0, "election should be complete, but {} out of {} " \ - "don't know who the primary is for " \ - "protocol no {}".\ - format(unknowns, len(replicas), replicas[0].instId) + unknowns = [r for r in replicas if r.primaryName is None] + assert len(unknowns) == 0, "election should be complete, " \ + "but {} out of {} ({}) don't know who the primary " \ + "is for protocol instance {}".\ + format(len(unknowns), len(replicas), unknowns, replicas[0].instId) def checkPrisAreOne(): # number of expected primaries pris = sum(1 for r in replicas if r.isPrimary) @@ -567,10 +589,7 @@ def checkEveryProtocolInstanceHasOnlyOnePrimary(looper: Looper, coro = eventually(instances, nodes, retryWait=retryWait, timeout=timeout) insts, timeConsumed = timeThis(looper.run, coro) - - # TODO refactor this to just user eventuallyAll newTimeout = timeout - timeConsumed if timeout is not None else None - for instId, replicas in insts.items(): logger.debug("Checking replicas in instance: {}".format(instId)) checkIfSameReplicaIPrimary(looper=looper, @@ -587,7 +606,7 @@ def checkAtMostOnePrim(node): prims = [r for r in node.replicas if r.isPrimary] assert len(prims) <= 1 - timeout = customTimeout or waits.expectedElectionTimeout(len(nodes)) + timeout = customTimeout or waits.expectedPoolElectionTimeout(len(nodes)) for node in nodes: looper.run(eventually(checkAtMostOnePrim, node, @@ -595,24 +614,22 @@ def checkAtMostOnePrim(node): timeout=timeout)) -def checkProtocolInstanceSetup(looper: Looper, nodes: Sequence[TestNode], +def checkProtocolInstanceSetup(looper: 
Looper, + nodes: Sequence[TestNode], retryWait: float = 1, customTimeout: float = None): - totalTimeout = customTimeout or waits.expectedElectionTimeout(len(nodes)) - instanceTimeout = totalTimeout * 4/5 - nodeTimeout = totalTimeout * 1/5 - + timeout = customTimeout or waits.expectedPoolElectionTimeout(len(nodes)) checkEveryProtocolInstanceHasOnlyOnePrimary(looper=looper, nodes=nodes, retryWait=retryWait, - timeout=instanceTimeout) + timeout=timeout) checkEveryNodeHasAtMostOnePrimary(looper=looper, nodes=nodes, retryWait=retryWait, - customTimeout=nodeTimeout) + customTimeout=timeout) primaryReplicas = {replica.instId: replica for node in nodes @@ -624,31 +641,26 @@ def checkProtocolInstanceSetup(looper: Looper, nodes: Sequence[TestNode], def ensureElectionsDone(looper: Looper, nodes: Sequence[TestNode], retryWait: float = None, # seconds - timeout: float = None) -> Sequence[TestNode]: + customTimeout: float = None) -> Sequence[TestNode]: """ Wait for elections to be complete :param retryWait: - :param timeout: specific timeout + :param customTimeout: specific timeout :return: primary replica for each protocol instance """ if retryWait is None: retryWait = 1 - if timeout is None: - timeout = waits.expectedElectionTimeout(len(nodes)) - - poolReadyTimeout = 1/3 * timeout - setupCheckTimeout = 2/3 * timeout - - checkPoolReady(looper, nodes, customTimeout=poolReadyTimeout) + if customTimeout is None: + customTimeout = waits.expectedPoolElectionTimeout(len(nodes)) return checkProtocolInstanceSetup( looper=looper, nodes=nodes, retryWait=retryWait, - customTimeout=setupCheckTimeout) + customTimeout=customTimeout) def genNodeReg(count=None, names=None) -> Dict[str, NodeDetail]: @@ -711,8 +723,7 @@ def instances(nodes: Sequence[Node]) -> Dict[int, List[replica.Replica]]: instCount = getRequiredInstances(len(nodes)) for n in nodes: assert len(n.replicas) == instCount - return {i: [n.replicas[i] for n in nodes] - for i in range(instCount)} + return {i: [n.replicas[i] for n in 
nodes] for i in range(instCount)} def getRequiredInstances(nodeCount: int) -> int: @@ -741,3 +752,45 @@ def getNonPrimaryReplicas(nodes: Iterable[TestNode], instId: int = 0) -> \ def getAllReplicas(nodes: Iterable[TestNode], instId: int = 0) -> \ Sequence[TestReplica]: return [node.replicas[instId] for node in nodes] + + +def get_master_primary_node(nodes): + node = next(iter(nodes)) + if node.replicas[0].primaryName is not None: + nm = TestReplica.getNodeName(node.replicas[0].primaryName) + return nodeByName(nodes, nm) + + +def primaryNodeNameForInstance(nodes, instanceId): + primaryNames = {node.replicas[instanceId].primaryName for node in nodes} + assert 1 == len(primaryNames) + primaryReplicaName = next(iter(primaryNames)) + return primaryReplicaName[:-2] + + +def nodeByName(nodes, name): + for node in nodes: + if node.name == name: + return node + raise Exception("Node with the name '{}' has not been found.".format(name)) + + +def check_node_disconnected_from(needle: str, haystack: Iterable[TestNode]): + """ + Check if the node name given by `needle` is disconnected from nodes in + `haystack` + :param needle: Node name which should be disconnected from nodes from + `haystack` + :param haystack: nodes who should be disconnected from `needle` + :return: + """ + assert all([needle not in node.nodestack.connecteds for node in haystack]) + + +def ensure_node_disconnected(looper, disconnected_name, other_nodes, + timeout=None): + timeout = timeout or (len(other_nodes) - 1) + looper.run(eventually(check_node_disconnected_from, disconnected_name, + [n for n in other_nodes + if n.name != disconnected_name], + retryWait=1, timeout=timeout)) diff --git a/plenum/test/test_node_basic.py b/plenum/test/test_node_basic.py index 18fcc773b8..5e6cd42653 100644 --- a/plenum/test/test_node_basic.py +++ b/plenum/test/test_node_basic.py @@ -10,12 +10,6 @@ nodeCount = 4 -# @pytest.fixture(scope="module") -# def setup(request, tdir, nodeReg): -# for name in nodeReg: -# pass - - 
@pytest.fixture(scope="module") def pool(looper, nodeSet): # for n in nodeSet: # type: TestNode diff --git a/plenum/test/test_node_connection.py b/plenum/test/test_node_connection.py index 535b7cc5d0..12853a9980 100644 --- a/plenum/test/test_node_connection.py +++ b/plenum/test/test_node_connection.py @@ -31,11 +31,12 @@ def nodeReg(): 'Delta': NodeDetail(genHa(1), "DeltaC", genHa(1)) } + def initLocalKeys(tdir, nodeReg): for nName in nodeReg.keys(): sigseed = randomString(32).encode() initNodeKeysForBothStacks(nName, tdir, sigseed, override=True) - + logger.debug('Created keys for {}'.format(nName)) # Its a function fixture, deliberately @@ -47,12 +48,12 @@ def tdirAndLooper(nodeReg): yield td, looper - - -@pytest.mark.skip() +@pytest.mark.skip(reason='INDY-75') def testNodesConnectsWhenOneNodeIsLate(allPluginsPath, tdirAndLooper, - nodeReg, conf): + nodeReg): tdir, looper = tdirAndLooper + initLocalKeys(tdir, nodeReg) + nodes = [] names = list(nodeReg.keys()) logger.debug("Node names: {}".format(names)) @@ -60,12 +61,18 @@ def testNodesConnectsWhenOneNodeIsLate(allPluginsPath, tdirAndLooper, def create(name): node = TestNode(name, nodeReg, basedirpath=tdir, pluginPaths=allPluginsPath) - looper.add(node) nodes.append(node) + return node for name in names[:3]: create(name) + logger.debug("Creating keys") + + for node in nodes: + tellKeysToOthers(node, nodes) + looper.add(node) + looper.run(checkNodesConnected(nodes)) # wait for the election to complete with the first three nodes @@ -73,7 +80,12 @@ def create(name): # create the fourth and see that it learns who the primaries are # from the other nodes - create(names[3]) + lateNode = create(names[3]) + for node in nodes[:-1]: + tellKeysToOthers(lateNode, node) + tellKeysToOthers(node, lateNode) + + looper.add(lateNode) # TODO set timeout from 'waits' after the test enabled checkProtocolInstanceSetup(looper, nodes, customTimeout=10) stopNodes(nodes, looper) @@ -225,6 +237,6 @@ def chk(): assert C.name not in 
B.nodestack.nameRemotes assert C.name not in A.nodestack.nameRemotes - timeout = waits.expectedNodeInterconnectionTime(len(nodeReg)) + timeout = waits.expectedPoolInterconnectionTime(len(nodeReg)) looper.run(eventually(chk, retryWait=2, timeout=timeout)) stopNodes([A, B], looper) diff --git a/plenum/test/test_node_request.py b/plenum/test/test_node_request.py index c487ff99ff..26408d1452 100644 --- a/plenum/test/test_node_request.py +++ b/plenum/test/test_node_request.py @@ -166,8 +166,8 @@ async def checkIfPropagateRecvdFromNode(recvrNode: TestNode, # noinspection PyIncorrectDocstring -@pytest.mark.skip(reason="ZStack does not have any mechanism to have stats " - "either remove this once raet is removed " +@pytest.mark.skip(reason="INDY-76. ZStack does not have any mechanism to have " + "stats either remove this once raet is removed " "or implement a `stats` feature in ZStack") def testMultipleRequests(tdir_for_func): """ diff --git a/plenum/test/test_round_trip_with_one_faulty_node.py b/plenum/test/test_round_trip_with_one_faulty_node.py index 9880408603..1a2941a6c8 100644 --- a/plenum/test/test_round_trip_with_one_faulty_node.py +++ b/plenum/test/test_round_trip_with_one_faulty_node.py @@ -20,12 +20,13 @@ def alphaDoesntPropagate(startedNodes): propagate requests. """ nodes = startedNodes - async def evilProcessPropagate(self, msg, frm): + + def evilProcessPropagate(self, msg, frm): logger.info("TEST: Evil {} is not processing PROPAGATE".format(self)) def evilPropagateRequest(self, request, clientName): logger.info("TEST: Evil {} is not PROPAGATing client request". 
- format(self)) + format(self)) epp = types.MethodType(evilProcessPropagate, nodes.Alpha) nodes.Alpha.nodeMsgRouter.routes[Propagate] = epp diff --git a/plenum/test/test_stack.py b/plenum/test/test_stack.py index 483f09c528..8e34e8d4f9 100644 --- a/plenum/test/test_stack.py +++ b/plenum/test/test_stack.py @@ -1,3 +1,4 @@ +from functools import partial from typing import Any, Optional, NamedTuple from stp_core.network.network_interface import NetworkInterface @@ -46,30 +47,32 @@ def resetDelays(self): class StackedTester: def checkIfConnectedTo(self, count=None): - connected = 0 + connected = set() # TODO refactor to not use values for address in self.nodeReg.values(): for remote in self.nodestack.remotes.values(): if HA(*remote.ha) == address: if BaseStackClass.isRemoteConnected(remote): - connected += 1 + connected.add(remote.name) break + allRemotes = set(self.nodeReg) totalNodes = len(self.nodeReg) if count is None else count - if count is None and connected == 0: - raise NotConnectedToAny() - elif connected < totalNodes: - raise NotFullyConnected() + if count is None and len(connected) == 0: + raise NotConnectedToAny(allRemotes) + elif len(connected) < totalNodes: + raise NotFullyConnected(allRemotes - connected) else: - assert connected == totalNodes + assert len(connected) == totalNodes - async def ensureConnectedToNodes(self, customTimeout=None): - f = util.getQuorum(len(self.nodeReg)) - timeout = customTimeout or waits.expectedClientConnectionTimeout(f) + async def ensureConnectedToNodes(self, customTimeout=None, count=None): + timeout = customTimeout or \ + waits.expectedClientToPoolConnectionTimeout(len(self.nodeReg)) logger.debug( "waiting for {} seconds to check client connections to " "nodes...".format(timeout)) - await eventuallyAll(self.checkIfConnectedTo, + chk_connected = partial(self.checkIfConnectedTo, count) + await eventuallyAll(chk_connected, retryWait=.5, totalTimeout=timeout) diff --git a/plenum/test/test_verif_merkle_proof.py 
b/plenum/test/test_verif_merkle_proof.py index 3b89305757..cd200b1d83 100644 --- a/plenum/test/test_verif_merkle_proof.py +++ b/plenum/test/test_verif_merkle_proof.py @@ -1,3 +1,5 @@ +import pytest + from plenum.client.client import Client from plenum.test.helper import waitForSufficientRepliesForRequests, \ sendRandomRequest diff --git a/plenum/test/view_change/conftest.py b/plenum/test/view_change/conftest.py index 14fb4c243c..b8aebedf90 100644 --- a/plenum/test/view_change/conftest.py +++ b/plenum/test/view_change/conftest.py @@ -1,5 +1,7 @@ import pytest +from plenum.test.test_node import ensureElectionsDone + @pytest.fixture() def viewNo(nodeSet): diff --git a/plenum/test/view_change/helper.py b/plenum/test/view_change/helper.py new file mode 100644 index 0000000000..d62a9a5839 --- /dev/null +++ b/plenum/test/view_change/helper.py @@ -0,0 +1,68 @@ +import types + +from plenum.test.helper import checkViewNoForNodes, sendRandomRequests, \ + sendReqsToNodesAndVerifySuffReplies +from stp_core.common.log import getlogger +from stp_core.loop.eventually import eventually +from plenum.test import waits + +logger = getlogger() + + +def provoke_and_check_view_change(nodes, newViewNo, wallet, client): + + if {n.viewNo for n in nodes} == {newViewNo}: + return True + + # If throughput of every node has gone down then check that + # view has changed + tr = [n.monitor.isMasterThroughputTooLow() for n in nodes] + if all(tr): + logger.info('Throughput ratio gone down, its {}'.format(tr)) + checkViewNoForNodes(nodes, newViewNo) + else: + logger.info('Master instance has not degraded yet, ' + 'sending more requests') + sendRandomRequests(wallet, client, 10) + assert False + + +def provoke_and_wait_for_view_change(looper, + nodeSet, + expectedViewNo, + wallet, + client, + customTimeout=None): + timeout = customTimeout or waits.expectedPoolViewChangeStartedTimeout(len(nodeSet)) + # timeout *= 30 + return looper.run(eventually(provoke_and_check_view_change, + nodeSet, + 
expectedViewNo, + wallet, + client, + timeout=timeout)) + + +def ensure_view_change(looper, nodes, client, wallet): + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2) + old_view_no = checkViewNoForNodes(nodes) + + old_meths = {} + view_changes = {} + for node in nodes: + old_meths[node.name] = node.monitor.isMasterDegraded + view_changes[node.name] = node.monitor.totalViewChanges + + def slow_master(self): + # Only allow one view change + return self.totalViewChanges == view_changes[self.name] + + node.monitor.isMasterDegraded = types.MethodType(slow_master, node.monitor) + + timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodes)) + \ + client.config.PerfCheckFreq + looper.run(eventually(checkViewNoForNodes, nodes, old_view_no+1, + retryWait=1, timeout=timeout)) + for node in nodes: + node.monitor.isMasterDegraded = old_meths[node.name] + return old_view_no + 1 diff --git a/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py b/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py index 8221a1fdb8..33f25d7fed 100644 --- a/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py +++ b/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py @@ -1,5 +1,4 @@ from stp_core.loop.eventually import eventually -from plenum.common.types import InstanceChange from plenum.server.node import Node from plenum.test import waits from plenum.test.helper import checkDiscardMsg, waitForViewChange @@ -15,15 +14,17 @@ def testDiscardInstChngMsgFrmPastView(nodeSet, looper, ensureView): curViewNo = ensureView # Send an instance change for an old instance message to all nodes - icMsg = InstanceChange(curViewNo - 1) + icMsg = nodeSet.Alpha._create_instance_change_msg(curViewNo - 1, 0) nodeSet.Alpha.send(icMsg) # ensure every node but Alpha discards the invalid instance change request - timeout = waits.expectedViewChangeTime(len(nodeSet)) - looper.run(eventually(checkDiscardMsg, nodeSet, icMsg, - 'less than its 
view no', nodeSet.Alpha, timeout=timeout)) + timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodeSet)) # Check that that message is discarded. + looper.run(eventually(checkDiscardMsg, nodeSet, icMsg, + 'which is not more than its view no', + nodeSet.Alpha, timeout=timeout)) + waitForViewChange(looper, nodeSet) @@ -45,7 +46,7 @@ def testDoNotSendInstChngMsgIfMasterDoesntSeePerformanceProblem( sentInstChanges[n.name] = n.spylog.count(instChngMethodName) # Send an instance change message to all nodes - icMsg = InstanceChange(curViewNo) + icMsg = nodeSet.Alpha._create_instance_change_msg(curViewNo, 0) nodeSet.Alpha.send(icMsg) # Check that that message is discarded. diff --git a/plenum/test/view_change/test_elections_after_view_change.py b/plenum/test/view_change/test_elections_after_view_change.py index 95f997e9d7..95a93a39ec 100644 --- a/plenum/test/view_change/test_elections_after_view_change.py +++ b/plenum/test/view_change/test_elections_after_view_change.py @@ -12,7 +12,7 @@ # noinspection PyIncorrectDocstring -def testElectionsAfterViewChange(delayedPerf, looper: Looper, +def testElectionsAfterViewChange(delayed_perf_chk, looper: Looper, nodeSet: TestNodeSet, up, wallet1, client1): """ Test that a primary election does happen after a view change @@ -29,12 +29,12 @@ def testElectionsAfterViewChange(delayedPerf, looper: Looper, sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) # Ensure view change happened for both node and its primary elector - timeout = waits.expectedViewChangeTime(len(nodeSet)) + timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodeSet)) for node in nodeSet: looper.run(eventually(partial(checkViewChangeInitiatedForNode, node, 1), retryWait=1, timeout=timeout)) # Ensure elections are done again and pool is setup again with appropriate # protocol instances and each protocol instance is setup properly too - timeout = waits.expectedElectionTimeout(len(nodeSet)) + delay + timeout = 
waits.expectedPoolElectionTimeout(len(nodeSet)) + delay checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, customTimeout=timeout) diff --git a/plenum/test/view_change/test_instance_change_msg_checking.py b/plenum/test/view_change/test_instance_change_msg_checking.py index 384258daf0..07c65a0f79 100644 --- a/plenum/test/view_change/test_instance_change_msg_checking.py +++ b/plenum/test/view_change/test_instance_change_msg_checking.py @@ -14,8 +14,10 @@ def testInstanceChangeMsgTypeChecking(nodeSet, looper, up): ridBeta = nodeA.nodestack.getRemote(nodeB.name).uid badViewNo = "BAD" - nodeA.send(InstanceChange(badViewNo), ridBeta) + icMsg = nodeSet.Alpha._create_instance_change_msg(badViewNo, 0) + nodeA.send(icMsg, ridBeta) looper.runFor(0.2) + params = nodeB.spylog.getLastParams(TestNode.discard) def chk(): diff --git a/plenum/test/view_change/test_master_primary_different_from_previous.py b/plenum/test/view_change/test_master_primary_different_from_previous.py new file mode 100644 index 0000000000..65b2f9c4f6 --- /dev/null +++ b/plenum/test/view_change/test_master_primary_different_from_previous.py @@ -0,0 +1,93 @@ +import types + +import pytest + +from plenum.test.helper import checkViewNoForNodes, \ + sendReqsToNodesAndVerifySuffReplies, countDiscarded +from plenum.test.malicious_behaviors_node import slow_primary +from plenum.test.test_node import getPrimaryReplica, ensureElectionsDone +from plenum.test.pool_transactions.conftest import clientAndWallet1, client1, \ + wallet1, client1Connected, looper +from plenum.test.view_change.helper import provoke_and_wait_for_view_change + +from stp_core.common.log import getlogger +logger = getlogger() + + +@pytest.mark.skip(reason='SOV-1020') +def test_master_primary_different_from_previous(txnPoolNodeSet, + looper, client1, + wallet1, client1Connected): + """ + After a view change, primary must be different from previous primary for + master instance, it does not matter for other instance. 
The primary is + benign and does not vote for itself. + """ + old_view_no = checkViewNoForNodes(txnPoolNodeSet) + pr = slow_primary(txnPoolNodeSet, 0, delay=10) + old_pr_node_name = pr.node.name + + # View change happens + provoke_and_wait_for_view_change(looper, + txnPoolNodeSet, + old_view_no + 1, + wallet1, + client1) + logger.debug("VIEW HAS BEEN CHANGED!") + # Elections done + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + # New primary is not same as old primary + assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node_name + + pr.outBoxTestStasher.resetDelays() + + # The new primary can still process requests + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + + + +@pytest.mark.skip(reason='SOV-1020') +def test_master_primary_different_from_previous_view_for_itself(txnPoolNodeSet, + looper, client1, + wallet1, client1Connected): + """ + After a view change, primary must be different from previous primary for + master instance, it does not matter for other instance. Break it into + 2 tests, one where the primary is malign and votes for itself but is still + not made primary in the next view. 
+ """ + old_view_no = checkViewNoForNodes(txnPoolNodeSet) + pr = slow_primary(txnPoolNodeSet, 0, delay=10) + old_pr_node = pr.node + + def _get_undecided_inst_id(self): + undecideds = [i for i, r in enumerate(self.replicas) + if r.isPrimary is None] + # Try to nominate for the master instance + return undecideds, 0 + + # Patching old primary's elector's method to nominate itself + # again for the the new view + old_pr_node.elector._get_undecided_inst_id = types.MethodType( + _get_undecided_inst_id, old_pr_node.elector) + + # View change happens + provoke_and_wait_for_view_change(looper, + txnPoolNodeSet, + old_view_no + 1, + wallet1, + client1) + + # Elections done + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + # New primary is not same as old primary + assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node.name + + # All other nodes discarded the nomination by the old primary + for node in txnPoolNodeSet: + if node != old_pr_node: + assert countDiscarded(node.elector, + 'of master in previous view too') == 1 + + # The new primary can still process requests + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) diff --git a/plenum/test/view_change/test_queueing_req_from_future_view.py b/plenum/test/view_change/test_queueing_req_from_future_view.py index 896ba94ca5..02daf3f862 100644 --- a/plenum/test/view_change/test_queueing_req_from_future_view.py +++ b/plenum/test/view_change/test_queueing_req_from_future_view.py @@ -2,6 +2,7 @@ import pytest +from plenum.test.view_change.helper import provoke_and_wait_for_view_change from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.common.util import getMaxFailures @@ -17,70 +18,112 @@ logger = getlogger() +# TODO: This test needs to be implemented # noinspection PyIncorrectDocstring -def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up, +@pytest.mark.skip(reason='INDY-84. 
Complete implementation') +def testQueueingReqFromFutureView(delayed_perf_chk, looper, nodeSet, up, wallet1, client1): """ Test if every node queues 3 Phase requests(PRE-PREPARE, PREPARE and COMMIT) - that come from a view which is greater than the current view + that come from a view which is greater than the current view. Slow down + the primary node of master protocol instance, delay reception and + processing of view change message by a non primary for master instance so + that it starts receiving 3 phase commit messages for next view """ - f = getMaxFailures(nodeCount) + nprs = getNonPrimaryReplicas(nodeSet, 0) + lagging_node = nprs[0].node + old_view_no = lagging_node.viewNo # Delay processing of instance change on a node - delayIcA = 60 - nodeA = nodeSet.Alpha - nodeA.nodeIbStasher.delay(icDelay(delayIcA)) + delay_ic = 60 + lagging_node.nodeIbStasher.delay(icDelay(delay_ic)) + logger.debug('{} will delay its view change'.format(lagging_node)) - nonPrimReps = getNonPrimaryReplicas(nodeSet, 0) # Delay processing of PRE-PREPARE from all non primary replicas of master # so master's throughput falls and view changes - delay = 5 - ppDelayer = ppDelay(delay, 0) - for r in nonPrimReps: - r.node.nodeIbStasher.delay(ppDelayer) + delay_pp = 5 + pp_delayer = ppDelay(delay_pp, 0) + for r in nprs: + r.node.nodeIbStasher.delay(pp_delayer) - timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + delay - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4, + timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + delay_pp + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5, customTimeoutPerReq=timeout) - # Every node except Node A should have a view change - timeout = waits.expectedViewChangeTime(len(nodeSet)) - for node in nodeSet: - if node.name == nodeA.name: - # Node A's view should not have changed yet - with pytest.raises(AssertionError): - looper.run(eventually(partial( - checkViewChangeInitiatedForNode, node, 1), - 
retryWait=1, - timeout=timeout)) + def chk_fut_view(view_no, is_empty): + length = len(lagging_node.msgsForFutureViews.get(view_no, ())) + if is_empty: + assert length == 0 else: - looper.run(eventually( - partial(checkViewChangeInitiatedForNode, node, 1), - retryWait=1, - timeout=timeout)) + assert length > 0 + return length + + # No messages queued for future view + chk_fut_view(old_view_no+1, is_empty=True) + logger.debug('{} does not have any messages for future views' + .format(lagging_node)) - # NodeA should not have any pending 3 phase request for a later view - for r in nodeA.replicas: # type: TestReplica - assert len(r.threePhaseMsgsForLaterView) == 0 + # Every node except Node A should do a view change + provoke_and_wait_for_view_change(looper, + [n for n in nodeSet if n != lagging_node], + old_view_no + 1, + wallet1, client1) - # Reset delays on incoming messages from all nodes for node in nodeSet: - node.nodeIbStasher.nodelay(ppDelayer) - - # Send one more request - sendRandomRequest(wallet1, client1) - - def checkPending3PhaseReqs(): - # Get all replicas that have their primary status decided - reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None] - # At least one replica should have its primary status decided - assert len(reps) > 0 - for r in reps: # type: TestReplica - logger.debug("primary status for replica {} is {}" - .format(r, r.primaryNames)) - assert len(r.threePhaseMsgsForLaterView) > 0 - - # NodeA should now have pending 3 phase request for a later view - timeout = waits.expectedViewChangeTime(len(nodeSet)) + delayIcA - looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=timeout)) + node.nodeIbStasher.nodelay(pp_delayer) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 3, + customTimeoutPerReq=timeout) + + # Messages queued for future view + l = chk_fut_view(old_view_no + 1, is_empty=False) + logger.debug('{} has {} messages for future views' + .format(lagging_node, l)) + + # Eventually no 
messages queued for future view + looper.run(eventually(chk_fut_view, old_view_no + 1, True, + retryWait=1, timeout=delay_ic+10)) + logger.debug('{} exhausted pending messages for future views' + .format(lagging_node)) + + # timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodeSet)-1) + # # for node in nodeSet: + # # if node.name == nodeA.name: + # # # Node A's view should not have changed yet + # # with pytest.raises(AssertionError): + # # looper.run(eventually(partial( + # # checkViewChangeInitiatedForNode, node, 1), + # # retryWait=1, + # # timeout=timeout)) + # # else: + # # looper.run(eventually( + # # partial(checkViewChangeInitiatedForNode, node, 1), + # # retryWait=1, + # # timeout=timeout)) + # + # + # # NodeA should not have any pending 3 phase request for a later view + # for r in nodeA.replicas: # type: TestReplica + # assert len(r.threePhaseMsgsForLaterView) == 0 + # + # # Reset delays on incoming messages from all nodes + # for node in nodeSet: + # node.nodeIbStasher.nodelay(pp_delayer) + # + # # Send one more request + # sendRandomRequest(wallet1, client1) + # + # def checkPending3PhaseReqs(): + # # Get all replicas that have their primary status decided + # reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None] + # # At least one replica should have its primary status decided + # assert len(reps) > 0 + # for r in reps: # type: TestReplica + # logger.debug("primary status for replica {} is {}" + # .format(r, r.primaryNames)) + # assert len(r.threePhaseMsgsForLaterView) > 0 + # + # # NodeA should now have pending 3 phase request for a later view + # timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodeSet)) + delayIcA + # looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=timeout)) diff --git a/plenum/test/view_change/test_view_change.py b/plenum/test/view_change/test_view_change.py index cde42349f1..fb9dbe0b8d 100644 --- a/plenum/test/view_change/test_view_change.py +++ 
b/plenum/test/view_change/test_view_change.py @@ -8,7 +8,9 @@ from plenum.test.delayers import delayNonPrimaries from plenum.test.helper import waitForViewChange, \ sendReqsToNodesAndVerifySuffReplies -from plenum.test.test_node import getPrimaryReplica +from plenum.test.test_node import getPrimaryReplica, get_master_primary_node, \ + ensureElectionsDone +from plenum.test.test_node import getPrimaryReplica, ensureElectionsDone nodeCount = 7 @@ -16,6 +18,7 @@ # noinspection PyIncorrectDocstring @pytest.fixture() def viewChangeDone(nodeSet, looper, up, wallet1, client1, viewNo): + m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) # Delay processing of PRE-PREPARE from all non primary replicas of master # so master's performance falls and view changes delayNonPrimaries(nodeSet, 0, 10) @@ -23,6 +26,9 @@ def viewChangeDone(nodeSet, looper, up, wallet1, client1, viewNo): sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) waitForViewChange(looper, nodeSet, expectedViewNo=viewNo+1) + ensureElectionsDone(looper=looper, nodes=nodeSet) + new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + assert m_primary_node.name != new_m_primary_node.name # noinspection PyIncorrectDocstring @@ -42,6 +48,8 @@ def testViewChangeCase1(nodeSet, looper, up, wallet1, client1, viewNo): when a quorum of nodes agree that master performance degraded """ + m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + # Delay processing of PRE-PREPARE from all non primary replicas of master # so master's performance falls and view changes delayNonPrimaries(nodeSet, 0, 10) @@ -73,3 +81,7 @@ def testViewChangeCase1(nodeSet, looper, up, wallet1, client1, viewNo): else: assert n.spylog.count(instChngMethodName) == \ sentInstChanges.get(n.name, 0) + + ensureElectionsDone(looper=looper, nodes=nodeSet) + new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + assert m_primary_node.name != new_m_primary_node.name diff 
--git a/plenum/test/view_change/test_view_change_happens_post_timeout.py b/plenum/test/view_change/test_view_change_happens_post_timeout.py new file mode 100644 index 0000000000..076f77028e --- /dev/null +++ b/plenum/test/view_change/test_view_change_happens_post_timeout.py @@ -0,0 +1,4 @@ +def test_view_change_happens_post_timeout(): + # TODO: + # View change should not happen unless the timeout expires + pass diff --git a/plenum/test/view_change/test_view_change_not_gamable.py b/plenum/test/view_change/test_view_change_not_gamable.py new file mode 100644 index 0000000000..f7493c724f --- /dev/null +++ b/plenum/test/view_change/test_view_change_not_gamable.py @@ -0,0 +1,5 @@ +def test_view_change_not_gamable(): + # # TODO: A malicious node should not be able to disrupt a + # view change by sending a message too early, this decreasing the + # available time to get enough view change messages + pass diff --git a/plenum/test/view_change/test_view_changes_if_backup_primary_disconnected.py b/plenum/test/view_change/test_view_changes_if_backup_primary_disconnected.py deleted file mode 100644 index 34d9781f0d..0000000000 --- a/plenum/test/view_change/test_view_changes_if_backup_primary_disconnected.py +++ /dev/null @@ -1,32 +0,0 @@ -from stp_core.loop.eventually import eventually -from plenum.test.conftest import txnPoolNodeSet, txnPoolNodesLooper -from plenum.test.helper import stopNodes, viewNoForNodes, \ - nodeByName, primaryNodeNameForInstance - - -def testViewChangesIfBackupPrimaryDisconnected(txnPoolNodeSet, - txnPoolNodesLooper): - - # Setup - nodes = set(txnPoolNodeSet) - looper = txnPoolNodesLooper - - viewNoBefore = viewNoForNodes(nodes) - primaryNodeForBackupInstance1Before = nodeByName( - nodes, primaryNodeNameForInstance(nodes, 1)) - - # Exercise - stopNodes([primaryNodeForBackupInstance1Before], looper) - - # Verify - remainingNodes = nodes - {primaryNodeForBackupInstance1Before} - - def assertNewPrimariesElected(): - viewNoAfter = 
viewNoForNodes(remainingNodes) - primaryNodeForBackupInstance1After = nodeByName( - nodes, primaryNodeNameForInstance(remainingNodes, 1)) - assert viewNoBefore + 1 == viewNoAfter - assert primaryNodeForBackupInstance1Before != \ - primaryNodeForBackupInstance1After - - looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=30)) \ No newline at end of file diff --git a/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py b/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py index 190b0e7b66..d4c8104907 100644 --- a/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py +++ b/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py @@ -1,32 +1,45 @@ +import pytest + +from plenum.test.test_node import ensureElectionsDone, \ + primaryNodeNameForInstance, nodeByName, get_master_primary_node, \ + ensure_node_disconnected +from plenum.test import waits from stp_core.loop.eventually import eventually -from plenum.test.conftest import txnPoolNodeSet, txnPoolNodesLooper -from plenum.test.helper import stopNodes, viewNoForNodes, nodeByName, \ - primaryNodeNameForInstance +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper +from plenum.test.helper import stopNodes, checkViewNoForNodes, \ + sendReqsToNodesAndVerifySuffReplies +@pytest.mark.skip(reason='SOV-1020') def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, - txnPoolNodesLooper): + looper, wallet1, client1, + client1Connected, tconf): + """ + View change occurs when master's primary is disconnected + """ # Setup - nodes = set(txnPoolNodeSet) - looper = txnPoolNodesLooper + nodes = txnPoolNodeSet - viewNoBefore = viewNoForNodes(nodes) - primaryNodeForMasterInstanceBefore = nodeByName( - nodes, primaryNodeNameForInstance(nodes, 0)) + viewNoBefore = checkViewNoForNodes(nodes) + old_pr_node = get_master_primary_node(nodes) - # Exercise - 
stopNodes([primaryNodeForMasterInstanceBefore], looper) + # Stop primary + stopNodes([old_pr_node], looper) + looper.removeProdable(old_pr_node) + remainingNodes = set(nodes) - {old_pr_node} + # Sometimes it takes time for nodes to detect disconnection + ensure_node_disconnected(looper, old_pr_node, remainingNodes, timeout=20) - # Verify - remainingNodes = nodes - {primaryNodeForMasterInstanceBefore} + looper.runFor(tconf.ToleratePrimaryDisconnection + 2) def assertNewPrimariesElected(): - viewNoAfter = viewNoForNodes(remainingNodes) - primaryNodeForMasterInstanceAfter = nodeByName( - nodes, primaryNodeNameForInstance(remainingNodes, 0)) - assert viewNoBefore + 1 == viewNoAfter - assert primaryNodeForMasterInstanceBefore != \ - primaryNodeForMasterInstanceAfter - - looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=30)) + checkViewNoForNodes(remainingNodes, viewNoBefore + 1) + new_pr_node = get_master_primary_node(remainingNodes) + assert old_pr_node != new_pr_node + + # Give some time to detect disconnection and then verify that view has + # changed and new primary has been elected + looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=90)) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) diff --git a/plenum/test/view_change/test_view_not_changed_if_backup_primary_disconnected.py b/plenum/test/view_change/test_view_not_changed_if_backup_primary_disconnected.py new file mode 100644 index 0000000000..c9432813b0 --- /dev/null +++ b/plenum/test/view_change/test_view_not_changed_if_backup_primary_disconnected.py @@ -0,0 +1,37 @@ +import pytest + +from stp_core.loop.eventually import eventually +from plenum.test.conftest import txnPoolNodeSet, txnPoolNodesLooper +from plenum.test.helper import stopNodes, checkViewNoForNodes +from plenum.test.test_node import primaryNodeNameForInstance, nodeByName + + +def testViewNotChangedIfBackupPrimaryDisconnected(txnPoolNodeSet, + txnPoolNodesLooper, tconf): + """ + View change 
does not occurs when master's primary is disconnected + """ + + # Setup + nodes = txnPoolNodeSet + looper = txnPoolNodesLooper + + viewNoBefore = checkViewNoForNodes(nodes) + primaryNodeForBackupInstance1Before = nodeByName( + nodes, primaryNodeNameForInstance(nodes, 1)) + + # Exercise + stopNodes([primaryNodeForBackupInstance1Before], looper) + + # Verify + remainingNodes = set(nodes) - {primaryNodeForBackupInstance1Before} + + looper.runFor(tconf.ToleratePrimaryDisconnection+2) + + def assertNewPrimariesElected(): + with pytest.raises(AssertionError): + assert checkViewNoForNodes(remainingNodes) == viewNoBefore + 1 + viewNoAfter = checkViewNoForNodes(remainingNodes, viewNoBefore) + assert viewNoBefore == viewNoAfter + + looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=30)) diff --git a/plenum/test/view_change/test_view_not_changed_when_primary_disconnected_from_less_than_quorum.py b/plenum/test/view_change/test_view_not_changed_when_primary_disconnected_from_less_than_quorum.py new file mode 100644 index 0000000000..5418099ba0 --- /dev/null +++ b/plenum/test/view_change/test_view_not_changed_when_primary_disconnected_from_less_than_quorum.py @@ -0,0 +1,79 @@ +import types + +import pytest + +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.test_node import getNonPrimaryReplicas, get_master_primary_node +from stp_core.loop.eventually import eventually +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper +from plenum.test.helper import checkViewNoForNodes, \ + sendReqsToNodesAndVerifySuffReplies + + +def test_view_not_changed_when_primary_disconnected_from_less_than_quorum( + txnPoolNodeSet, looper, wallet1, client1, client1Connected): + """ + Less than quorum nodes lose connection with primary, this should not + trigger view change as the protocol can move ahead + """ + pr_node = get_master_primary_node(txnPoolNodeSet) + npr = 
getNonPrimaryReplicas(txnPoolNodeSet, 0) + partitioned_rep = npr[0] + partitioned_node = partitioned_rep.node + + lost_pr_calls = partitioned_node.spylog.count( + partitioned_node.lost_master_primary.__name__) + + recv_inst_chg_calls = {node.name: node.spylog.count( + node.processInstanceChange.__name__) for node in txnPoolNodeSet + if node != partitioned_node and node != pr_node} + + view_no = checkViewNoForNodes(txnPoolNodeSet) + orig_retry_meth = partitioned_node.nodestack.retryDisconnected + + def wont_retry(self, exclude=None): + # Do not attempt to retry connection + pass + + # simulating a partition here + # Disconnect a node from only the primary of the master and dont retry to + # connect to it + partitioned_node.nodestack.retryDisconnected = types.MethodType( + wont_retry, partitioned_node.nodestack) + r = partitioned_node.nodestack.getRemote(pr_node.nodestack.name) + r.disconnect() + + def chk1(): + # Check that the partitioned node detects losing connection with + # primary and sends an instance change which is received by other + # nodes except the primary (since its disconnected from primary) + assert partitioned_node.spylog.count( + partitioned_node.lost_master_primary.__name__) > lost_pr_calls + for node in txnPoolNodeSet: + if node != partitioned_node and node != pr_node: + assert node.spylog.count( + node.processInstanceChange.__name__) > recv_inst_chg_calls[node.name] + + looper.run(eventually(chk1, retryWait=1, timeout=10)) + + def chk2(): + # Check the view does not change + with pytest.raises(AssertionError): + assert checkViewNoForNodes(txnPoolNodeSet) == view_no + 1 + + looper.run(eventually(chk2, retryWait=1, timeout=10)) + # Send some requests and make sure the request execute + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + + # Repair the connection so the node is no longer partitioned + partitioned_node.nodestack.retryDisconnected = types.MethodType( + orig_retry_meth, partitioned_node.nodestack) + + # Send some 
requests and make sure the request execute + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + + # Partitioned node should have the same ledger and state as others + # eventually + waitNodeDataEquality(looper, partitioned_node, + *[n for n in txnPoolNodeSet if n != partitioned_node]) diff --git a/plenum/test/view_change/test_view_not_changed_when_short_disconnection.py b/plenum/test/view_change/test_view_not_changed_when_short_disconnection.py new file mode 100644 index 0000000000..9425d9c889 --- /dev/null +++ b/plenum/test/view_change/test_view_not_changed_when_short_disconnection.py @@ -0,0 +1,76 @@ +import pytest + +from stp_core.loop.eventually import eventually +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper +from plenum.test.helper import checkViewNoForNodes, \ + sendReqsToNodesAndVerifySuffReplies + +from plenum.test.test_node import get_master_primary_node + + +@pytest.mark.skip(reason='SOV-1020') +def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper, + wallet1, client1, + client1Connected, tconf): + """ + When primary is disconnected but not long enough to trigger the timeout, + view change should not happen + """ + pr_node = get_master_primary_node(txnPoolNodeSet) + view_no = checkViewNoForNodes(txnPoolNodeSet) + + lost_pr_calls = {node.name: node.spylog.count( + node.lost_master_primary.__name__) for node in txnPoolNodeSet + if node != pr_node} + + prp_inst_chg_calls = {node.name: node.spylog.count( + node.propose_view_change.__name__) for node in txnPoolNodeSet + if node != pr_node} + + recv_inst_chg_calls = {node.name: node.spylog.count( + node.processInstanceChange.__name__) for node in txnPoolNodeSet + if node != pr_node} + + def chk1(): + # Check that non-primary nodes detects losing connection with + # primary + for node in txnPoolNodeSet: + if node != pr_node: + assert node.spylog.count(node.lost_master_primary.__name__) \ + > 
lost_pr_calls[node.name] + + def chk2(): + # Schedule an instance change but do not send it + # since primary joins again + for node in txnPoolNodeSet: + if node != pr_node: + assert node.spylog.count(node.propose_view_change.__name__) \ + > prp_inst_chg_calls[node.name] + assert node.spylog.count(node.processInstanceChange.__name__) \ + == recv_inst_chg_calls[node.name] + + # Disconnect master's primary + for node in txnPoolNodeSet: + if node != pr_node: + node.nodestack.getRemote(pr_node.nodestack.name).disconnect() + + timeout = min(tconf.ToleratePrimaryDisconnection-1, 1) + looper.run(eventually(chk1, retryWait=.2, timeout=timeout)) + + # Reconnect master's primary + for node in txnPoolNodeSet: + if node != pr_node: + node.nodestack.retryDisconnected() + + looper.run(eventually(chk2, retryWait=.2, timeout=timeout+1)) + + def chk3(): + # Check the view does not change + with pytest.raises(AssertionError): + assert checkViewNoForNodes(txnPoolNodeSet) == view_no + 1 + + looper.run(eventually(chk3, retryWait=1, timeout=10)) + + # Send some requests and make sure the request execute + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) diff --git a/plenum/test/waits.py b/plenum/test/waits.py index f0301f9ca4..51ec77f36e 100644 --- a/plenum/test/waits.py +++ b/plenum/test/waits.py @@ -1,3 +1,6 @@ +from plenum.common import util +from stp_zmq.zstack import KITZStack + from stp_core.common.log import getlogger from plenum.common.config_util import getConfig from plenum.common.util import totalConnections @@ -7,95 +10,259 @@ config = getConfig() +# Peer (node/client) to peer message delivery time +__Peer2PeerRequestDeliveryTime = 0.5 +__Peer2PeerRequestExchangeTime = 2 * __Peer2PeerRequestDeliveryTime + +# It's expected what the Node will start in one second +__NodeStartUpTime = 1 + +# The Instance order time +__ProtocolInstanceOrderTime = 1 + +# Time from replied to persisted in ledger +__PersistRepliedTime = 1 + + +######################### +# The Node 
timeouts +######################### + +def expectedNodeStartUpTimeout(): + """ + From: The Node is not raised + To: The Node is ready to connect + """ + return __NodeStartUpTime + + +def expectedNodeToNodeMessageDeliveryTime(): + """ + From: The Node ready to send a message + To: The message is received other Node + """ + return __Peer2PeerRequestDeliveryTime + + ######################### # Pool internal timeouts ######################### +def expectedPoolStartUpTimeout(nodeCount): + """ + From: the Pool is not raised + To: the Pool is ready to connect + """ + return nodeCount * expectedNodeStartUpTimeout() -def expectedNodeInterconnectionTime(nodeCount): - count = totalConnections(nodeCount) - return count * config.ExpectedConnectTime +def expectedPoolInterconnectionTime(nodeCount): + """ + From: the Pool up + To: the Pool is fully connected + """ + interconnectionCount = totalConnections(nodeCount) + nodeConnectionTimeout = config.ExpectedConnectTime + # '+KITZStack.RETRY_TIMEOUT_RESTRICTED' is a workaround for + # bug (`'str' object has no attribute 'keys'`) which supposed to be + # fixed in the 3pcbatch feature + # https://evernym.atlassian.net/browse/SOV-995 + return interconnectionCount * nodeConnectionTimeout + \ + KITZStack.RETRY_TIMEOUT_RESTRICTED -def expectedCatchupTime(nodeCount, customConsistencyProofsTimeout=None): - timeout = customConsistencyProofsTimeout or config.ConsistencyProofsTimeout - return timeout * nodeCount +def expectedPoolDisconnectionTime(nodeCount): + return __Peer2PeerRequestDeliveryTime * nodeCount -def expectedPoolGetReadyTimeout(nodeCount): - # looks the same with catchup process - return expectedCatchupTime(nodeCount) +def expectedPoolConsistencyProof(nodeCount): + """ + From: any time the Pool ready for the consistency proof procedure + To: each of the Nodes finish the consistency proof procedure + (ready for catchup if it is needed) + """ + nodeCPTimeout = __Peer2PeerRequestExchangeTime + \ + config.ConsistencyProofsTimeout + 
return nodeCount * nodeCPTimeout -def expectedPoolLedgerCheck(nodeCount): + +def expectedPoolCatchupTime(nodeCount): """ - Expected time required for checking that 'pool ledger' on nodes and client - is the same + From: the consistency proof procedure is finished + To: each of the Nodes finished the the catchup procedure """ - return 5 * nodeCount + nodeCatchupTimeout = __Peer2PeerRequestExchangeTime + \ + config.CatchupTransactionsTimeout + return nodeCount * nodeCatchupTimeout -def expectedNodeStartUpTimeout(): - return 5 +def expectedPoolGetReadyTimeout(nodeCount): + """ + From: the Pool is disconnected + To: the pool ledger is equal across the Nodes + """ + return expectedPoolInterconnectionTime(nodeCount) + \ + expectedPoolConsistencyProof(nodeCount) + \ + expectedPoolCatchupTime(nodeCount) -def expectedPoolStartUpTimeout(nodeCount): - return nodeCount * expectedNodeStartUpTimeout() +def expectedPoolLedgerCheck(nodeCount): + # TODO this is a legacy for sovrin-node + # remove it and replace in the sovrin-node + return 5 * nodeCount -def expectedRequestStashingTime(): - return 20 +def expectedPoolLedgerRepliedMsgPersisted(nodeCount): + """ + From: a message is replied to client + To: the message is stored in the ledger + """ + return nodeCount * __PersistRepliedTime ######################### # Pool election timeouts ######################### -def expectedNominationTimeout(nodeCount): - return 3 * nodeCount +def expectedPoolViewChangeStartedTimeout(nodeCount): + """ + From: the VIEW_CHANGE is send + To: the view is changed started (before NOMINATE) + """ + interconnectionCount = totalConnections(nodeCount) + return expectedNodeToNodeMessageDeliveryTime() * interconnectionCount -def expectedElectionTimeout(nodeCount): - return expectedNominationTimeout(nodeCount) + 4 * nodeCount +def expectedPoolNominationTimeout(nodeCount): + """ + From: the NOMINATE is sent + To: the NOMINATE is received by each node in the Pool + """ + interconnectionCount = 
totalConnections(nodeCount) + return expectedNodeToNodeMessageDeliveryTime() * interconnectionCount -def expectedNextPerfCheck(nodes): - return max([n.perfCheckFreq for n in nodes]) + 1 +def expectedPoolElectionTimeout(nodeCount, numOfReelections=0): + """ + From: the Pool ready for the view change procedure + To: the Pool changed the View + """ + # not sure what nomination + primary is enough + interconnectionCount = totalConnections(nodeCount) + primarySelectTimeout = \ + expectedNodeToNodeMessageDeliveryTime() * interconnectionCount + + oneElectionTimeout = \ + expectedPoolViewChangeStartedTimeout(nodeCount) + \ + expectedPoolNominationTimeout(nodeCount) + \ + primarySelectTimeout + + return (1 + numOfReelections) * oneElectionTimeout -def expectedViewChangeTime(nodeCount): - return int(0.75 * nodeCount) +def expectedPoolNextPerfCheck(nodes): + """ + From: any time + To: the performance check is finished across the Pool + """ + # +1 means 'wait awhile after max timeout' + return max([n.perfCheckFreq for n in nodes]) + 1 ######################### # Processing timeouts ######################### -def expectedNodeToNodeMessageDeliveryTime(): - return 5 - def expectedPropagateTime(nodeCount): + """ + From: the Client sent the requests + To: the requests are propageted + """ count = totalConnections(nodeCount) return expectedNodeToNodeMessageDeliveryTime() * count def expectedPrePrepareTime(nodeCount): + """ + From: the requests are propageted + To: the requests are pre-prepared + """ + count = totalConnections(nodeCount) + return expectedNodeToNodeMessageDeliveryTime() * count + + +def expectedPrepareTime(nodeCount): + """ + From: the requests are pre-prepared + To: the requests are prepared + """ + count = totalConnections(nodeCount) + return expectedNodeToNodeMessageDeliveryTime() * count + + +def expectedCommittedTime(nodeCount): + """ + From: the requests are prepared + To: the requests are committed + """ count = totalConnections(nodeCount) return 
expectedNodeToNodeMessageDeliveryTime() * count def expectedOrderingTime(numInstances): - return int(2.14 * numInstances) + """ + From: the requests are committed + To: the requests are ordered + """ + return __ProtocolInstanceOrderTime * numInstances ######################### # Client timeouts ######################### -def expectedClientConnectionTimeout(fVal): - # TODO calc fVal here, get nodeCount - return 3 * fVal + +def expectedClientToPoolConnectionTimeout(nodeCount): + """ + From: the Client is not connected to the Pool + To: the Client is connected to the Pool + """ + # '+KITZStack.RETRY_TIMEOUT_RESTRICTED' is a workaround for + # bug (`'str' object has no attribute 'keys'`) which supposed to be + # fixed in the 3pcbatch feature + # https://evernym.atlassian.net/browse/SOV-995 + return config.ExpectedConnectTime * nodeCount + \ + KITZStack.RETRY_TIMEOUT_RESTRICTED + + +def expectedClientConsistencyProof(nodeCount): + """ + From: the Client is connected to the Pool + To: the Client finished the consistency proof procedure + """ + qN = util.getQuorum(nodeCount) + return qN * __Peer2PeerRequestExchangeTime + \ + config.ConsistencyProofsTimeout + + +def expectedClientCatchupTime(nodeCount): + """ + From: the Client finished the consistency proof procedure + To: the Client finished the catchup procedure + """ + qN = util.getQuorum(nodeCount) + return qN * 2 * __Peer2PeerRequestExchangeTime + \ + config.CatchupTransactionsTimeout + + +def expectedClientToPoolRequestDeliveryTime(nodeCount): + """ + From: the Client send a request + To: the request is delivered to f nodes + """ + qN = util.getQuorum(nodeCount) + return __Peer2PeerRequestExchangeTime * qN def expectedClientToNodeMessageDeliveryTime(nodeCount): @@ -103,18 +270,33 @@ def expectedClientToNodeMessageDeliveryTime(nodeCount): def expectedClientRequestPropagationTime(nodeCount): - return int(2.5 * nodeCount) + """ + From: The requests are sent + To: The Propagation procedure finish + """ + return 
expectedPropagateTime(nodeCount) def expectedTransactionExecutionTime(nodeCount): - return int(CLIENT_REPLY_TIMEOUT * nodeCount) + # QUESTION: Why is the expected execution time a multiple of + # CLIENT_REPLY_TIMEOUT, its huge, + # it should be a little less than CLIENT_REPLY_TIMEOUT + # return int(CLIENT_REPLY_TIMEOUT * nodeCount) + return CLIENT_REPLY_TIMEOUT * 0.25 * nodeCount def expectedReqAckQuorumTime(): + # TODO depends from nodeCount return CLIENT_REQACK_TIMEOUT def expectedReqNAckQuorumTime(): + # TODO depends from nodeCount + return CLIENT_REQACK_TIMEOUT + + +def expectedReqRejectQuorumTime(): + # TODO depends from nodeCount return CLIENT_REQACK_TIMEOUT @@ -125,4 +307,3 @@ def expectedReqNAckQuorumTime(): def expectedAgentCommunicationTime(): # TODO: implement if it is needed raise NotImplementedError() - diff --git a/plenum/test/zstack_tests/test_zstack_reconnection.py b/plenum/test/zstack_tests/test_zstack_reconnection.py index a269f6101b..53181dc9eb 100644 --- a/plenum/test/zstack_tests/test_zstack_reconnection.py +++ b/plenum/test/zstack_tests/test_zstack_reconnection.py @@ -1,11 +1,14 @@ import pytest +from stp_core.common.log import getlogger from stp_core.loop.eventually import eventually from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ client1, wallet1, client1Connected from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, stopNodes from plenum.test.test_node import TestNode, ensureElectionsDone +logger = getlogger() + @pytest.fixture(scope="module") def tconf(conf, tdirWithPoolTxns): @@ -26,6 +29,7 @@ def checkNodesSendingCommits(nodeSet): assert otherReplica.name in senders +@pytest.mark.skip(reason='SOV-1020') def testZStackNodeReconnection(tconf, looper, txnPoolNodeSet, client1, wallet1, tdirWithPoolTxns, client1Connected): sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) @@ -44,6 +48,7 @@ def checkFlakyConnected(conn=True): checkFlakyConnected(True) nodeToCrash.stop() + 
logger.debug('Stopped node {}'.format(nodeToCrash)) looper.removeProdable(nodeToCrash) looper.runFor(1) stopNodes([nodeToCrash], looper) @@ -57,6 +62,6 @@ def checkFlakyConnected(conn=True): # TODO Select or create the timeout from 'waits'. Don't use constant. looper.run(eventually(checkFlakyConnected, True, retryWait=2, timeout=50)) # TODO Select or create the timeout from 'waits'. Don't use constant. - ensureElectionsDone(looper, txnPoolNodeSet, retryWait=2, timeout=50) + ensureElectionsDone(looper, txnPoolNodeSet, retryWait=2, customTimeout=50) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) checkNodesSendingCommits(txnPoolNodeSet) diff --git a/scripts/load.py b/scripts/load.py new file mode 100644 index 0000000000..a4efec1373 --- /dev/null +++ b/scripts/load.py @@ -0,0 +1,42 @@ +from plenum.client.client import Client +from plenum.client.wallet import Wallet +from plenum.test.helper import sendRandomRequests, \ + waitForSufficientRepliesForRequests +from stp_core.network.port_dispenser import genHa +from stp_core.loop.looper import Looper +from time import * +from plenum.common.signer_simple import SimpleSigner +from stp_core.types import HA + +numReqs = 10000 +splits = 5 + + +def load(): + port = genHa()[1] + ha = HA('0.0.0.0', port) + name = "hello" + wallet = Wallet(name) + wallet.addIdentifier( + signer=SimpleSigner(seed=b'000000000000000000000000Steward1')) + client = Client(name, ha=ha) + with Looper(debug=True) as looper: + looper.add(client) + print('Will send {} reqs in all'.format(numReqs)) + requests = sendRandomRequests(wallet, client, numReqs) + start = perf_counter() + for i in range(0, numReqs, numReqs // splits): + print('Will wait for {} now'.format(numReqs // splits)) + s = perf_counter() + reqs = requests[i:i + numReqs // splits + 1] + waitForSufficientRepliesForRequests(looper, client, requests=reqs, + fVal=2, customTimeoutPerReq=3) + print('>>> Got replies for {} requests << in {}'. 
+ format(numReqs // splits, perf_counter() - s)) + end = perf_counter() + print('>>>{}<<<'.format(end - start)) + exit(0) + + +if __name__ == "__main__": + load() diff --git a/scripts/start_plenum_node b/scripts/start_plenum_node index 46afa8db04..1732baede5 100755 --- a/scripts/start_plenum_node +++ b/scripts/start_plenum_node @@ -5,6 +5,7 @@ import sys from ioflo.aid.consoling import Console +from plenum.test.test_node import TestNode from stp_core.loop.looper import Looper from stp_core.types import HA from plenum.common.config_util import getConfig diff --git a/setup.py b/setup.py index 84786ef9c3..3adf664df0 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,7 @@ -import shutil -import sys import os -from setuptools import setup, find_packages, __version__ -from pip.req import parse_requirements -import data +import sys +from setuptools import setup, find_packages, __version__ v = sys.version_info if sys.version_info < (3, 5): @@ -61,9 +58,9 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['ledger==0.2.13', 'stp==0.1.8', - 'jsonpickle', 'portalocker==0.5.7', - 'prompt_toolkit==0.57', 'pyorient', 'pygments', + install_requires=['ledger==0.2.14', 'stp==0.1.9', + 'state-trie==0.1.1', 'jsonpickle', + 'prompt_toolkit==0.57', 'pygments', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', 'sortedcontainers==1.5.7', 'psutil', 'pip'], extras_require={ @@ -88,10 +85,6 @@ "example\n" f.write(msg) -DATA_DIR = os.path.dirname(data.__file__) -shutil.copyfile(os.path.join(DATA_DIR, "pool_transactions_sandbox"), - POOL_TXN_FILE) - # TODO: This code should not be copied here. import getpass diff --git a/tutorial/tutorial.py b/tutorial/tutorial.py index 9844db9bbb..6d0645299e 100644 --- a/tutorial/tutorial.py +++ b/tutorial/tutorial.py @@ -145,7 +145,7 @@ each node. 
""" for node in alpha, beta, gamma, delta: - node.clientAuthNr.addClient(*idAndKey) + node.clientAuthNr.addIdr(*idAndKey) """ We give the client a little time to connect From 54effafa0fdd1d2d561c0761916298324d99f8a6 Mon Sep 17 00:00:00 2001 From: Muzychenko Victor Date: Mon, 5 Jun 2017 15:04:28 +0400 Subject: [PATCH 009/100] Stable release (#205) * setting resource limit for tests * handling exception while raising resource limits * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring (#106) * moved wallet persistence and restoration logic to helper methods so that it can be re-utilized from different places, some other minor refactoring * removed unwanted error code, refactored list keyrings command so that it correctly works for agent cli * removed unused imports * making CLI accept hex seed of size 64 * move logging to stp repo * mitigating key error in pool manager * changing timeouts so the diconnect check happens twice * handling unavailabily of resource module * some corrects to gc * setting the lastOrdered sequence number of the replica after election * fixing obsolete imports * adding temporary info logs to show up during load testing, problem identified for PREPREPARE getting rejects, state tree has different root at beginning for different nodes * reverting config changes * using ordered json encoder * add declarations of common request handling methods to ReqHandler * refactor method commit * add docstring for ReqHandler, rearrange methods * rename *ReqHandlers to *RequestHandlers * add TODOs about fixing hierarchy of State and PruningState * rename validateReq to validate * add config=None argument to validate method to make sub and super signatures match * rename applyReq to apply * add super initialization, update usage of super methods * update docstring of commit * remove commitReq since it does the same as commit * adding timeout to test utility method * 
changes for load testing * sanitising log messages * Versioning fix (#113) * changes for request handlers * [Closes SOV-905] Bug Fixed: Orientdb is not installing with the latest build 0.3.16 * changing to rlp encoding * moving error codes to stp * upgrading version of stp * req_handler changes * fixing import * temp commit * Stp (#116) * use common stp interface for changing of node's HA * get rid of explicitly calling zmq and raet; move all transport stuff into stp interfaces. * fix tests; increment stp version * add AuthMode enum instead of auto parameter * fixed testConnectWithoutKeySharingFails test * increased test timeout * minor changes to serialization * Plenum test runner fix (#115) * Fixed a bug in plenum test runner with an incorrect test results summary pattern. Previously it had been expected that the line with the test results summary must begin with equality signs and end with them. But this line is padded with equality signs only if it is shorter than 80 characters. Now the test results summary patterns don't require equality signs. Instead they may match only the last line in the file. * Corrected test results summary patterns in plenum test runner.
* updating apply of request handler * fix import of OP_FIELD_NAME * up version of stp to 0.1.24 * Agent issuer wallet restoration (#117) * supplied keys parameter as True to wallet encoding and decoding functions so that dictionary keys which are custom objects restore properly, otherwise it used to be restored as a string as opposed to the actual object it was before persistence * minor changes in list keyrings to show issuer wallet for agent as well * minor change to print base wallet first and then any other related wallets * up stp version to 0.1.26 * skip tests in test_new_node_catchup.py * scheduling primary selection * Skip failing tests (#122) * skip test testChangeNodeHaForNonPrimary due to SOV-941 * skip test testProtocolInstanceCannotBecomeActiveWithLessThanFourServers due to SOV-940 * leveldb HashStore * parametrize test timeouts (#120) * rename expectedWait to expectedNodeInterconnectionTime * add signature for other time expect functions * use named timeouts in conftest * move totalConnections from waits to utils * use named timeout in ensureElectionsDone * use float instead of int for seconds * use default args of ensureElectionsDone where it is possible * use named argument for timeouts * use named timeout in helper * use names for parameters * rename 'timeoutPerReq' of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies to 'customTimeoutPerRequest' to emphasize the fact that there is a default one * use default timeout instead of custom timeout of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies where it can be used; use named argument for timeouts * add comments for two functions with similar names * rename checkSufficientRepliesRecvd to checkSufficientRepliesReceived * rename checkSufficientRepliesForRequests to waitForSufficientRepliesForRequests * add 'returns' to docstrings * fix passing of args to waits.expectedElectionTimeout * use waitForSufficientRepliesForRequests and its default timeout instead of
checkSufficientRepliesReceived everywhere it is possible * update doc lines * create waitForViewChange and use it in ensureView * replace usages of checkViewNoForNodes with eventually by waitForViewChange * add waits.expectedNodeToNodeMessageDeliveryTime and use it in sendMsgAndCheck * rename checkMsg to checkMessageReceived * rename sendMsgAndCheck to sendMessageAndCheckDelivery * add docstring to sendMessageAndCheckDelivery * remove unused helper function * add expectedGetReadyTimeout and use it in checkPoolReady * rename overrideTimeout parameter to customTimeout in checkNodesConnected * use default timeout of checkNodesConnected * create expectedPoolLedgerCheck and expectedClientConnectionTimeout waits and use them * add todo for ensureDisconnectedToNodes * update waits.expectedPoolLedgerCheck * add todo for checkNodesParticipating * add requestIds parameter for waitForSufficientRepliesForRequests * update docstring of waitForSufficientRepliesForRequests * fix waitForSufficientRepliesForRequests * remove unused imports from test_log_filtering * use named timeout in test_status_command * use waits.expectedTransactionExecutionTime in testTransactions * refactor testTransactions * use waitRequestSuccess and waitBalanceChange in test_cli_with_auction_plugin, move them to test.cli.helper * use named timeout in test_basic_client_commands * use named timeout in helper.checkRequest * create waitClientConnected and use it instead of checkClientConnected with eventually * add docstrings * rename checkNodeStarted to waitNodeStarted and use named timeout 'expectedNodeStartUpTimeout' in it * rename expectedGetReadyTimeout to expectedPoolGetReadyTimeout * rename checkAllNodesStarted to waitAllNodesStarted * fix default value of customTimeout of checkPoolReady * create waitAllNodesUp and use it instead of checkAllNodesUp with eventually * create waitReplyCount and use instead of checkReplyCount and eventually * use named timeouts in test_client * use named timeouts in some 
more tests * add basic implementation for waits.expectedCatchupTime and use it * make expectedCatchupTime get custom ConsistencyProofsTimeout as parameter * use named timeout in testNodeDiscardMessageFromUnknownView * use waits.expectedElectionTimeout and rename timeout arg to customTimeout in checkEveryNodeHasAtMostOnePrimary * rename timeout argument of plenum/test/node_catchup/test_discard_view_no.py to customTimeout and make it use named timeouts from waits as default * update timeouts in testNodeDiscardMessageFromUnknownView * create waits.expectedRequestStashingTime and use it * add TODO to test_catchup_delayed_nodes * create waitNodeLedgersEquality and use it instead of direct usage of checkNodeLedgersEquality * use waits.expectedPoolLedgerCheck in waitNodeLedgersEquality * use named timeout in testOrderingCase2 * add waits.expectedNominationTimeout and use it * use named timeout in some more tests * add missing empty lines * update waits * add 'TODO[slow-factor]' * update timeouts in the tests * fix testTestNodeDelay and missed import * skip testMultipleRequests test * skip testStatusAfterClientAdded test * fix testInstChangeWithLowerRatioThanDelta * fix test_new_node_catchup.py * fix testChangeHaPersistsPostNodesRestart * fix testAdd2NewNodes * increase expectedElectionTimeout timeout * rename logger.warn => logger.warning * tune timeouts in the tests * make sections in waits.py * add --repeat for runner.py * increase expectedCatchupTime timeout * improve runner.py * tune the test timeouts * skip some catchup tests * parametrize test timeouts * rm eventually.py * fix testQueueingReqFromFutureView, testNumOfPrePrepareWithFPlusOneFaults, testNumOfPrepareWithFPlusOneFaults * fix testProtocolInstanceCannotBecomeActiveWithLessThanFourServers * tune propagate and preprepare test timeouts * skip testNumOf*WithFPlusOneFaults * removing commented code and fixing bugs in pool request handler * lowering Delta in pool_transactions conftest * fix jenkins build (#123)
* fixed deps in setup.py * added pypi publishing * updated deps * updated deps stp-dev -> stp-perf-imp * removed unnecessary sources * updated setup.py to hold correct deps names and self name for publishing * fixed plenum for the latest stp (#127) * fixed plenum for the latest stp * increment stp version * archive runner.py output for all agents (#107) * archive runner.py results * using env variable NODE_NAME instead of func param for artifacts * configured archiveArtifacts to allow empty/no archive * sending last pre-prepare sequence number in consistency proof so already processed request can be ignored, re-enabling tests and re-adding proper timeouts * Feature Added: Deb build * Feature Added: Deb build * Do view change if a primary is disconnected (#128) * check whether function is a partial function in getCallableName * add tests for view change when primary goes down * start view change if primary went offline * use startViewChangeIfPrimaryWentOffline instead of doElectionIfNeeded * processing stashed ordered requests after all ledgers have caught up and applying any request if mode changed while processing and removing obsolete code from pool manager * Unclosed file warnings (#124) * Removed "ignore" rule for ResourceWarning about an unclosed file from the warnings filter. * Fixed some causes of ResourceWarnings about unclosed files. * - Fixed some causes of ResourceWarnings about unclosed files. - Corrected plenum.common.txn_util.updateGenesisPoolTxnFile function. * - Fixed the rest causes of ResourceWarnings about unclosed files. - Removed TimeAndSizeRotatingFileHandler class which is not used anymore (the class with the same name from stp is now used instead). * Updated stp-dev dependency to the new version. * Reverted update of stp-dev dependency. * Skipped the tests in plenum.test.test_log_rotation module since they require stp-dev dependency of a newer version (0.1.28 or higher).
* refactoring, documentation and updating setup to get rid of pyorient * refactoring * Updated ledger dep * deprecating orientdb * check state equality too while checking ledger equality in tests * removing obsolete test helper method * Agent generalization (#126) * refactored test_network_setup; added some role-based helper classes for members of the network * fixed issue with wrong import and minor refactoring * more work on tests * bugfixes * interim changes during refactoring * adding init_utils * cleaning up before keys * removed type hint causing issues; this issue is fixed in Python 3.5.3 with a new typing.Coroutine * added a check in DidVerifier to check that we have a verkey * fixed unreliable tmpdir test fixture * modifications to tests to make more robust when running from command line * changed spyable to be able to monkey patch a single method * Added a run_till_quiet, waits for quiet prods This function will wait till there is a quiet period from the prodables. Dependent on prodables correctly reporting events. Will run forever if there is always activity. * small tweak to log format to keep | aligned * changes to fix some issues happening in end-to-end test * using same seed for creating keys * ignore log files * added a small test for verifier * removed some exception classes that were moved to STP * init Local keys for client in test network * Add default values for role and verkey in state and fix test timeouts * refactor * import logger * updated to use new API from shared lib * updated to use new API from shared lib * handle pyorient import * handle pyorient import * increment stp version * Enabled the tests in plenum.test.test_log_rotation module.
(#135) * increment stp version * close files in tests * check for close method before calling it (#136) * minor fix in batch creation logic and caching primary replica no in node to avoid recomputation * fix testNumOf*WithFPlusOneFaults tests * Updated stp dep * Merge branches '3pc-batch' and 'master' of github.com:evernym/plenum into 3pc-batch # Conflicts: # plenum/common/test_network_setup.py # plenum/server/node.py # plenum/test/test_node.py # setup.py * add missing __init__ for member package * added missed __init__.py for common/member * skip temporarily a failing test * fix primary disconnection and new primary not being same as old for master instance * add more tests for primary disconnection, same primary not elected in the next view and refactor * add new test for minor network glitch with primary * add missing parameter to bootstrap test network (#143) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * minor change in a test and removing sip statements * fix the catchup tests (#140) * make tests a little robust * fix test, check for disconnections after processing sent and received messages and little documentation * Close SOV-976 (#144) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-976] Unable to create the genesis transaction files * Merge branches '3pc-batch' and 'master' of github.com:evernym/plenum into 3pc-batch # 
Conflicts: # plenum/test/node_catchup/test_new_node_catchup.py # setup.py * up stp to 0.1.42 (#148) * refactor, enable test, adding timeout for view change and create remotes as soon as nodestack starts * fix broken test * [Closes SOV-981] Bug Fixed: Unable to create pool transaction file on the client machine without passing it a node number (#149) * increment stp * increment ledger * up stp to 0.1.14 * move state and kv storage to 'state' repo * get rid of orientdb code * fix tests * fix generation of trustee txns * fix genesis txns for trustee * fix generation of trustee txns (#151) * remove obsolete code * client to attempt establishing connections with nodes on start * refactor monitor.isMasterThroughputTooLow * update method for forcing view change cna checking it; intoduce wait method for it * use provoke_and_wait_for_view_change instead of eventually and check * remove unused dependency and skip tests - removed unused dependencies - minor fixes - skip tests that fail in master too (we have separate tickets for this) * increment ledger * increment ledger * enable debug logging in tests * using reverted changes in state interface * increment state version * Removed redundant copying of the file "pool_transactions_sandbox" to the directory "~/.plenum". (It is already done using "data_files" argument of "setup" function call.) 
(#152) * speed up tests (#147) * speed up test_nodes_with_pool_txns.py tests * reduce the timeout in checkNodesConnected * speed up the pool_transactions test * fix the timeout in test_delay.py * fix the timeout in test_primary_election_case5.py * speed up monitoring tests * revert changes in test_instance_change_with_Delta.py * each test's running time is limited by TestRunningTimeLimitSec * increase pool interconnection timeout on zstack retry timeout * fix timeout in test_delay.py * fix tests, make them faster * speedup the tests, tidy test timeouts * add a timeout into waits * bump stp-dev==0.1.43 * Unskip testProtocolInstanceCannotBecomeActiveWithLessThanFourServers since it works well (#153) * unskip testProtocolInstanceCannotBecomeActiveWithLessThanFourServers since it works well * decrease number of nodes in test_instance_cannot_become_active_with_less_than_four_servers to 13 * fix change back HA, SOV-908 (#154) * Fix testChangeNodeHaForNonPrimary (#157) * unskip test_change_non_primary_node_ha * fix waits name - it was renamed * verify genesis ledgers only if such option set in config * use primaryName instead of isPrimary to check that election is done * add numOfReelections parameter to waits.expectedPoolElectionTimeout * refactor test_node.py * set custom election timeout (numOfReelections=3) in changeNodeHa * remove debug prints * check VerKey is base58 for NODE txn, SOV-988 (#158) * client using a method which makes the code more testable, update to an old test with more checks and removing async from a method where it was not needed * Fix test testChangeNodeHaForPrimary (#160) * unskip testChangeNodeHaForPrimary * simplify conditional in changeNodeHa * node which is going down should not participate in a view change * change formating in testChangeNodeHaForPrimary * refactor to make method overridable and adding an argument to a test function * updated pypi package name for state repo and its version (#159) * Squashed commit of the following: create 
LedgerInfo class and replace collections of LedgerManager by fields * rename LedgerManager.ledgers to ledgerRegistry * fix usages of LedgerManager.ledgerRegistry * use correct attribute and renamed method * bring change from master and unskip a test * use correct attribute and set state to be committed when received from catchup * formatting * up ledger-3pc-batch version to 0.2.16 * up stp-3pc-batch version to 0.1.15 * improve the check of the arguments in "generate_sovrin_pool_transactions" (#162) * check client input for the NODE txn (#161) * fix testInstChangeWithLowerRatioThanDelta - decrease number of messages in a batch * decrease timeout to fix provoke_and_wait_for_view_change * fixing test * Fix post 3pc-batch merge (#163) * remove development options from Jenkinsfile * skip some broken tests * Update versions and skip windows build steps (#164) * switch off windows related build steps * update versions of ledger and stp * fix name * up ledger and stp version * skip test testNodeRequestingTxns * using correct timeout in tests * move some enums to correct location, using ledger's string encoding methods and test for stashing requests * bump dependency version * fix timeout in tests * make order of ledger sync customizable, add tranformer for transactions before adding to ledger, update seqno map db for transactions from catchup and update tests to check this * temporarily change config variable * fix regression where node starts catchup process if sent a consistency proff * bumping dependency version * bumping dependency version * bumping dependency version * Fix redundant reconnections and refactor (#165) * refactor retryForExpected * use maintainConnections in retryForExpected instead of connect * refactor resendRequests * up stp version to 0.1.49 * replace CLIENT_REQACK_TIMEOUT by CLIENT_REPLY_TIMEOUT when calling _filterExpected for REPLYs * up ledger to 0.2.19 * Add new helper-methods for conversion base58 to hex (#166) * increment state-trie and ledger 
versions (#168) * Make changes to support ChunkedFileStore (#167) * update _defaultStore of Ledger to make it correspond to original one * remove initialization of domain ledger - now it is done by ledger and file store it uses * replace manual copying of transaction files by usage of defaultFile * increase timeout for testNodeRequestingTxns * skip test testInstChangeWithLowerRatioThanDelta because it fails intermittently * up ledger version to 0.2.28 * Bump dependency version * [Closes SOV-980] Bug Fixed: A node should start catchup process if it realises that it has lagged behind the other node (#172) * [Closes SOV-980] Bug Fixed: A node should start catchup process if it realises that it has lagged behind the other node * Fixed failing tests * Fixed unsed imports * Update conftest.py * Increased test time global limit * Reverted timeouts * Added logs * Fixed filtering CPs * Fixed filtering CPs * Fixed filtering CPs * Input validation (#170) * Input sanitization: Add base logic and test cases * Input sanitization: add NODE and NYM txn, fixes * Input sanitization: implement node and client messages * roll away node to node validation * role field is option for NYM txn * fix tests * fixes for sovrin-node * implement validation for merkle root hash * uncomment new ConsistencyProof message implementation * add "nullable" property for FieldBase * fix usage of alphabet * add JsonField * add validation on message level * use hash size range instead of one value; use base58.alphabet instead of own list * fix usage hasSizes; made error comment more verbose * uncomment new implementation LedgerStatus message * uncomment new implementation for Prepare message and fix fields types * roll away node to node checkes * uncomment some new message implementations * use HexField instead of MerkleRootHashField for state_root and rxn_root in PrePrepare * make message about hash length in MerkleRootField more verbose * limit timestamp field by 253402290000.0 * modify randomString to 
remove length limitation * add tests for merkle root field validation * make randomString work not only with indexable containers * refactor tests for merkle root * fix timestamp validation * add test for timestamp validation * update validation of timestamp * check each node connected * improve log messages * fix testAdd2NewNodes * Improvement of validation rules (#173) * fix Node txn, INDY-9 * add tests for the Node operation * add checking that number of init parameters of message is equal to number of fields declared in schema * use OrderedDict in __dict__ * add test for merkle, fix logic * use OrderedDict for _fields and update all realted methods * - Provided all the disabled tests with the ticket references. (#176) - Enabled the tests testChangeNodeHaForPrimary and testChangeNodeHaForNonPrimary on non-Windows platforms. - Commented out parametrizers at the disabled tests in plenum.test.input_validation.test_common_checks module. * more removal of op to the top since it can affect checking of a number of args * Make SERVICES field required for add NODE txn * Added another test scenario for catchup (#178) * override items, keys, values methods of Mapping in MessageBase to make it compatible * Added another test scenario for catchup fix * clean MessageBase * INDY-73: Removed a commented out @skipif decorator (#177) * Removed a commented out @skipif decorator. * Updated a ticket reference at a disabled test. * Add base58 str class; destnode; destnym; * use - as default ppSeqNo instead of -1 * overtride __ftr__ for MessageBase * comment checking of presence of SERVICE field * move alphabet validation from _specific_validation to avoid accidental overriding of it * INDY-73: Added ticket references to test stubs (#180) * Removed a commented out @skipif decorator. * Updated a ticket reference at a disabled test. * - Added ticket references to the not implemented tests which had not contained them yet. 
- Uncommented testStopScriptIfNodeIsRunning and disabled it since it fails. * Added raising NotImplementedError to not implemented tests. * Move all specific checks to from validate method * Eliminate multiple inheritance * Enabled and updated testNodesConnectsWhenOneNodeIsLate (#181) * Enabled and updated testNodesConnectsWhenOneNodeIsLate * updated assertions logic * INDY-109 (#184) * updates to test helpers and change in forwarding requests logic (#187) * Use secure random number generator for randomString() Current usage of random.choice() is not secure so replaced that with libsodium provided randombytes_uniform() which is secure and also available on all platforms where libsodium is available * Fixes for INDY-71 (#185) * Refactor check_endpoint_valid func * Fix validation, INDY-71 * Fix imports * Do not connect to a new, non-active node * Remove confusing comments * Minor refactor * Fix test testAddInactiveNodeThenActivate * Validation for view change and 3pc messages (#193) * add exception for type error and raise it in MessageValidator * make error message in ChooseField more verbose * uncomment new implementations of Reelection, Primary and InstanceChange messages * update and fix testInstanceChangeMsgTypeChecking * fix testDiscardInstChngMsgFrmPastView * use NonNegativeField instead of TimestampFied inTieAmountField for its seconds part * make name pf test for merkle root field more standard * move tests for fields to own package * add missing empty line * remove ununsed imports * refactor test for timestamp * remove todo about code duplication since it is not valid anymore * add tests for some field types * make IterableField check type of inner field and that it is not none * add test for hex field * add test for ledger id * add test for request id field * override __eq__ for MessageBase * use NonNegativeNumberField instead of TimestampField in RequestIdentifierField * use IdentifierField instead of NonEmptyStringField in RequestIdentifierField * update 
test for request identifier * add test for IdentifierField * update test for RequestIdentifier field * implement validation for IdentifierField and reimplement DestNodeField using it * uncomment new implementations for Propagate, Prepare, Commit, Ordered * Refactor base58 check; add Verkey check; dest node; dest nym; merkelroot; * move test for base58 field to field validation package * remove unused import * add checking of max length hash to merkle tree root field test * update test for identifier field * update test for request identifier field * fix formatting * fix checking of zero * remove unused commented line * rename test_non_negative_number to test_positive_number * Make verkey validator accept empty strings * Add super's __init__ params to child calsses * Improve getMatchedHelpableMsg logic (#195) * Use original batch parameters in all tests except the ones that use batches and use alternate of eventuallyAll to check a collection of functions under a timeout (#190) * updates to test helpers and change in forwarding requests logic * overriding batch params in tests * use correct timeouts in tests * accounting for nomination delay * Disable view change (#196) * repaired calls to deprecated methods * Added unsafe 'disable_view_change' for testing - Added 'unsafe' attribute to default config. "unsafe" is a set of attributes that can set certain behaviors that are not safe, for example, 'disable_view_change' disables view changes from happening. This might be useful in testing scenarios, but never in a live network. - Added 'unsafe' behavior 'disable_view_change' which causes Monitor to always return false when asked if master instance performance is degraded. - Refactored view change fixture so it could be reused to test new 'disable_view_change' behavior. 
* fixed import for missed fixture in test * Fix validation types (#197) * Enable and update testNodeDiscardMessageFromUnknownView (#194) * added equality method for MessageBase * added ability to cancel created delayers * enabled and updated testNodeDiscardMessageFromUnknownView * updated test to recent changes in node logic * reverted recent changes not necessary for now * Catch up demoted (#201) * add tests for the issues with catch-up after demotion/disconnection * updates replica's last_ordered_pp_seq_no after catch-up * disconnect a demoted node * Fixes for dead catchup (#202) * add tests for the issues with catch-up after demotion/disconnection * updates replica's last_ordered_pp_seq_no after catch-up * disconnect a demoted node * up stp version to 0.1.56 * up stp version to 0.1.57 * fixed tests * fix test * Disable the test: INDY-147. * fix README * fix setup.py * up stp version to 0.1.10 Signed-off-by: Muzychenko Victor Sign-off-executed-by: toktar Approved-at: h-master --- plenum/cli/cli.py | 4 +- plenum/client/client.py | 2 +- plenum/client/pool_manager.py | 2 +- plenum/common/config_util.py | 7 + plenum/common/constants.py | 1 + plenum/common/keygen_utils.py | 10 +- plenum/common/ledger.py | 4 + plenum/common/ledger_manager.py | 11 +- plenum/common/message_processor.py | 2 +- plenum/common/messages/client_request.py | 8 +- plenum/common/messages/fields.py | 117 ++++-- plenum/common/messages/message_base.py | 80 ++-- plenum/common/request.py | 9 +- plenum/common/stack_manager.py | 6 +- plenum/common/types.py | 356 ++++++++-------- plenum/common/util.py | 72 +++- plenum/config.py | 22 +- plenum/server/models.py | 2 +- plenum/server/monitor.py | 3 + plenum/server/node.py | 309 ++++++++------ plenum/server/pool_manager.py | 7 +- plenum/server/pool_req_handler.py | 7 - plenum/server/primary_elector.py | 26 +- plenum/server/propagator.py | 31 +- plenum/server/replica.py | 387 +++++++++++++----- plenum/server/router.py | 4 +- 
plenum/test/batching_3pc/conftest.py | 5 +- .../test/batching_3pc/test_basic_batching.py | 20 +- .../test/batching_3pc/test_batch_rejection.py | 4 - .../batching_3pc/test_batching_scenarios.py | 3 +- .../test/batching_3pc/test_client_requests.py | 9 +- plenum/test/checkpoints/conftest.py | 17 +- .../checkpoints/test_basic_checkpointing.py | 15 +- .../test_discard_old_checkpoint_messages.py | 6 +- .../test_message_outside_watermark.py | 7 +- .../test_message_outside_watermark1.py | 25 +- .../checkpoints/test_stable_checkpoint.py | 25 +- .../checkpoints/test_stable_checkpoint1.py | 6 +- .../test/cli/test_cli_with_bank_req_plugin.py | 1 - plenum/test/cli/test_new_steward.py | 2 +- plenum/test/conftest.py | 31 +- plenum/test/delayers.py | 67 ++- plenum/test/helper.py | 92 +++-- .../fields_validation/__init__.py | 0 .../fields_validation/test_base58_field.py | 34 ++ .../fields_validation/test_hex_field.py | 22 + .../test_identifier_field.py | 29 ++ .../fields_validation/test_iterable_field.py | 15 + .../fields_validation/test_ledger_id_field.py | 15 + .../test_merkle_tree_root_field.py | 26 ++ .../test_non_empty_string_field.py | 12 + .../test_non_negative_number_field.py | 20 + .../test_request_identifier_field.py | 49 +++ .../test_time_among_field.py | 17 + .../fields_validation/test_timestamp_field.py | 20 + plenum/test/input_validation/utils.py | 8 + .../test/instances/test_pre_prepare_digest.py | 9 +- plenum/test/malicious_behaviors_node.py | 15 +- .../monitoring/test_monitor_reconnection.py | 6 +- plenum/test/node_catchup/helper.py | 3 +- .../test/node_catchup/test_catchup_demoted.py | 46 +++ .../test/node_catchup/test_discard_view_no.py | 37 +- .../node_catchup/test_new_node_catchup.py | 2 +- .../test_node_catchup_after_disconnect.py | 10 +- .../test/node_request/node_request_helper.py | 25 +- ...st_num_of_commit_with_f_plus_one_faults.py | 3 +- .../test_non_primary_sends_a_pre_prepare.py | 2 +- .../node_request/test_request_forwarding.py | 62 +++ 
plenum/test/pool_transactions/helper.py | 9 +- .../test_node_key_changed.py | 53 +++ .../test_nodes_data_changed.py | 132 ++++++ .../test_nodes_with_pool_txns.py | 165 ++------ .../test_primary_election_case1.py | 6 +- .../primary_election/test_primary_forfeit.py | 2 +- .../test_primary_selection.py | 6 +- .../test_replica_reject_same_pre_prepare.py | 4 +- plenum/test/script/test_change_node_ha.py | 37 +- plenum/test/stasher.py | 18 +- plenum/test/test_delay.py | 1 + plenum/test/test_log_rotation.py | 1 - plenum/test/test_node.py | 42 +- plenum/test/test_node_connection.py | 26 +- plenum/test/test_node_request.py | 60 +-- plenum/test/test_stack.py | 3 + plenum/test/test_stasher.py | 7 +- plenum/test/test_testable.py | 28 +- plenum/test/view_change/conftest.py | 26 +- plenum/test/view_change/helper.py | 50 ++- .../view_change/test_disable_view_change.py | 17 + ...st_discard_inst_chng_msg_from_past_view.py | 2 +- .../test_instance_change_msg_checking.py | 21 +- plenum/test/view_change/test_view_change.py | 21 +- .../test_view_change_happens_post_timeout.py | 6 +- .../test_view_change_not_gamable.py | 6 +- plenum/test/waits.py | 16 +- .../zstack_tests/test_zstack_reconnection.py | 6 +- setup.py | 2 +- 97 files changed, 2057 insertions(+), 1027 deletions(-) create mode 100644 plenum/test/input_validation/fields_validation/__init__.py create mode 100644 plenum/test/input_validation/fields_validation/test_base58_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_hex_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_identifier_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_iterable_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_ledger_id_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_merkle_tree_root_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_non_empty_string_field.py create mode 
100644 plenum/test/input_validation/fields_validation/test_non_negative_number_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_request_identifier_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_time_among_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_timestamp_field.py create mode 100644 plenum/test/input_validation/utils.py create mode 100644 plenum/test/node_catchup/test_catchup_demoted.py create mode 100644 plenum/test/node_request/test_request_forwarding.py create mode 100644 plenum/test/pool_transactions/test_node_key_changed.py create mode 100644 plenum/test/pool_transactions/test_nodes_data_changed.py create mode 100644 plenum/test/view_change/test_disable_view_change.py diff --git a/plenum/cli/cli.py b/plenum/cli/cli.py index c8f482aec3..d2b4fe93f5 100644 --- a/plenum/cli/cli.py +++ b/plenum/cli/cli.py @@ -1154,7 +1154,9 @@ def _showLicense(self): """) def getMatchedHelpableMsg(self, helpable): - matchedHelpMsgs = [hm for hm in self.cmdHandlerToCmdMappings().values() if hm and hm.id == helpable] + cmd_prefix = ' '.join(helpable.split(' ')[:2]) + matchedHelpMsgs = [hm for hm in self.cmdHandlerToCmdMappings().values() + if hm and hm.id == cmd_prefix] if matchedHelpMsgs: return matchedHelpMsgs[0] return None diff --git a/plenum/client/client.py b/plenum/client/client.py index dc5040d6ae..ac5a7a3c58 100644 --- a/plenum/client/client.py +++ b/plenum/client/client.py @@ -572,7 +572,7 @@ def retryForExpected(self): # even if pool is just busy and cannot answer quickly, # that's why using maintainConnections instead # self.nodestack.connect(name=remote.name) - self.nodestack.maintainConnections() + self.nodestack.maintainConnections(force=True) if aliveRequests: # Need a delay in case connection has to be established with some diff --git a/plenum/client/pool_manager.py b/plenum/client/pool_manager.py index 46be8db260..a1c986a1e0 100644 --- 
a/plenum/client/pool_manager.py +++ b/plenum/client/pool_manager.py @@ -118,7 +118,7 @@ def nodeServicesChanged(self, txn): # If validator service is disabled del self.nodeReg[remoteName] try: - rid = self.nodestack.removeRemoteByName(remoteName) + rid = TxnStackManager.removeRemote(self.nodestack, remoteName) if rid: self.nodestack.outBoxes.pop(rid, None) except RemoteNotFound: diff --git a/plenum/common/config_util.py b/plenum/common/config_util.py index 18245a2b99..f68396401b 100644 --- a/plenum/common/config_util.py +++ b/plenum/common/config_util.py @@ -46,5 +46,12 @@ def getConfig(homeDir=None): except FileNotFoundError: pass refConfig.baseDir = os.path.expanduser(refConfig.baseDir) + + # "unsafe" is a set of attributes that can set certain behaviors that + # are not safe, for example, 'disable_view_change' disables view changes + # from happening. This might be useful in testing scenarios, but never + # in a live network. + if not hasattr(refConfig, 'unsafe'): + setattr(refConfig, 'unsafe', set()) CONFIG = refConfig return CONFIG diff --git a/plenum/common/constants.py b/plenum/common/constants.py index 05b3f430ec..344a95c2e8 100644 --- a/plenum/common/constants.py +++ b/plenum/common/constants.py @@ -30,6 +30,7 @@ ORDERED = "ORDERED" REQDIGEST = "REQDIGEST" +REQKEY = "REQKEY" INSTANCE_CHANGE = "INSTANCE_CHANGE" diff --git a/plenum/common/keygen_utils.py b/plenum/common/keygen_utils.py index be307ffda2..fd8d498eef 100644 --- a/plenum/common/keygen_utils.py +++ b/plenum/common/keygen_utils.py @@ -15,13 +15,13 @@ def initLocalKeys(name, baseDir, sigseed, override=False, config=None): return pubkey, verkey -def initRemoteKeys(name, baseDir, sigseed, verkey, override=False, config=None): - nodeStackClass.initRemoteKeys(name, baseDir, sigseed, verkey, - override=override) +def initRemoteKeys(name, remote_name, baseDir, verkey, override=False): + nodeStackClass.initRemoteKeys(name, remote_name, baseDir, verkey, + override=override) -def 
initNodeKeysForBothStacks(name, baseDir, sigseed, override=False, config=None): - # `sigseed` is initailsed to keep the seed same for both stacks. +def initNodeKeysForBothStacks(name, baseDir, sigseed, override=False): + # `sigseed` is initialised to keep the seed same for both stacks. # Both node and client stacks need to have same keys sigseed = sigseed or randomSeed() diff --git a/plenum/common/ledger.py b/plenum/common/ledger.py index fdfc882fb4..f04678d3fc 100644 --- a/plenum/common/ledger.py +++ b/plenum/common/ledger.py @@ -30,6 +30,10 @@ def __init__(self, *args, **kwargs): self.uncommittedRootHash = None self.uncommittedTree = None + @property + def uncommitted_size(self) -> int: + return self.size + len(self.uncommittedTxns) + def appendTxns(self, txns: List): # These transactions are not yet committed so they do not go to # the ledger diff --git a/plenum/common/ledger_manager.py b/plenum/common/ledger_manager.py index eb9e6d54cd..e7e11be7e8 100644 --- a/plenum/common/ledger_manager.py +++ b/plenum/common/ledger_manager.py @@ -29,8 +29,7 @@ class LedgerManager(HasActionQueue): def __init__(self, owner, ownedByNode: bool=True, - postAllLedgersCaughtUp: - Optional[Callable]=None): + postAllLedgersCaughtUp:Optional[Callable]=None): self.owner = owner self.ownedByNode = ownedByNode @@ -390,17 +389,15 @@ def processCatchupReq(self, req: CatchupReq, frm: str): .format(self, end, ledger.size)) end = ledger.size - # TODO: This is very inefficient for long ledgers - txns = ledger.getAllTxn(start, end) - logger.debug("node {} requested catchup for {} from {} to {}" .format(frm, end - start+1, start, end)) - logger.debug("{} generating consistency proof: {} from {}". 
format(self, end, req.catchupTill)) consProof = [Ledger.hashToStr(p) for p in ledger.tree.consistency_proof(end, req.catchupTill)] + # TODO: This is very inefficient for long ledgers if the ledger does not use `ChunkedFileStore` + txns = ledger.getAllTxn(start, end) for seq_no in txns: txns[seq_no] = self.owner.update_txn_with_extra_data(txns[seq_no]) self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_ID.nm), txns, @@ -878,7 +875,7 @@ def getLedgerForMsg(self, msg: Any) -> Ledger: def getLedgerInfoByType(self, ledgerType) -> LedgerInfo: if ledgerType not in self.ledgerRegistry: - raise ValueError("Invalid ledger type: {}".format(ledgerType)) + raise KeyError("Invalid ledger type: {}".format(ledgerType)) return self.ledgerRegistry[ledgerType] def appendToLedger(self, ledgerId: int, txn: Any) -> Dict: diff --git a/plenum/common/message_processor.py b/plenum/common/message_processor.py index c8e23b1bcc..7ccea9bc55 100644 --- a/plenum/common/message_processor.py +++ b/plenum/common/message_processor.py @@ -56,4 +56,4 @@ def prepForSending(self, msg: Dict, signer: Signer = None) -> Dict: msg = self.toDict(msg) if signer: return signer.sign(msg) - return msg \ No newline at end of file + return msg diff --git a/plenum/common/messages/client_request.py b/plenum/common/messages/client_request.py index 11ab241ebb..7d42d2b48e 100644 --- a/plenum/common/messages/client_request.py +++ b/plenum/common/messages/client_request.py @@ -14,6 +14,7 @@ class ClientNodeOperationData(MessageValidator): ) def _validate_message(self, dct): + # TODO: make ha fields truly optional (needs changes in stackHaChanged) required_ha_fields = {NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT} ha_fields = {f for f in required_ha_fields if f in dct} if ha_fields and len(ha_fields) != len(required_ha_fields): @@ -24,7 +25,7 @@ class ClientNodeOperation(MessageValidator): schema = ( (TXN_TYPE, ConstantField(NODE)), (DATA, ClientNodeOperationData()), - (TARGET_NYM, IdentifierField()), + (TARGET_NYM, 
DestNodeField()), (VERKEY, VerkeyField(optional=True)), ) @@ -34,7 +35,7 @@ class ClientNYMOperation(MessageValidator): (TXN_TYPE, ConstantField(NYM)), (ALIAS, NonEmptyStringField(optional=True)), (VERKEY, VerkeyField(optional=True)), - (TARGET_NYM, IdentifierField()), + (TARGET_NYM, DestNymField()), (ROLE, RoleField(optional=True)), # TODO: validate role using ChooseField, # do roles list expandable form outer context @@ -64,5 +65,4 @@ def validate(self, dct): if schema_type in self.operations: # check only if the schema is defined op = self.operations[schema_type] - self._validate_fields_with_schema(dct, op.schema) - self._validate_message(dct) + op.validate(dct) diff --git a/plenum/common/messages/fields.py b/plenum/common/messages/fields.py index c8abc6eb9c..61b197305d 100644 --- a/plenum/common/messages/fields.py +++ b/plenum/common/messages/fields.py @@ -2,6 +2,7 @@ import json import base58 import re +from datetime import datetime from plenum.common.constants import DOMAIN_LEDGER_ID, POOL_LEDGER_ID @@ -25,7 +26,10 @@ def validate(self, val): type_er = self.__type_check(val) if type_er: return type_er - return self._specific_validation(val) + + spec_err = self._specific_validation(val) + if spec_err: + return spec_err def _specific_validation(self, val): raise NotImplementedError @@ -94,6 +98,9 @@ class IterableField(FieldBase): _base_types = (list, tuple) def __init__(self, inner_field_type: FieldValidator, **kwargs): + assert inner_field_type + assert isinstance(inner_field_type, FieldValidator) + self.inner_field_type = inner_field_type super().__init__(**kwargs) @@ -154,8 +161,8 @@ def __init__(self, values, **kwargs): def _specific_validation(self, val): if val not in self._possible_values: - return "expected '{}' unknown value '{}'" \ - "".format(', '.join(map(str, self._possible_values)), val) + return "expected one of '{}', unknown value '{}'" \ + .format(', '.join(map(str, self._possible_values)), val) class LedgerIdField(ChooseField): @@ -166,9 
+173,63 @@ def __init__(self, **kwargs): super().__init__(self.ledger_ids, **kwargs) -class IdentifierField(NonEmptyStringField): +class Base58Field(FieldBase): + _base_types = (str,) + + #long id is 32 bye long; short is 16 bytes long; + #upper limit is calculated according to formula + #for the max length of encoded data + #ceil(n * 138 / 100 + 1) + #lower formula is based on data from field + def __init__(self, short=False, long=False, *args, **kwargs): + super().__init__(*args, **kwargs) + self._alphabet = set(base58.alphabet) + self._lengthLimits = [] + if short: + self._lengthLimits.append(range(15, 26)) + if long: + self._lengthLimits.append(range(43, 46)) + + def _specific_validation(self, val): + if self._lengthLimits: + inlen = len(val) + goodlen = any(inlen in r for r in self._lengthLimits) + if not goodlen: + return 'value length {} is not in ranges {}'\ + .format(inlen, self._lengthLimits) + if set(val) - self._alphabet: + return 'should not contains chars other than {}' \ + .format(self._alphabet) + + +class IdentifierField(Base58Field): _base_types = (str, ) - # TODO implement the rules + + def __init__(self, *args, **kwargs): + # TODO the tests in client are failing because the field + # can be short and long both. It is can be an error. + # We have to double check the type of the field. + super().__init__(short=True, long=True, *args, **kwargs) + + +class DestNodeField(Base58Field): + _base_types = (str,) + + def __init__(self, *args, **kwargs): + # TODO the tests in client are failing because the field + # can be short and long both. It is can be an error. + # We have to double check the type of the field. + super().__init__(short=True, long=True, *args, **kwargs) + + +class DestNymField(Base58Field): + _base_types = (str, ) + + def __init__(self, *args, **kwargs): + # TODO the tests in client are failing because the field + # can be short and long both. It is can be an error. + # We have to double check the type of the field. 
+ super().__init__(short=True, long=True, *args, **kwargs) class RequestIdentifierField(FieldBase): @@ -178,10 +239,10 @@ class RequestIdentifierField(FieldBase): def _specific_validation(self, val): if len(val) != self._length: return "should have length {}".format(self._length) - idr_error = NonEmptyStringField().validate(val[0]) + idr_error = IdentifierField().validate(val[0]) if idr_error: return idr_error - ts_error = TimestampField().validate(val[1]) + ts_error = NonNegativeNumberField().validate(val[1]) if ts_error: return ts_error @@ -189,7 +250,6 @@ def _specific_validation(self, val): class TieAmongField(FieldBase): _base_types = (list, tuple) _length = 2 - # TODO eliminate duplication with RequestIdentifierField def _specific_validation(self, val): if len(val) != self._length: @@ -197,17 +257,25 @@ def _specific_validation(self, val): idr_error = NonEmptyStringField().validate(val[0]) if idr_error: return idr_error - ts_error = TimestampField().validate(val[1]) + ts_error = NonNegativeNumberField().validate(val[1]) if ts_error: return ts_error +# TODO: think about making it a subclass of Base58Field class VerkeyField(FieldBase): _base_types = (str, ) - # TODO implement the rules + _b58short = Base58Field(short=True) + _b58long = Base58Field(long=True) def _specific_validation(self, val): - return None + if len(val) == 0: + return None + if val.startswith('~'): + #short base58 + return self._b58short.validate(val[1:]) + #long base58 + return self._b58long.validate(val) class HexField(FieldBase): @@ -226,29 +294,24 @@ def _specific_validation(self, val): return "length should be {} length".format(self._length) -class MerkleRootField(FieldBase): +class MerkleRootField(Base58Field): _base_types = (str, ) - # Raw merkle root is 32 bytes length, - # but when it is base58'ed it is 44 bytes - hashSizes = range(43, 46) - alphabet = base58.alphabet - - def _specific_validation(self, val): - if len(val) not in self.hashSizes: - return 'length should be one of 
{}'.format(self.hashSizes) - if set(val).isdisjoint(self.alphabet): - return 'should not contains chars other than {}' \ - .format(self.alphabet) + def __init__(self, *args, **kwargs): + super().__init__(long=True, *args, **kwargs) class TimestampField(FieldBase): _base_types = (float, int) def _specific_validation(self, val): - # TODO finish implementation - if val < 0: - return 'should be a positive number' + normal_val = val + if isinstance(val, int): + # This is needed because timestamp is usually multiplied + # by 1000 to "make it compatible to JavaScript Date()" + normal_val /= 1000 + if normal_val <= 0: + return 'should be a positive number but was {}'.format(val) class JsonField(FieldBase): @@ -258,4 +321,4 @@ def _specific_validation(self, val): try: json.loads(val) except json.decoder.JSONDecodeError: - return 'should be valid JSON string' + return 'should be a valid JSON string' diff --git a/plenum/common/messages/message_base.py b/plenum/common/messages/message_base.py index 865b2e9a96..bc67fac0a5 100644 --- a/plenum/common/messages/message_base.py +++ b/plenum/common/messages/message_base.py @@ -2,11 +2,10 @@ import itertools from typing import Mapping - +from collections import OrderedDict from plenum.common.constants import OP_FIELD_NAME from plenum.common.messages.fields import FieldValidator - class MessageValidator(FieldValidator): # the schema has to be an ordered iterable because the message class @@ -20,8 +19,7 @@ def validate(self, dct): def _validate_fields_with_schema(self, dct, schema): if not isinstance(dct, dict): - # TODO raise invalid type exception - self._raise_invalid_fields('', dct, 'wrong type') + self._raise_invalid_type(dct) schema_dct = dict(schema) required_fields = filter(lambda x: not x[1].optional, schema) required_field_names = map(lambda x: x[0], required_fields) @@ -35,6 +33,11 @@ def _validate_fields_with_schema(self, dct, schema): if validation_error: self._raise_invalid_fields(k, v, validation_error) + + def 
_raise_invalid_type(self, dct): + raise TypeError("validation error: invalid type {}, dict expected" + .format(type(dct))) + def _validate_message(self, dct): return None @@ -58,30 +61,38 @@ class MessageBase(Mapping, MessageValidator): typename = None def __init__(self, *args, **kwargs): - assert not (args and kwargs), '*args, **kwargs cannot be used together' - if args: - input_as_dict = dict(zip(map(itemgetter(0), self.schema), args)) - else: - input_as_dict = kwargs - # remove op field before the validation procedure - input_as_dict.pop(OP_FIELD_NAME, None) + assert not (args and kwargs), \ + '*args, **kwargs cannot be used together' + + if kwargs: + # op field is not required since there is self.typename + kwargs.pop(OP_FIELD_NAME, None) + + argsLen = len(args or kwargs) + assert argsLen == len(self.schema), \ + "number of parameters should be the " \ + "same as a number of fields in schema, but it was {}" \ + .format(argsLen) + + input_as_dict = kwargs if kwargs else self._join_with_schema(args) + self.validate(input_as_dict) - self._fields = [(k, input_as_dict[k]) for k, _ in self.schema if k in input_as_dict] + + self._fields = OrderedDict((name, input_as_dict[name]) for name, _ in self.schema) + + def _join_with_schema(self, args): + return dict(zip(map(itemgetter(0), self.schema), args)) def __getattr__(self, item): - for k, v in self._fields: - if item == k: - return v - raise AttributeError + return self._fields[item] def __getitem__(self, key): + values = list(self._fields.values()) if isinstance(key, slice): - r = range(key.start or 0, min([len(self), key.stop or len(self)]), key.step or 1) - return [self._fields[i][1] for i in r] - elif isinstance(key, int): - return self._fields[key][1] - else: - raise TypeError("Invalid argument type.") + return values[key] + if isinstance(key, int): + return values[key] + raise TypeError("Invalid argument type.") def _asdict(self): """ @@ -94,15 +105,34 @@ def __dict__(self): """ Return a dictionary form. 
""" - return dict(self._fields + [(OP_FIELD_NAME, self.typename)]) + m = self._fields.copy() + m[OP_FIELD_NAME] = self.typename + m.move_to_end(OP_FIELD_NAME, False) + return m @property def __name__(self): return self.typename def __iter__(self): - for k, v in self._fields: - yield v + return self._fields.values().__iter__() def __len__(self): return len(self._fields) + + def items(self): + return self._fields.items() + + def keys(self): + return self._fields.keys() + + def values(self): + return self._fields.values() + + def __str__(self): + return "{}{}".format(self.typename, dict(self.items())) + + def __eq__(self, other): + if not issubclass(other.__class__, self.__class__): + return False + return self._asdict() == other._asdict() diff --git a/plenum/common/request.py b/plenum/common/request.py index cfbce0e1bf..a11708d85c 100644 --- a/plenum/common/request.py +++ b/plenum/common/request.py @@ -4,7 +4,7 @@ from stp_core.types import Identifier from plenum.common.signing import serializeMsg -from plenum.common.constants import REQDIGEST +from plenum.common.constants import REQDIGEST, REQKEY from plenum.common.types import f, OPERATION, ClientMessageValidator @@ -79,8 +79,15 @@ def key(self): return self.identifier, self.reqId +class ReqKey(NamedTuple(REQKEY, [f.IDENTIFIER, f.REQ_ID])): + pass + + class SafeRequest(Request, ClientMessageValidator): def __init__(self, **kwargs): self.validate(kwargs) super().__init__(**kwargs) + + + diff --git a/plenum/common/stack_manager.py b/plenum/common/stack_manager.py index 67272b5d75..0b8560ccbd 100644 --- a/plenum/common/stack_manager.py +++ b/plenum/common/stack_manager.py @@ -163,7 +163,11 @@ def stackKeysChanged(self, txn, remoteName, nodeOrClientObj): # Removing remote so that the nodestack will attempt to connect rid = self.removeRemote(nodeOrClientObj.nodestack, remoteName) - verkey = txn[VERKEY] + if txn[VERKEY][0] == '~': # abbreviated + verkey = cryptonymToHex(txn[TARGET_NYM]) + cryptonymToHex(txn[VERKEY][1:]) 
+ else: + verkey = cryptonymToHex(txn[VERKEY]) + try: # Override any keys found initRemoteKeys(self.name, remoteName, self.basedirpath, diff --git a/plenum/common/types.py b/plenum/common/types.py index 733e13d1ac..3137eb50e1 100644 --- a/plenum/common/types.py +++ b/plenum/common/types.py @@ -119,20 +119,20 @@ class ClientMessageValidator(MessageValidator): ) -# class Nomination(MessageBase): -# typename = NOMINATE -# -# schema = ( -# (f.NAME.nm, NonEmptyStringField()), -# (f.INST_ID.nm, NonNegativeNumberField()), -# (f.VIEW_NO.nm, NonNegativeNumberField()), -# (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), -# ) -Nomination = TaggedTuple(NOMINATE, [ - f.NAME, - f.INST_ID, - f.VIEW_NO, - f.ORD_SEQ_NO]) +class Nomination(MessageBase): + typename = NOMINATE + + schema = ( + (f.NAME.nm, NonEmptyStringField()), + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), + ) +# Nomination = TaggedTuple(NOMINATE, [ +# f.NAME, +# f.INST_ID, +# f.VIEW_NO, +# f.ORD_SEQ_NO]) Batch = TaggedTuple(BATCH, [ @@ -147,37 +147,37 @@ class ClientMessageValidator(MessageValidator): # already seen then it rejects that message -# class Reelection(MessageBase): -# typename = REELECTION -# -# schema = ( -# (f.INST_ID.nm, NonNegativeNumberField()), -# (f.ROUND.nm, NonNegativeNumberField()), -# (f.TIE_AMONG.nm, IterableField(TieAmongField())), -# (f.VIEW_NO.nm, NonNegativeNumberField()), -# ) -Reelection = TaggedTuple(REELECTION, [ - f.INST_ID, - f.ROUND, - f.TIE_AMONG, - f.VIEW_NO]) +class Reelection(MessageBase): + typename = REELECTION + + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.ROUND.nm, NonNegativeNumberField()), + (f.TIE_AMONG.nm, IterableField(TieAmongField())), + (f.VIEW_NO.nm, NonNegativeNumberField()), + ) +# Reelection = TaggedTuple(REELECTION, [ +# f.INST_ID, +# f.ROUND, +# f.TIE_AMONG, +# f.VIEW_NO]) # Declaration of a winner -# class Primary(MessageBase): -# typename = PRIMARY -# -# 
schema = ( -# (f.NAME.nm, NonEmptyStringField()), -# (f.INST_ID.nm, NonNegativeNumberField()), -# (f.VIEW_NO.nm, NonNegativeNumberField()), -# (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), -# ) -Primary = TaggedTuple(PRIMARY, [ - f.NAME, - f.INST_ID, - f.VIEW_NO, - f.ORD_SEQ_NO]) +class Primary(MessageBase): + typename = PRIMARY + + schema = ( + (f.NAME.nm, NonEmptyStringField()), + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), + ) +# Primary = TaggedTuple(PRIMARY, [ +# f.NAME, +# f.INST_ID, +# f.VIEW_NO, +# f.ORD_SEQ_NO]) BlacklistMsg = NamedTuple(BLACKLIST, [ f.SUSP_CODE, @@ -202,108 +202,103 @@ class ClientMessageValidator(MessageValidator): ]) -# class Ordered(MessageBase): -# typename = ORDERED -# schema = ( -# (f.INST_ID.nm, NonNegativeNumberField()), -# (f.VIEW_NO.nm, NonNegativeNumberField()), -# (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), -# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), -# (f.PP_TIME.nm, TimestampField()), -# (f.LEDGER_ID.nm, LedgerIdField()), -# (f.STATE_ROOT.nm, MerkleRootField()), -# (f.TXN_ROOT.nm, MerkleRootField()), -# ) -Ordered = NamedTuple(ORDERED, [ - f.INST_ID, - f.VIEW_NO, - f.REQ_IDR, - f.PP_SEQ_NO, - f.PP_TIME, - f.LEDGER_ID, - f.STATE_ROOT, - f.TXN_ROOT, - ]) +class Ordered(MessageBase): + typename = ORDERED + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + (f.PP_TIME.nm, TimestampField()), + (f.LEDGER_ID.nm, LedgerIdField()), + (f.STATE_ROOT.nm, HexField(length=64, nullable=True)), + (f.TXN_ROOT.nm, HexField(length=64, nullable=True)), + ) +# Ordered = NamedTuple(ORDERED, [ +# f.INST_ID, +# f.VIEW_NO, +# f.REQ_IDR, +# f.PP_SEQ_NO, +# f.PP_TIME, +# f.LEDGER_ID, +# f.STATE_ROOT, +# f.TXN_ROOT, +# ]) # σc, i>~μi # s = client sequence number (comes from Aardvark paper) -# 
Propagate needs the name of the sender client since every node needs to know -# who sent the request to send the reply. If all clients had name same as -# their identifier same as client name (stack name, the name which RAET knows) - - -# class Propagate(MessageBase): -# typename = PROPAGATE -# schema = ( -# (f.REQUEST.nm, ClientMessageValidator()), -# (f.SENDER_CLIENT.nm, NonEmptyStringField()), -# ) -Propagate = TaggedTuple(PROPAGATE, [ - f.REQUEST, - f.SENDER_CLIENT]) - - -# class PrePrepare(MessageBase): -# typename = PREPREPARE -# schema = ( -# (f.INST_ID.nm, NonNegativeNumberField()), -# (f.VIEW_NO.nm, NonNegativeNumberField()), -# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), -# (f.PP_TIME.nm, TimestampField()), -# (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), -# (f.DISCARDED.nm, NonNegativeNumberField()), -# (f.DIGEST.nm, NonEmptyStringField()), -# (f.LEDGER_ID.nm, LedgerIdField()), -# (f.STATE_ROOT.nm, MerkleRootField()), -# (f.TXN_ROOT.nm, MerkleRootField()), -# ) -PrePrepare = TaggedTuple(PREPREPARE, [ - f.INST_ID, - f.VIEW_NO, - f.PP_SEQ_NO, - f.PP_TIME, - f.REQ_IDR, - f.DISCARDED, - f.DIGEST, - f.LEDGER_ID, - f.STATE_ROOT, - f.TXN_ROOT, - ]) - - -# class Prepare(MessageBase): -# typename = PREPARE -# schema = ( -# (f.INST_ID.nm, NonNegativeNumberField()), -# (f.VIEW_NO.nm, NonNegativeNumberField()), -# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), -# (f.DIGEST.nm, NonEmptyStringField()), -# (f.STATE_ROOT.nm, HexField(length=64, nullable=True)), -# (f.TXN_ROOT.nm, HexField(length=64, nullable=True)), -# ) -Prepare = TaggedTuple(PREPARE, [ - f.INST_ID, - f.VIEW_NO, - f.PP_SEQ_NO, - f.DIGEST, - f.STATE_ROOT, - f.TXN_ROOT, - ]) +class Propagate(MessageBase): + typename = PROPAGATE + schema = ( + (f.REQUEST.nm, ClientMessageValidator()), + (f.SENDER_CLIENT.nm, NonEmptyStringField()), + ) +# Propagate = TaggedTuple(PROPAGATE, [ +# f.REQUEST, +# f.SENDER_CLIENT]) -# class Commit(MessageBase): -# typename = COMMIT -# schema = ( -# (f.INST_ID.nm, 
NonNegativeNumberField()), -# (f.VIEW_NO.nm, NonNegativeNumberField()), -# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), -# ) -Commit = TaggedTuple(COMMIT, [ - f.INST_ID, - f.VIEW_NO, - f.PP_SEQ_NO - ]) +class PrePrepare(MessageBase): + typename = PREPREPARE + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + (f.PP_TIME.nm, TimestampField()), + (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), + (f.DISCARDED.nm, NonNegativeNumberField()), + (f.DIGEST.nm, NonEmptyStringField()), + (f.LEDGER_ID.nm, LedgerIdField()), + (f.STATE_ROOT.nm, HexField(length=64, nullable=True)), + (f.TXN_ROOT.nm, HexField(length=64, nullable=True)), + ) +# PrePrepare = TaggedTuple(PREPREPARE, [ +# f.INST_ID, +# f.VIEW_NO, +# f.PP_SEQ_NO, +# f.PP_TIME, +# f.REQ_IDR, +# f.DISCARDED, +# f.DIGEST, +# f.LEDGER_ID, +# f.STATE_ROOT, +# f.TXN_ROOT, +# ]) + + +class Prepare(MessageBase): + typename = PREPARE + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + (f.DIGEST.nm, NonEmptyStringField()), + (f.STATE_ROOT.nm, HexField(length=64, nullable=True)), + (f.TXN_ROOT.nm, HexField(length=64, nullable=True)), + ) +# Prepare = TaggedTuple(PREPARE, [ +# f.INST_ID, +# f.VIEW_NO, +# f.PP_SEQ_NO, +# f.DIGEST, +# f.STATE_ROOT, +# f.TXN_ROOT, +# ]) + + +class Commit(MessageBase): + typename = COMMIT + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + ) +# Commit = TaggedTuple(COMMIT, [ +# f.INST_ID, +# f.VIEW_NO, +# f.PP_SEQ_NO +# ]) # class Checkpoint(MessageBase): # typename = CHECKPOINT @@ -344,53 +339,52 @@ class ClientMessageValidator(MessageValidator): Reply = TaggedTuple(REPLY, [f.RESULT]) -# class InstanceChange(MessageBase): -# typename = INSTANCE_CHANGE -# schema = ( -# (f.VIEW_NO.nm, 
NonNegativeNumberField()), -# (f.REASON.nm, NonNegativeNumberField()), -# (f.ORD_SEQ_NOS.nm, IterableField(NonNegativeNumberField())), -# ) -InstanceChange = TaggedTuple(INSTANCE_CHANGE, [ - f.VIEW_NO, - f.REASON, - f.ORD_SEQ_NOS, -]) +class InstanceChange(MessageBase): + typename = INSTANCE_CHANGE + schema = ( + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.REASON.nm, NonNegativeNumberField()) + ) +# InstanceChange = TaggedTuple(INSTANCE_CHANGE, [ +# f.VIEW_NO, +# f.REASON +# ]) -# class LedgerStatus(MessageBase): -# typename = LEDGER_STATUS -# schema = ( -# (f.LEDGER_ID.nm, LedgerIdField()), -# (f.TXN_SEQ_NO.nm, NonNegativeNumberField()), -# (f.MERKLE_ROOT.nm, MerkleRootField()), -# ) -LedgerStatus = TaggedTuple(LEDGER_STATUS, [ - f.LEDGER_ID, - f.TXN_SEQ_NO, - f.MERKLE_ROOT]) +class LedgerStatus(MessageBase): + typename = LEDGER_STATUS + schema = ( + (f.LEDGER_ID.nm, LedgerIdField()), + (f.TXN_SEQ_NO.nm, NonNegativeNumberField()), + (f.MERKLE_ROOT.nm, MerkleRootField()), + ) +# LedgerStatus = TaggedTuple(LEDGER_STATUS, [ +# f.LEDGER_ID, +# f.TXN_SEQ_NO, +# f.MERKLE_ROOT]) -# class ConsistencyProof(MessageBase): -# typename = CONSISTENCY_PROOF -# schema = ( -# (f.LEDGER_ID.nm, LedgerIdField()), -# (f.SEQ_NO_START.nm, NonNegativeNumberField()), -# (f.SEQ_NO_END.nm, NonNegativeNumberField()), -# (f.PP_SEQ_NO.nm, NonNegativeNumberField()), -# (f.OLD_MERKLE_ROOT.nm, MerkleRootField()), -# (f.NEW_MERKLE_ROOT.nm, MerkleRootField()), -# (f.HASHES.nm, IterableField(NonEmptyStringField())), -# ) -ConsistencyProof = TaggedTuple(CONSISTENCY_PROOF, [ - f.LEDGER_ID, - f.SEQ_NO_START, - f.SEQ_NO_END, - f.PP_SEQ_NO, - f.OLD_MERKLE_ROOT, - f.NEW_MERKLE_ROOT, - f.HASHES -]) + +class ConsistencyProof(MessageBase): + typename = CONSISTENCY_PROOF + schema = ( + (f.LEDGER_ID.nm, LedgerIdField()), + (f.SEQ_NO_START.nm, NonNegativeNumberField()), + (f.SEQ_NO_END.nm, NonNegativeNumberField()), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + (f.OLD_MERKLE_ROOT.nm, MerkleRootField()), + 
(f.NEW_MERKLE_ROOT.nm, MerkleRootField()), + (f.HASHES.nm, IterableField(NonEmptyStringField())), + ) +# ConsistencyProof = TaggedTuple(CONSISTENCY_PROOF, [ +# f.LEDGER_ID, +# f.SEQ_NO_START, +# f.SEQ_NO_END, +# f.PP_SEQ_NO, +# f.OLD_MERKLE_ROOT, +# f.NEW_MERKLE_ROOT, +# f.HASHES +# ]) # TODO: Catchup is not a good name, replace it with `sync` or something which # is familiar diff --git a/plenum/common/util.py b/plenum/common/util.py index 84368a8233..1c4eceb748 100644 --- a/plenum/common/util.py +++ b/plenum/common/util.py @@ -21,6 +21,7 @@ import base58 import libnacl.secret +from libnacl import randombytes_uniform import psutil from jsonpickle import encode, decode from six import iteritems, string_types @@ -40,19 +41,26 @@ def randomString(size: int = 20, - chars: str = string.ascii_letters + string.digits) -> str: + chars = string.ascii_letters + string.digits) -> str: """ - Generate a random string of the specified size. - - Ensure that the size is less than the length of chars as this function uses random.choice - which uses random sampling without replacement. + Generate a random string of the specified size :param size: size of the random string to generate :param chars: the set of characters to use to generate the random string. Uses alphanumerics by default. 
:return: the random string generated """ - assert size < len(chars), 'size should be less than the number of characters' - return ''.join(random.sample(chars, size)) + + if not hasattr(chars, "__getitem__"): + # choice does not work with non indexed containers + chars = list(chars) + + def randomChar(): + # DONOT use random.choice its as PRNG not secure enough for our needs + # return random.choice(chars) + rn = randombytes_uniform(len(chars)) + return chars[rn] + + return ''.join(randomChar() for _ in range(size)) def mostCommonElement(elements: Iterable[T]) -> T: @@ -167,7 +175,7 @@ def getMaxFailures(nodeCount: int) -> int: return 0 -def getQuorum(nodeCount: int = None, f: int = None) -> int: +def get_strong_quorum(nodeCount: int = None, f: int = None) -> int: r""" Return the minimum number of nodes where the number of correct nodes is greater than the number of faulty nodes. @@ -182,6 +190,13 @@ def getQuorum(nodeCount: int = None, f: int = None) -> int: return 2 * f + 1 +def get_weak_quorum(nodeCount: int = None, f: int = None) -> int: + if nodeCount is not None: + f = getMaxFailures(nodeCount) + if f is not None: + return f + 1 + + def getNoInstances(nodeCount: int) -> int: """ Return the number of protocol instances which is equal to f + 1. 
See @@ -498,22 +513,27 @@ def createDirIfNotExists(dir): os.makedirs(dir) -def is_valid_port(port): - return port.isdigit() and int(port) in range(1, 65536) +def is_network_port_valid(port): + return port.isdigit() and 0 < int(port) < 65536 -def check_endpoint_valid(endpoint, required: bool=True): - if not endpoint: - if required: - raise MissingEndpoint() - else: - return - ip, port = endpoint.split(':') +def is_network_ip_address_valid(ip_address): try: - ipaddress.ip_address(ip) - except Exception as exc: - raise InvalidEndpointIpAddress(endpoint) from exc - if not is_valid_port(port): + ipaddress.ip_address(ip_address) + except ValueError: + return False + else: + return True + + +def check_endpoint_valid(endpoint): + if ':' not in endpoint: + # TODO: replace with more suitable exception + raise InvalidEndpointIpAddress(endpoint) + ip, port = endpoint.split(':') + if not is_network_ip_address_valid(ip): + raise InvalidEndpointIpAddress(endpoint) + if not is_network_port_valid(port): raise InvalidEndpointPort(endpoint) @@ -527,6 +547,7 @@ def getFormattedErrorMsg(msg): errorLine = "-" * msgHalfLength + "ERROR" + "-" * msgHalfLength return "\n\n" + errorLine + "\n " + msg + "\n" + errorLine + "\n" + def normalizedWalletFileName(walletName): return "{}.{}".format(walletName.lower(), WALLET_FILE_EXTENSION) @@ -559,3 +580,12 @@ def getLastModifiedTime(file): newest = max(glob.iglob('{}/{}'.format(dir, filePattern)), key=getLastModifiedTime) return basename(newest) + + +def pop_keys(mapping: Dict, cond: Callable): + rem = [] + for k in mapping: + if cond(k): + rem.append(k) + for i in rem: + mapping.pop(i) diff --git a/plenum/config.py b/plenum/config.py index cb816e44eb..33f40f21cd 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -138,14 +138,6 @@ # Expected time for one stack to get connected to another ExpectedConnectTime = 3.3 if sys.platform == 'win32' else 2 - -# After ordering every `CHK_FREQ` requests, replica sends a CHECKPOINT -CHK_FREQ = 100000 - -# 
Difference between low water mark and high water mark -LOG_SIZE = 3*CHK_FREQ - - # Since the ledger is stored in a flat file, this makes the ledger do # an fsync on every write. Making it True can significantly slow # down writes as shown in a test `test_file_store_perf.py` in the ledger @@ -167,11 +159,8 @@ # Max batch size for 3 phase commit Max3PCBatchSize = 100 # Max time to wait before creating a batch for 3 phase commit -Max3PCBatchWait = 1 +Max3PCBatchWait = .001 -# Maximum lifespan for a batch, this needs to be changed if -# `Max3PCBatchSize` is changed -ThreePCBatchTimeout = 25 # Each node keeps a map of PrePrepare sequence numbers and the corresponding # txn seqnos that came out of it. Helps in servicing Consistency Proof Requests @@ -186,7 +175,14 @@ MaxStateProofTime = 3 +# After ordering every `CHK_FREQ` batches, replica sends a CHECKPOINT +CHK_FREQ = 10000 + +# Difference between low water mark and high water mark +LOG_SIZE = 3*CHK_FREQ + + CLIENT_REQACK_TIMEOUT = 5 -CLIENT_REPLY_TIMEOUT = Max3PCBatchWait + 10 +CLIENT_REPLY_TIMEOUT = 15 CLIENT_MAX_RETRY_ACK = 5 CLIENT_MAX_RETRY_REPLY = 5 diff --git a/plenum/server/models.py b/plenum/server/models.py index cfc3f3d6bf..8d3582b431 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -128,7 +128,7 @@ class InstanceChanges(TrackedMsgs): """ def newVoteMsg(self, msg): - return InsChgVotes(msg.viewNo, set(), msg.ordSeqNos) + return InsChgVotes(msg.viewNo, set()) def getKey(self, msg): return msg if isinstance(msg, int) else msg.viewNo diff --git a/plenum/server/monitor.py b/plenum/server/monitor.py index 712851f569..bde5bd97b8 100644 --- a/plenum/server/monitor.py +++ b/plenum/server/monitor.py @@ -126,6 +126,9 @@ def __init__(self, name: str, Delta: float, Lambda: float, Omega: float, self.startRepeating(self.checkPerformance, config.notifierEventTriggeringConfig['clusterThroughputSpike']['freq']) + if 'disable_view_change' in config.unsafe: + self.isMasterDegraded = lambda: False + def 
__repr__(self): return self.name diff --git a/plenum/server/node.py b/plenum/server/node.py index 6d5e12d53a..c06d8b2d66 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -17,6 +17,8 @@ from ledger.stores.hash_store import HashStore from ledger.stores.memory_hash_store import MemoryHashStore from ledger.util import F +from orderedset._orderedset import OrderedSet + from plenum.client.wallet import Wallet from plenum.common.config_util import getConfig from plenum.common.constants import TXN_TYPE, TXN_TIME, POOL_TXN_TYPES, \ @@ -50,7 +52,7 @@ PLUGIN_TYPE_VERIFICATION, PLUGIN_TYPE_PROCESSING, PoolLedgerTxns, \ ConsProofRequest, ElectionType, ThreePhaseType, Checkpoint, ThreePCState, \ Reject -from plenum.common.util import friendlyEx, getMaxFailures +from plenum.common.util import friendlyEx, getMaxFailures, pop_keys from plenum.common.verifier import DidVerifier from plenum.persistence.leveldb_hash_store import LevelDbHashStore from plenum.persistence.req_id_to_txn import ReqIdrToTxn @@ -143,15 +145,10 @@ def __init__(self, Motor.__init__(self) self.hashStore = self.getHashStore(self.name) + self.primaryStorage = storage or self.getPrimaryStorage() - self.ledgerManager = self.getLedgerManager() self.states = {} # type: Dict[int, State] - self.ledgerManager.addLedger(DOMAIN_LEDGER_ID, - self.domainLedger, - preCatchupStartClbk=self.preDomainLedgerCatchUp, - postCatchupCompleteClbk=self.postDomainLedgerCaughtUp, - postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) self.states[DOMAIN_LEDGER_ID] = self.loadDomainState() self.reqHandler = self.getDomainReqHandler() self.initDomainState() @@ -198,28 +195,59 @@ def __init__(self, self.setF() - self.replicas = [] # type: List[replica.Replica] - - self.instanceChanges = InstanceChanges() - - self.viewNo = 0 # type: int - - self.rank = self.getRank(self.name, self.nodeReg) + self.clientBlacklister = SimpleBlacklister( + self.name + CLIENT_BLACKLISTER_SUFFIX) # type: Blacklister - self.elector = 
None # type: PrimaryDecider + self.nodeBlacklister = SimpleBlacklister( + self.name + NODE_BLACKLISTER_SUFFIX) # type: Blacklister - self.forwardedRequests = set() # type: Set[Tuple[(str, int)]] + self.nodeInfo = { + 'data': {} + } self.instances = Instances() + # QUESTION: Why does the monitor need blacklister? + self.monitor = Monitor(self.name, + Delta=self.config.DELTA, + Lambda=self.config.LAMBDA, + Omega=self.config.OMEGA, + instances=self.instances, + nodestack=self.nodestack, + blacklister=self.nodeBlacklister, + nodeInfo=self.nodeInfo, + notifierEventTriggeringConfig=self. + config.notifierEventTriggeringConfig, + pluginPaths=pluginPaths) + self.replicas = [] # type: List[replica.Replica] # Requests that are to be given to the replicas by the node. Each # element of the list is a deque for the replica with number equal to # its index in the list and each element of the deque is a named tuple self.msgsToReplicas = [] # type: List[deque] + # Any messages that are intended for protocol instances not created. + # Helps in cases where a new protocol instance have been added by a + # majority of nodes due to joining of a new node, but some slow nodes + # are not aware of it. 
Key is instance id and value is a deque + self.msgsForFutureReplicas = {} + + self.adjustReplicas() + + self.instanceChanges = InstanceChanges() + + self.viewNo = 0 # type: int + + self.rank = self.getRank(self.name, self.nodeReg) + + self.elector = None # type: PrimaryDecider # Requests that are to be given to the elector by the node self.msgsToElector = deque() + self.ledgerManager = self.getLedgerManager() + self.init_ledger_manager() + if self.poolLedger: + self.states[POOL_LEDGER_ID] = self.poolManager.state + nodeRoutes = [(Propagate, self.processPropagate), (InstanceChange, self.processInstanceChange)] @@ -246,28 +274,6 @@ def __init__(self, self.initInsChngThrottling() - self.clientBlacklister = SimpleBlacklister( - self.name + CLIENT_BLACKLISTER_SUFFIX) # type: Blacklister - - self.nodeBlacklister = SimpleBlacklister( - self.name + NODE_BLACKLISTER_SUFFIX) # type: Blacklister - - self.nodeInfo = { - 'data': {} - } - - self.monitor = Monitor(self.name, - Delta=self.config.DELTA, - Lambda=self.config.LAMBDA, - Omega=self.config.OMEGA, - instances=self.instances, - nodestack=self.nodestack, - blacklister=self.nodeBlacklister, - nodeInfo=self.nodeInfo, - notifierEventTriggeringConfig=self. - config.notifierEventTriggeringConfig, - pluginPaths=pluginPaths) - # BE CAREFUL HERE # This controls which message types are excluded from signature # verification. 
These are still subject to RAET's signature verification @@ -287,13 +293,6 @@ def __init__(self, # case the node crashes before sending the reply to the client self.requestSender = {} # Dict[Tuple[str, int], str] - # TODO: this and tons of akin stuff should be exterminated - if isinstance(self.poolManager, TxnPoolManager): - self.ledgerManager.addLedger(POOL_LEDGER_ID, self.poolLedger, - postCatchupCompleteClbk=self.postPoolLedgerCaughtUp, - postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) - self.states[POOL_LEDGER_ID] = self.poolManager.state - nodeRoutes.extend([ (LedgerStatus, self.ledgerManager.processLedgerStatus), (ConsistencyProof, self.ledgerManager.processConsistencyProof), @@ -319,19 +318,15 @@ def __init__(self, # and received replies while catching up. self.reqsFromCatchupReplies = set() - # Any messages that are intended for protocol instances not created. - # Helps in cases where a new protocol instance have been added by a - # majority of nodes due to joining of a new node, but some slow nodes - # are not aware of it. Key is instance id and value is a deque - # TODO: Do GC for `msgsForFutureReplicas` - self.msgsForFutureReplicas = {} - # Any messages that are intended for view numbers higher than the # current view. - # TODO: Do GC for `msgsForFutureViews` self.msgsForFutureViews = {} - self.adjustReplicas() + self._primary_replica_no = None + + # Need to keep track of the time when lost connection with primary, + # help in voting for/against a view change. 
+ self.lost_primary_at = None self._primary_replica_no = None @@ -352,6 +347,7 @@ def __init__(self, # Stores the last txn seqNo that was executed for a ledger in a batch self.batchToSeqNos = OrderedDict() # type: OrderedDict[int, int] + self.view_change_in_progress = False @property def id(self): @@ -434,6 +430,25 @@ def stateRootHash(self, ledgerId, isCommitted=True): raise RuntimeError('State with id {} does not exist') return state.committedHeadHash if isCommitted else state.headHash + @property + def ledger_ids(self): + return [POOL_LEDGER_ID, DOMAIN_LEDGER_ID] + + def getLedgerRootHash(self, ledgerId, isCommitted=True): + ledgerInfo = self.ledgerManager.getLedgerInfoByType(ledgerId) + if not ledgerInfo: + raise RuntimeError('Ledger with id {} does not exist') + ledger = ledgerInfo.ledger + if isCommitted: + return ledger.root_hash + return ledger.uncommittedRootHash or ledger.root_hash + + def stateRootHash(self, ledgerId, isCommitted=True): + state = self.states.get(ledgerId) + if not state: + raise RuntimeError('State with id {} does not exist') + return state.committedHeadHash if isCommitted else state.headHash + @property def isParticipating(self): return self.mode == Mode.participating @@ -490,6 +505,27 @@ def getLedgerManager(self) -> LedgerManager: return LedgerManager(self, ownedByNode=True, postAllLedgersCaughtUp=self.allLedgersCaughtUp) + def init_ledger_manager(self): + # TODO: this and tons of akin stuff should be exterminated + self.ledgerManager.addLedger(DOMAIN_LEDGER_ID, + self.domainLedger, + preCatchupStartClbk=self.preDomainLedgerCatchUp, + postCatchupCompleteClbk=self.postDomainLedgerCaughtUp, + postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) + self.on_new_ledger_added(DOMAIN_LEDGER_ID) + if isinstance(self.poolManager, TxnPoolManager): + self.ledgerManager.addLedger(POOL_LEDGER_ID, self.poolLedger, + postCatchupCompleteClbk=self.postPoolLedgerCaughtUp, + postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) + 
self.on_new_ledger_added(POOL_LEDGER_ID) + + def on_new_ledger_added(self, ledger_id): + for r in self.replicas: + # If a ledger was added after a replica was created, add a queue + # in the ledger to the replica + if ledger_id not in r.requestQueues: + r.requestQueues[ledger_id] = OrderedSet() + def loadDomainState(self): return PruningState( initKeyValueStorage( @@ -721,7 +757,6 @@ def onConnsChanged(self, joined: Set[str], left: Set[str]): if self.isGoing(): if self.connectedNodeCount == self.totalNodes: self.status = Status.started - # self.stopKeySharing() elif self.connectedNodeCount >= self.minimumNodes: self.status = Status.started_hungry else: @@ -769,19 +804,16 @@ def send_ledger_status_to_newly_connected_node(self, node_name): def newNodeJoined(self, txn): self.setF() + new_replicas = self.adjustReplicas() if self.adjustReplicas() > 0: - self.decidePrimaries() - # TODO: Should tell the client after the new node has synced up its - # ledgers - # self.sendPoolInfoToClients(txn) + while new_replicas > 0: + self.elector.start_election_for_instance( + self.replicas[-new_replicas].instId) + new_replicas -= 1 def nodeLeft(self, txn): self.setF() - if self.adjustReplicas(): - self.decidePrimaries() - # TODO: Should tell the client after the new node has synced up its - # ledgers - # self.sendPoolInfoToClients(txn) + self.adjustReplicas() def sendPoolInfoToClients(self, txn): logger.debug("{} sending new node info {} to all clients".format(self, @@ -831,6 +863,9 @@ def checkInstances(self) -> None: self._schedule(self.decidePrimaries) def adjustReplicas(self): + """ + Add or remove replicas depending on `f` + """ newReplicas = 0 while len(self.replicas) < self.requiredNumberOfInstances: self.addReplica() @@ -841,6 +876,7 @@ def adjustReplicas(self): self.removeReplica() newReplicas -= 1 + pop_keys(self.msgsForFutureReplicas, lambda x: x < len(self.replicas)) return newReplicas def _dispatch_stashed_msg(self, msg, frm): @@ -1015,7 +1051,7 @@ def 
serviceElectorOutBox(self, limit: int=None) -> int: while self.elector.outBox and (not limit or msgCount < limit): msgCount += 1 msg = self.elector.outBox.popleft() - if isinstance(msg, (Nomination, Primary, Reelection)): + if isinstance(msg, ElectionType): self.send(msg) elif isinstance(msg, BlacklistMsg): nodeName = getattr(msg, f.NODE_NAME.nm) @@ -1066,10 +1102,17 @@ def primaryReplicaNo(self) -> Optional[int]: @property def master_primary(self) -> Optional[str]: + """ + Return the name of the primary node of the master instance + """ if self.replicas[0].primaryName: return self.replicas[0].getNodeName(self.replicas[0].primaryName) return None + @staticmethod + def is_valid_view_or_inst(n): + return not(n is None or not isinstance(n, int) or n < 0) + def msgHasAcceptableInstId(self, msg, frm) -> bool: """ Return true if the instance id of message corresponds to a correct @@ -1079,7 +1122,7 @@ def msgHasAcceptableInstId(self, msg, frm) -> bool: :return: """ instId = getattr(msg, f.INST_ID.nm, None) - if instId is None or not isinstance(instId, int) or instId < 0: + if not self.is_valid_view_or_inst(instId): return False if instId >= len(self.msgsToReplicas): if instId not in self.msgsForFutureReplicas: @@ -1098,9 +1141,9 @@ def msgHasAcceptableViewNo(self, msg, frm) -> bool: :return: """ viewNo = getattr(msg, f.VIEW_NO.nm, None) - if viewNo is None or not isinstance(viewNo, int) or viewNo < 0: + if not self.is_valid_view_or_inst(viewNo): return False - if viewNo < self.viewNo: + if self.viewNo - viewNo > 1: self.discard(msg, "un-acceptable viewNo {}" .format(viewNo), logMethod=logger.info) elif viewNo > self.viewNo: @@ -1180,7 +1223,7 @@ def validateNodeMsg(self, wrappedMsg): try: cMsg = cls(**msg) except Exception as ex: - raise InvalidNodeMsg from ex + raise InvalidNodeMsg(str(ex)) try: self.verifySignature(cMsg) except BaseExc as ex: @@ -1424,6 +1467,11 @@ def postRecvTxnFromCatchup(self, ledgerId: int, txn: Any): return rh def allLedgersCaughtUp(self): + if 
self.ledgerManager.lastCaughtUpPpSeqNo > 0: + # TODO: currently we use the same ppSeqNo for all instances + for replica in self.replicas: + replica.caught_up_till_pp_seq_no(self.ledgerManager.lastCaughtUpPpSeqNo) + self.mode = Mode.participating self.processStashedOrderedReqs() # TODO: next line not needed @@ -1578,7 +1626,7 @@ def isProcessingReq(self, identifier, reqId) -> bool: def doneProcessingReq(self, identifier, reqId): self.requestSender.pop((identifier, reqId)) - def processOrdered(self, ordered: Ordered, retryNo: int = 0): + def processOrdered(self, ordered: Ordered): """ Process and orderedRequest. @@ -1591,39 +1639,32 @@ def processOrdered(self, ordered: Ordered, retryNo: int = 0): :return: True if successful, None otherwise """ - instId, viewNo, reqIdrs, ppSeqNo, ppTime, ledgerId, stateRoot, txnRoot \ - = tuple(ordered) - - self.monitor.requestOrdered(reqIdrs, - instId, - byMaster=(instId == self.instances.masterId)) + inst_id, view_no, req_idrs, pp_seq_no, pp_time, ledger_id, \ + state_root, txn_root = tuple(ordered) # Only the request ordered by master protocol instance are executed by # the client - if instId == self.instances.masterId: - reqs = [self.requests[i, r].request for (i, r) in reqIdrs - if (i, r) in self.requests] - if len(reqs) == len(reqIdrs): - logger.debug("{} executing Ordered batch {} of {} requests". - format(self.name, ppSeqNo, len(reqIdrs))) - self.executeBatch(ppSeqNo, ppTime, reqs, ledgerId, stateRoot, - txnRoot) - # If the client request hasn't reached the node but corresponding - # PROPAGATE, PRE-PREPARE, PREPARE and COMMIT request did, - # then retry 3 times - elif retryNo < 3: - retryNo += 1 - asyncio.sleep(random.randint(2, 4)) - self.processOrdered(ordered, retryNo) - logger.debug('{} retrying executing ordered client requests'. 
- format(self.name)) + r = None + if inst_id == self.instances.masterId: + reqs = [self.requests[i, r].finalised for (i, r) in req_idrs + if (i, r) in self.requests and self.requests[i, r].finalised] + if len(reqs) == len(req_idrs): + logger.debug("{} executing Ordered batch {} {} of {} requests". + format(self.name, view_no, pp_seq_no, len(req_idrs))) + self.executeBatch(pp_seq_no, pp_time, reqs, ledger_id, state_root, + txn_root) + r = True else: - logger.warning('{} not retrying processing Ordered any more {} ' - 'times'.format(self, retryNo)) - return True + logger.warning('{} did not find {} finalized requests, but ' + 'still ordered'.format(self, len(req_idrs) - + len(reqs))) + return None else: logger.trace("{} got ordered requests from backup replica {}". - format(self, instId)) + format(self, inst_id)) + r = False + self.monitor.requestOrdered(req_idrs, inst_id, byMaster=r) + return r def processEscalatedException(self, ex): """ @@ -1641,7 +1682,7 @@ def processInstanceChange(self, instChg: InstanceChange, frm: str) -> None: :param instChg: the instance change request :param frm: the name of the node that sent this `msg` """ - logger.debug("Node {} received instance change request: {} from {}". + logger.debug("{} received instance change request: {} from {}". format(self, instChg, frm)) # TODO: add sender to blacklist? @@ -1658,9 +1699,11 @@ def processInstanceChange(self, instChg: InstanceChange, frm: str) -> None: # only when found master to be degraded. 
if quorum of view changes # found then change view even if master not degraded if not self.instanceChanges.hasInstChngFrom(instChg.viewNo, frm): - self.instanceChanges.addVote(instChg, frm) + self._record_inst_change_msg(instChg, frm) - if self.monitor.isMasterDegraded(): + if self.monitor.isMasterDegraded() and not \ + self.instanceChanges.hasInstChngFrom(instChg.viewNo, + self.name): logger.info( "{} found master degraded after receiving instance change " "message from {}".format(self, frm)) @@ -1668,10 +1711,20 @@ def processInstanceChange(self, instChg: InstanceChange, frm: str) -> None: else: logger.debug( "{} received instance change message {} but did not " - "find the master to be slow".format(self, instChg)) + "find the master to be slow or has already sent an instance" + " change message".format(self, instChg)) - if not self.do_view_change_if_possible(instChg.viewNo): - logger.trace("{} cannot initiate a view change".format(self)) + def do_view_change_if_possible(self, view_no): + # TODO: Need to handle skewed distributions which can arise due to + # malicious nodes sending messages early on + r, msg = self.canViewChange(view_no) + if r: + logger.info("{} initiating a view change to {} from {}". + format(self, view_no, self.viewNo)) + self.startViewChange(view_no) + else: + logger.debug(msg) + return r def do_view_change_if_possible(self, view_no): if self.canViewChange(view_no): @@ -1686,7 +1739,7 @@ def checkPerformance(self): Check if master instance is slow and send an instance change request. 
:returns True if master performance is OK, otherwise False """ - logger.debug("{} checking its performance".format(self)) + logger.trace("{} checking its performance".format(self)) # Move ahead only if the node has synchronized its state with other # nodes @@ -1697,7 +1750,7 @@ def checkPerformance(self): self.sendNodeRequestSpike() if self.monitor.isMasterDegraded(): self.sendInstanceChange(self.viewNo+1) - logger.debug('{} sent view change performance degraded ' + logger.debug('{} sent view change since performance degraded ' 'of master instance'.format(self)) self.do_view_change_if_possible(self.viewNo+1) return False @@ -1727,8 +1780,13 @@ def sendNodeRequestSpike(self): ) def _create_instance_change_msg(self, view_no, suspicion_code): - return InstanceChange(view_no, suspicion_code, - [r.lastOrderedPPSeqNo for r in self.replicas]) + return InstanceChange(view_no, suspicion_code) + + def _record_inst_change_msg(self, msg, frm): + view_no = msg.viewNo + self.instanceChanges.addVote(msg, frm) + if msg.viewNo > self.viewNo: + self.do_view_change_if_possible(view_no) def sendInstanceChange(self, view_no: int, suspicion=Suspicions.PRIMARY_DEGRADED): @@ -1752,7 +1810,7 @@ def sendInstanceChange(self, view_no: int, format(self, self.monitor.prettymetrics)) msg = self._create_instance_change_msg(view_no, suspicion.code) self.send(msg) - self.instanceChanges.addVote(msg, self.name) + self._record_inst_change_msg(msg, self.name) else: logger.debug("{} cannot send instance change sooner then {} seconds" .format(self, cooldown)) @@ -1773,7 +1831,8 @@ def quorum(self) -> int: def primary_found(self): # If the node has primary replica of master instance self.monitor.hasMasterPrimary = self.primaryReplicaNo == 0 - self.process_reqs_stashed_for_primary() + if self.view_change_in_progress and self.all_instances_have_primary: + self.on_view_change_complete(self.viewNo) @property def all_instances_have_primary(self): @@ -1784,8 +1843,13 @@ def canViewChange(self, proposedViewNo: 
int) -> (bool, str): Return whether there's quorum for view change for the proposed view number and its view is less than or equal to the proposed view """ - return self.instanceChanges.hasQuorum(proposedViewNo, self.f) and \ - self.viewNo < proposedViewNo + msg = None + if not self.instanceChanges.hasQuorum(proposedViewNo, self.f): + msg = '{} has no quorum for view {}'.format(self, proposedViewNo) + elif not proposedViewNo > self.viewNo: + msg = '{} is in higher view more than {}'.format(self, proposedViewNo) + + return not bool(msg), msg def propose_view_change(self): # Sends instance change message when primary has been @@ -1809,8 +1873,6 @@ def lost_master_primary(self): """ self.lost_primary_at = time.perf_counter() - self.propose_view_change() - logger.debug('{} scheduling a view change in {} sec'. format(self, self.config.ToleratePrimaryDisconnection)) self._schedule(self.propose_view_change, @@ -1823,18 +1885,32 @@ def startViewChange(self, proposedViewNo: int): :param proposedViewNo: the new view number after view change. """ + self.view_change_in_progress = True self.viewNo = proposedViewNo logger.debug("{} resetting monitor stats after view change". format(self)) self.monitor.reset() - self.processStashedMsgsForView(proposedViewNo) # Now communicate the view change to the elector which will # contest primary elections across protocol all instances self.elector.viewChanged(self.viewNo) + self._primary_replica_no = None + pop_keys(self.msgsForFutureViews, lambda x: x <= self.viewNo) self.initInsChngThrottling() self.logNodeInfo() + def on_view_change_complete(self, view_no): + """ + View change completes for a replica when it has been decided which was + the last ppSeqno and state and txn root for previous view + """ + self.view_change_in_progress = False + self.instanceChanges.pop(view_no-1, None) + + def ordered_prev_view_msgs(self, inst_id, pp_seqno): + logger.debug('{} ordered previous view batch {} by instance {}'. 
+ format(self, pp_seqno, inst_id)) + def verifySignature(self, msg): """ Validate the signature of the request @@ -1868,10 +1944,11 @@ def isSignatureVerificationNeeded(self, msg: Any): return True def ppSeqNoForTxnSeqNo(self, ledgerId, seqNo): + # Looking in reverse since its more likely to be recent for ppSeqNo, (lid, txnSeqNo) in reversed(self.batchToSeqNos.items()): if lid == ledgerId and txnSeqNo == seqNo: return ppSeqNo - return -1 + return 0 def executeBatch(self, ppSeqNo: int, ppTime: float, reqs: List[Request], ledgerId, stateRoot, txnRoot) -> None: @@ -2102,8 +2179,6 @@ def reportSuspiciousNode(self, logger.info('{} sent instance change since suspicion code {}' .format(self, code)) - if not self.do_view_change_if_possible(self.viewNo + 1): - logger.trace("{} cannot initiate a view change".format(self)) if offendingMsg: self.discard(offendingMsg, reason, logger.warning) diff --git a/plenum/server/pool_manager.py b/plenum/server/pool_manager.py index d67d7a0d76..92ca296889 100644 --- a/plenum/server/pool_manager.py +++ b/plenum/server/pool_manager.py @@ -179,11 +179,12 @@ def _updateNode(txn): if len(seqNos) == 1: # Since only one transaction has been made, this is a new # node transaction - self.addNewNodeAndConnect(txn) + if VALIDATOR in txn[DATA].get(SERVICES, []): + self.addNewNodeAndConnect(txn) else: self.node.nodeReg[nodeName] = HA(info[DATA][NODE_IP], info[DATA][NODE_PORT]) - self.node.cliNodeReg[nodeName] = HA(info[DATA][CLIENT_IP], + self.node.cliNodeReg[nodeName + CLIENT_STACK_SUFFIX] = HA(info[DATA][CLIENT_IP], info[DATA][CLIENT_PORT]) _updateNode(txn) @@ -262,7 +263,7 @@ def nodeServicesChanged(self, txn): del self.node.nodeReg[nodeName] del self.node.cliNodeReg[nodeName + CLIENT_STACK_SUFFIX] try: - rid = self.node.nodestack.removeRemoteByName(nodeName) + rid = TxnStackManager.removeRemote(self.node.nodestack, nodeName) if rid: self.node.nodestack.outBoxes.pop(rid, None) except RemoteNotFound: diff --git a/plenum/server/pool_req_handler.py 
b/plenum/server/pool_req_handler.py index 29adb7a587..ca2c76fe7f 100644 --- a/plenum/server/pool_req_handler.py +++ b/plenum/server/pool_req_handler.py @@ -70,13 +70,6 @@ def authErrorWhileAddingNode(self, request): if error: return error - # SERVICES is required for add node and optional for update node txn - # it is the cause why the check is here - # this is not a good place for the check, - # should be moved in some other place - if SERVICES not in data: - return 'field {} is required for adding node'.format(SERVICES) - isSteward = self.isSteward(origin, isCommitted=False) if not isSteward: return "{} is not a steward so cannot add a new node".format(origin) diff --git a/plenum/server/primary_elector.py b/plenum/server/primary_elector.py index b4538134e3..8c7f41442b 100644 --- a/plenum/server/primary_elector.py +++ b/plenum/server/primary_elector.py @@ -6,7 +6,7 @@ from typing import Sequence, Any, Union, List from plenum.common.types import Nomination, Reelection, Primary, f -from plenum.common.util import mostCommonElement, getQuorum +from plenum.common.util import mostCommonElement, get_strong_quorum from stp_core.common.log import getlogger from plenum.server import replica from plenum.server.primary_decider import PrimaryDecider @@ -39,9 +39,8 @@ def __init__(self, node): # primary while its catching up self.node = node + # Flag variable which indicates which replica has nominated for itself self.replicaNominatedForItself = None - """Flag variable which indicates which replica has nominated - for itself""" self.nominations = {} @@ -53,6 +52,11 @@ def __init__(self, node): self.reElectionRounds = {} + # # Tracks when election started for each instance, once + # # `MaxElectionTimeoutFactor`*node_count elapses and no primary decided, + # # re-start election + # self.election_start_times = {} + routerArgs = [(Nomination, self.processNominate), (Primary, self.processPrimary), (Reelection, self.processReelection)] @@ -165,7 +169,7 @@ def quorum(self) -> int: 
r""" Return the quorum of this RBFT system. Equal to :math:`2f + 1`. """ - return getQuorum(f=self.f) + return get_strong_quorum(f=self.f) def decidePrimaries(self): # overridden method of PrimaryDecider self.scheduleElection() @@ -187,6 +191,11 @@ def startElection(self): self.nominateItself() + def start_election_for_instance(self, inst_id): + # Called when starting election for a particular protocol instance + self.prepareReplicaForElection(self.replicas[inst_id]) + self._schedule(self.nominateItself, random.random()) + def nominateItself(self): """ Actions to perform if this node hasn't nominated any of its replicas. @@ -276,7 +285,7 @@ def processNominate(self, nom: Nomination, sender: str): :param nom: the nomination message :param sender: sender address of the nomination """ - logger.debug("{}'s elector started processing nominate msg: {}". + logger.debug("{} elector started processing nominate msg: {}". format(self.name, nom)) instId = nom.instId replica = self.replicas[instId] @@ -285,7 +294,7 @@ def processNominate(self, nom: Nomination, sender: str): ' of master in previous view too'. format(self, sender, nom.name), logMethod=logger.warning) - return + return False sndrRep = replica.generateName(sender, nom.instId) @@ -336,7 +345,7 @@ def processPrimary(self, prim: Primary, sender: str) -> None: replica = self.replicas[instId] if instId == 0 and replica.getNodeName(prim.name) == self.previous_master_primary: self.discard(prim, '{} got Primary from {} for {} who was primary' - ' of master in previous view too'. + ' of master in previous view too'. 
format(self, sender, prim.name), logMethod=logger.warning) return @@ -346,6 +355,7 @@ def processPrimary(self, prim: Primary, sender: str) -> None: # Nodes should not be able to declare `Primary` winner more than more if instId not in self.primaryDeclarations: self.setDefaults(instId) + if sndrRep not in self.primaryDeclarations[instId]: self.primaryDeclarations[instId][sndrRep] = (prim.name, prim.ordSeqNo) @@ -740,8 +750,6 @@ def viewChanged(self, viewNo: int): for replica in self.replicas: replica.primaryName = None - self.node._primary_replica_no = None - # Reset to defaults values for different data structures as new # elections would begin for r in self.replicas: diff --git a/plenum/server/propagator.py b/plenum/server/propagator.py index cb614c486e..de92313cb3 100644 --- a/plenum/server/propagator.py +++ b/plenum/server/propagator.py @@ -4,7 +4,7 @@ import weakref from plenum.common.types import Propagate -from plenum.common.request import Request +from plenum.common.request import Request, ReqKey from stp_core.common.log import getlogger from plenum.common.util import checkIfMoreThanFSameItems @@ -191,15 +191,9 @@ def forward(self, request: Request): :param request: the REQUEST to propagate """ key = request.key - fin_req = self.requests[key].finalised - if self.primaryReplicaNo is not None: - self.msgsToReplicas[self.primaryReplicaNo].append(fin_req) - logger.debug("{} forwarding client request {} to replica {}". 
- format(self, key, self.primaryReplicaNo)) - elif not self.all_instances_have_primary: - logger.debug('{} stashing request {} since at least one replica ' - 'lacks primary'.format(self, key)) - self.reqs_stashed_for_primary.append(fin_req) + for q in self.msgsToReplicas: + logger.debug('{} forwarding request {} to replicas'.format(self, key)) + q.append(ReqKey(*key)) self.monitor.requestUnOrdered(*key) self.requests.flagAsForwarded(request, len(self.msgsToReplicas)) @@ -231,20 +225,5 @@ def tryForwarding(self, request: Request): # to move ahead self.forward(request) else: - logger.trace("{} not forwarding request {} to its replicas " + logger.debug("{} not forwarding request {} to its replicas " "since {}".format(self, request, msg)) - - def process_reqs_stashed_for_primary(self): - if self.reqs_stashed_for_primary: - if self.primaryReplicaNo is not None: - self.msgsToReplicas[self.primaryReplicaNo].extend( - self.reqs_stashed_for_primary) - logger.debug("{} forwarding stashed {} client requests to " - "replica {}". 
- format(self, len(self.reqs_stashed_for_primary), - self.primaryReplicaNo)) - elif not self.all_instances_have_primary: - return - # Either the stashed requests have been given to a primary or this - # node does not have a primary, so clear the queue - self.reqs_stashed_for_primary.clear() diff --git a/plenum/server/replica.py b/plenum/server/replica.py index 7cf10c0d2e..c33337a87e 100644 --- a/plenum/server/replica.py +++ b/plenum/server/replica.py @@ -15,14 +15,14 @@ import plenum.server.node from plenum.common.config_util import getConfig -from plenum.common.exceptions import SuspiciousNode, InvalidClientRequest, \ +from plenum.common.exceptions import SuspiciousNode, \ InvalidClientMessageException, UnknownIdentifier from plenum.common.signing import serialize from plenum.common.txn_util import reqToTxn from plenum.common.types import PrePrepare, \ Prepare, Commit, Ordered, ThreePhaseMsg, ThreePhaseKey, ThreePCState, \ CheckpointState, Checkpoint, Reject, f, InstanceChange -from plenum.common.request import ReqDigest, Request +from plenum.common.request import ReqDigest, Request, ReqKey from plenum.common.message_processor import MessageProcessor from plenum.common.util import updateNamedTuple from stp_core.common.log import getlogger @@ -87,7 +87,7 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, self.config = getConfig() - routerArgs = [(Request, self.readyFor3PC)] + routerArgs = [(ReqKey, self.readyFor3PC)] for r in [PrePrepare, Prepare, Commit]: routerArgs.append((r, self.processThreePhaseMsg)) @@ -166,13 +166,13 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # Dictionary of sent PRE-PREPARE that are stored by primary replica # which it has broadcasted to all other non primary replicas # Key of dictionary is a 2 element tuple with elements viewNo, - # pre-prepare seqNo and value is a tuple of Request Digest and time + # pre-prepare seqNo and value is the received PRE-PREPARE self.sentPrePrepares = SortedDict(lambda 
k: k[1]) # type: Dict[Tuple[int, int], PrePrepare] # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2 - # element tuple with elements viewNo, pre-prepare seqNo and value is - # a list of tuples of Request Keys and time + # element tuple with elements viewNo, pre-prepare seqNo and value + # is the received PRE-PREPARE self.prePrepares = SortedDict(lambda k: k[1]) # type: Dict[Tuple[int, int], PrePrepare] @@ -200,11 +200,10 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # received while it was not participating self.stashingWhileCatchingUp = set() # type: Set[Tuple] - # Commits which are not being ordered since commits with lower view - # numbers and sequence numbers have not been ordered yet. Key is the + # Commits which are not being ordered since commits with lower + # sequence numbers have not been ordered yet. Key is the # viewNo and value a map of pre-prepare sequence number to commit - self.stashedCommitsForOrdering = {} # type: Dict[int, - # Dict[int, Commit]] + self.stashed_out_of_order_commits = {} # type: Dict[int,Dict[int,Commit]] self.checkpoints = SortedDict(lambda k: k[0]) @@ -218,7 +217,53 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # Set high water mark (`H`) too self.h = 0 # type: int - self.lastPrePrepareSeqNo = self.h # type: int + self._lastPrePrepareSeqNo = self.h # type: int + + # Queues used in PRE-PREPARE for each ledger, + self.requestQueues = {} # type: Dict[int, deque] + for ledger_id in self.ledger_ids: + # Using ordered set since after ordering each PRE-PREPARE, + # the request key is removed, so fast lookup and removal of + # request key is needed. 
Need the collection to be ordered since + # the request key needs to be removed once its ordered + self.requestQueues[ledger_id] = OrderedSet() + + self.batches = OrderedDict() # type: OrderedDict[int, Tuple[int, float, bytes]] + + # TODO: Need to have a timer for each ledger + self.lastBatchCreated = time.perf_counter() + + self.lastOrderedPPSeqNo = 0 + + # Keeps the `lastOrderedPPSeqNo` and ledger_summary for each view no. + # GC when ordered last batch of the view + self.view_ends_at = OrderedDict() + + def ledger_uncommitted_size(self, ledgerId): + if not self.isMaster: + return None + return self.node.getLedger(ledgerId).uncommitted_size + + def txnRootHash(self, ledgerId, toHex=True): + if not self.isMaster: + return None + ledger = self.node.getLedger(ledgerId) + h = ledger.uncommittedRootHash + # If no uncommittedHash since this is the beginning of the tree + # or no transactions affecting the ledger were made after the + # last changes were committed + root = h if h else ledger.tree.root_hash + if toHex: + root = hexlify(root).decode() + return root + + def stateRootHash(self, ledgerId, toHex=True): + if not self.isMaster: + return None + root = self.node.getState(ledgerId).headHash + if toHex: + root = hexlify(root).decode() + return root # Queues used in PRE-PREPARE for each ledger, self.requestQueues = {} # type: Dict[int, deque] @@ -266,13 +311,36 @@ def h(self, n): self.H = self._h + self.config.LOG_SIZE logger.debug('{} set watermarks as {} {}'.format(self, self.h, self.H)) + @property + def lastPrePrepareSeqNo(self): + return self._lastPrePrepareSeqNo + + @lastPrePrepareSeqNo.setter + def lastPrePrepareSeqNo(self, n): + """ + This will _lastPrePrepareSeqNo to values greater than its previous + values else it will not. 
To forcefully override as in case of `revert`, + directly set `self._lastPrePrepareSeqNo` + """ + if n > self._lastPrePrepareSeqNo: + self._lastPrePrepareSeqNo = n + else: + logger.info('{} cannot set lastPrePrepareSeqNo to {} as its ' + 'already {}'.format(self, n, self._lastPrePrepareSeqNo)) + @property def requests(self): return self.node.requests - def shouldParticipate(self, viewNo: int, ppSeqNo: int): - # Replica should only participating in the consensus process and the - # replica did not stash any of this request's 3-phase request + @property + def ledger_ids(self): + return self.node.ledger_ids + + def shouldParticipate(self, viewNo: int, ppSeqNo: int) -> bool: + """ + Replica should only participating in the consensus process and the + replica did not stash any of this request's 3-phase request + """ return self.node.isParticipating and (viewNo, ppSeqNo) \ not in self.stashingWhileCatchingUp @@ -294,7 +362,7 @@ def isPrimary(self): """ Is this node primary? - :return: True if this node is primary, False otherwise + :return: True if this node is primary, False if not, None if primary status not known """ return self._primaryName == self.name if self._primaryName is not None \ else None @@ -330,10 +398,7 @@ def primaryChanged(self, primaryName, lastOrderedPPSeqNo): self.primaryName = primaryName if primaryName == self.name: assert self.lastOrderedPPSeqNo >= lastOrderedPPSeqNo - self.lastPrePrepareSeqNo = self.lastOrderedPPSeqNo - else: - for lid in self.requestQueues: - self.requestQueues[lid].clear() + self._lastPrePrepareSeqNo = self.lastOrderedPPSeqNo def removeObsoletePpReqs(self): # If replica was primary in previous view then remove every sent @@ -358,6 +423,41 @@ def removeObsoletePpReqs(self): self.sentPrePrepares.pop(key) self.prepares.pop(key, None) + def is_primary_in_view(self, viewNo: int) -> Optional[bool]: + """ + Return whether a primary has been selected for this view number. 
+ """ + return self.primaryNames[viewNo] == self.name + + def isMsgForCurrentView(self, msg): + """ + Return whether this request's view number is equal to the current view + number of this replica. + """ + viewNo = getattr(msg, "viewNo", None) + return viewNo == self.viewNo + + def isPrimaryForMsg(self, msg) -> Optional[bool]: + """ + Return whether this replica is primary if the request's view number is + equal this replica's view number and primary has been selected for + the current view. + Return None otherwise. + :param msg: message + """ + return self.isPrimary if self.isMsgForCurrentView(msg) \ + else self.is_primary_in_view(msg.viewNo) + + def isMsgFromPrimary(self, msg, sender: str) -> bool: + """ + Return whether this message was from primary replica + :param msg: + :param sender: + :return: + """ + return self.primaryName == sender if self.isMsgForCurrentView( + msg) else self.primaryNames[msg.viewNo] == sender + def _stateChanged(self): """ A series of actions to be performed when the state of this replica @@ -367,8 +467,6 @@ def _stateChanged(self): """ self._unstashInBox() if self.isPrimary is not None: - # self.process3PhaseReqsQueue() - # TODO handle suspicion exceptions here try: self.processPostElectionMsgs() except SuspiciousNode as ex: @@ -450,33 +548,36 @@ def batchDigest(reqs): def processReqDuringBatch(self, req: Request, validReqs: List, inValidReqs: List, rejects: List): + """ + This method will do dynamic validation and apply requests, also it + will modify `validReqs`, `inValidReqs` and `rejects` + """ try: if self.isMaster: self.node.doDynamicValidation(req) self.node.applyReq(req) except (InvalidClientMessageException, UnknownIdentifier) as ex: logger.warning('{} encountered exception {} while processing {}, ' - 'will reject'.format(self, ex, req)) + 'will reject'.format(self, ex, req)) rejects.append(Reject(req.identifier, req.reqId, ex)) inValidReqs.append(req) else: validReqs.append(req) - def create3PCBatch(self, ledgerId): - # 
TODO: If no valid requests then PRE-PREPARE should be sent but rejects - # should be tracked so they can be sent as part of next batch. + def create3PCBatch(self, ledger_id): ppSeqNo = self.lastPrePrepareSeqNo + 1 logger.info("{} creating batch {} for ledger {} with state root {}". - format(self, ppSeqNo, ledgerId, - self.stateRootHash(ledgerId, toHex=False))) + format(self, ppSeqNo, ledger_id, + self.stateRootHash(ledger_id, toHex=False))) tm = time.time() * 1000 validReqs = [] inValidReqs = [] rejects = [] while len(validReqs)+len(inValidReqs) < self.config.Max3PCBatchSize \ - and self.requestQueues[ledgerId]: - req = self.requestQueues[ledgerId].popleft() - self.processReqDuringBatch(req, validReqs, inValidReqs, rejects) + and self.requestQueues[ledger_id]: + key = self.requestQueues[ledger_id].pop(0) # Remove the first element + fin_req = self.requests[key].finalised + self.processReqDuringBatch(fin_req, validReqs, inValidReqs, rejects) reqs = validReqs+inValidReqs digest = self.batchDigest(reqs) @@ -487,26 +588,27 @@ def create3PCBatch(self, ledgerId): [(req.identifier, req.reqId) for req in reqs], len(validReqs), digest, - ledgerId, - self.stateRootHash(ledgerId), - self.txnRootHash(ledgerId) + ledger_id, + self.stateRootHash(ledger_id), + self.txnRootHash(ledger_id) ) logger.debug('{} created a PRE-PREPARE with {} requests for ledger {}' - .format(self, len(validReqs), ledgerId)) + .format(self, len(validReqs), ledger_id)) self.lastPrePrepareSeqNo = ppSeqNo if self.isMaster: self.outBox.extend(rejects) - self.node.onBatchCreated(ledgerId, - self.stateRootHash(ledgerId, toHex=False)) + self.node.onBatchCreated(ledger_id, + self.stateRootHash(ledger_id, toHex=False)) return prePrepareReq def sendPrePrepare(self, ppReq: PrePrepare): self.sentPrePrepares[ppReq.viewNo, ppReq.ppSeqNo] = ppReq self.send(ppReq, TPCStat.PrePrepareSent) - def readyFor3PC(self, request: Request): + def readyFor3PC(self, key: ReqKey): cls = self.node.__class__ - 
self.requestQueues[cls.ledgerIdForRequest(request)].append(request) + fin_req = self.requests[key].finalised + self.requestQueues[cls.ledgerIdForRequest(fin_req)].add(key) def serviceQueues(self, limit=None): """ @@ -558,7 +660,12 @@ def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: return if self.isPpSeqNoBetweenWaterMarks(msg.ppSeqNo): try: - self.threePhaseRouter.handleSync((msg, senderRep)) + if self.can_pp_seq_no_be_in_view(msg.viewNo, msg.ppSeqNo): + self.threePhaseRouter.handleSync((msg, senderRep)) + else: + self.discard(msg, 'un-acceptable pp seq no from previous ' + 'view', logger.debug) + return except SuspiciousNode as ex: self.node.reportSuspiciousNodeEx(ex) else: @@ -577,14 +684,6 @@ def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str): COMMIT :param sender: name of the node that sent this message """ - # If COMMIT or PREPARE corresponding to which a PRE-PREPARE is - # received then proceed otherwise only proceed further if primary - # is known - if msg.viewNo < self.viewNo: - self.discard(msg, - "its a previous view message", - logger.debug) - return if self.isPrimary is None: self.postElectionMsgs.append((msg, sender)) logger.debug("Replica {} pended request {} from {}". @@ -603,6 +702,7 @@ def processPrePrepare(self, pp: PrePrepare, sender: str): key = (pp.viewNo, pp.ppSeqNo) logger.debug("{} received PRE-PREPARE{} from {} at {}". format(self, key, sender, time.perf_counter())) + # Converting each req_idrs from list to tuple pp = updateNamedTuple(pp, **{f.REQ_IDR.nm: [(i, r) for i, r in pp.reqIdr]}) oldStateRoot = self.stateRootHash(pp.ledgerId, toHex=False) @@ -671,7 +771,7 @@ def processCommit(self, commit: Commit, sender: str) -> None: :param sender: name of the node that sent the COMMIT """ logger.debug("{} received COMMIT{} from {}". 
- format(self, commit, sender)) + format(self, (commit.viewNo, commit.ppSeqNo), sender)) if self.isPpSeqNoStable(commit.ppSeqNo): self.discard(commit, "achieved stable checkpoint for Commit", @@ -702,11 +802,12 @@ def tryOrder(self, commit: Commit): """ canOrder, reason = self.canOrder(commit) if canOrder: - logger.debug("{} returning request to node".format(self)) - self.tryOrdering(commit) + logger.trace("{} returning request to node".format(self)) + self.doOrder(commit) else: - logger.trace("{} cannot return request to node: {}". + logger.debug("{} cannot return request to node: {}". format(self, reason)) + return canOrder def doPrepare(self, pp: PrePrepare): logger.debug("{} Sending PREPARE {} at {}". @@ -755,6 +856,8 @@ def isNextPrePrepare(self, ppSeqNo: int): lastPpSeqNo = self.lastOrderedPPSeqNo if ppSeqNo - lastPpSeqNo != 1: + logger.debug('{} missing PRE-PREPAREs between {} and {}'. + format(self, ppSeqNo, lastPpSeqNo)) return False return True @@ -785,10 +888,12 @@ def validatePrePrepare(self, pp: PrePrepare, sender: str): format(self, pp, oldStateRoot)) for reqKey in pp.reqIdr: - req = self.node.requests[reqKey].finalised + req = self.requests[reqKey].finalised self.processReqDuringBatch(req, validReqs, inValidReqs, rejects) if len(validReqs) != pp.discarded: + if self.isMaster: + self.revert(pp.ledgerId, oldStateRoot, len(validReqs)) raise SuspiciousNode(sender, Suspicions.PPR_REJECT_WRONG, pp) reqs = validReqs + inValidReqs @@ -861,12 +966,13 @@ def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: def addToPrePrepares(self, pp: PrePrepare) -> None: """ Add the specified PRE-PREPARE to this replica's list of received - PRE-PREPAREs. 
+ PRE-PREPAREs and try sending PREPARE :param pp: the PRE-PREPARE to add to the list """ key = (pp.viewNo, pp.ppSeqNo) self.prePrepares[key] = pp + self.lastPrePrepareSeqNo = pp.ppSeqNo self.dequeuePrepares(*key) self.dequeueCommits(*key) self.stats.inc(TPCStat.PrePrepareRcvd) @@ -887,8 +993,6 @@ def canPrepare(self, ppReq) -> (bool, str): if self.hasPrepared(ppReq): return False, 'has already sent PREPARE for {}'.format(ppReq) return True, '' - # and self.requests.isFinalised((ppReq.identifier, - # ppReq.reqId)) def validatePrepare(self, prepare: Prepare, sender: str) -> bool: """ @@ -902,7 +1006,6 @@ def validatePrepare(self, prepare: Prepare, sender: str) -> bool: # primaryStatus = self.isPrimaryForMsg(prepare) primaryStatus = self.isPrimary - # ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares ppReq = self.getPrePrepare(*key) # If a non primary replica and receiving a PREPARE request before a @@ -946,6 +1049,12 @@ def validatePrepare(self, prepare: Prepare, sender: str) -> bool: return True def addToPrepares(self, prepare: Prepare, sender: str): + """ + Add the specified PREPARE to this replica's list of received + PREPAREs and try sending COMMIT + + :param prepare: the PREPARE to add to the list + """ self.prepares.addVote(prepare, sender) self.tryCommit(prepare) @@ -1053,26 +1162,31 @@ def canOrder(self, commit: Commit) -> Tuple[bool, Optional[str]]: return False, "no quorum: {} commits where f is {}".\ format(commit, self.f) - if self.hasOrdered(commit.viewNo, commit.ppSeqNo): + key = (commit.viewNo, commit.ppSeqNo) + if self.hasOrdered(*key): return False, "already ordered" - if not self.isNextInOrdering(commit): + if not self.all_prev_ordered(commit): viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo - if viewNo not in self.stashedCommitsForOrdering: - self.stashedCommitsForOrdering[viewNo] = {} - self.stashedCommitsForOrdering[viewNo][ppSeqNo] = commit - self.startRepeating(self.orderStashedCommits, 2) + if viewNo not in 
self.stashed_out_of_order_commits: + self.stashed_out_of_order_commits[viewNo] = {} + self.stashed_out_of_order_commits[viewNo][ppSeqNo] = commit + self.startRepeating(self.process_stashed_out_of_order_commits, 1) return False, "stashing {} since out of order".\ format(commit) return True, None - def isNextInOrdering(self, commit: Commit): + def all_prev_ordered(self, commit: Commit): + """ + Return True if all previous COMMITs have been ordered + """ # TODO: This method does a lot of work, choose correct data # structures to make it efficient. viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo if self.ordered and self.ordered[-1] == (viewNo, ppSeqNo-1): + # Last ordered was in same view as this COMMIT return True # if some PREPAREs/COMMITs were completely missed in the same view @@ -1082,80 +1196,75 @@ def isNextInOrdering(self, commit: Commit): toCheck.update(set(self.prepares.keys())) toCheck.update(set(self.commits.keys())) for (v, p) in toCheck: - if v < viewNo: + if v < viewNo and (v, p) not in self.ordered: # Have commits from previous view that are unordered. - # TODO: Question: would commits be always ordered, what if - # some are never ordered and its fine, go to PBFT. return False if v == viewNo and p < ppSeqNo and (v, p) not in self.ordered: # If unordered commits are found with lower ppSeqNo then this # cannot be ordered. return False - # TODO: Revisit PBFT paper, how to make sure that last request of the - # last view has been ordered? Need change in `VIEW CHANGE` mechanism. - # View change needs to communicate what the last request was. return True - def orderStashedCommits(self): - logger.debug('{} trying to order from stashed commits. {} {}'. 
- format(self, self.ordered, self.stashedCommitsForOrdering)) + def process_stashed_out_of_order_commits(self): + # This method is called periodically to check for any commits that + # were stashed due to lack of commits before them and orders them if it can + logger.debug('{} trying to order from out of order commits. {} {}'. + format(self, self.ordered, self.stashed_out_of_order_commits)) if self.ordered: lastOrdered = self.ordered[-1] vToRemove = set() - for v in self.stashedCommitsForOrdering: - if v < lastOrdered[0] and self.stashedCommitsForOrdering[v]: - raise RuntimeError("{} found commits from previous view {}" + for v in self.stashed_out_of_order_commits: + if v < lastOrdered[0] and self.stashed_out_of_order_commits[v]: + raise RuntimeError("{} found commits {} from previous view {}" " that were not ordered but last ordered" - " is {}".format(self, v, lastOrdered)) + " is {}".format(self, self.stashed_out_of_order_commits[v], v, lastOrdered)) pToRemove = set() - for p, commit in self.stashedCommitsForOrdering[v].items(): + for p, commit in self.stashed_out_of_order_commits[v].items(): + if (v, p) in self.ordered: + pToRemove.add(p) + continue if (v == lastOrdered[0] and lastOrdered == (v, p - 1)) or \ - (v > lastOrdered[0] and - self.isLowestCommitInView(commit)): + (v > lastOrdered[0] and self.isLowestCommitInView(commit)): logger.debug("{} ordering stashed commit {}". 
format(self, commit)) - if self.tryOrdering(commit): + if self.tryOrder(commit): lastOrdered = (v, p) pToRemove.add(p) for p in pToRemove: - del self.stashedCommitsForOrdering[v][p] - if not self.stashedCommitsForOrdering[v]: + del self.stashed_out_of_order_commits[v][p] + if not self.stashed_out_of_order_commits[v]: vToRemove.add(v) for v in vToRemove: - del self.stashedCommitsForOrdering[v] + del self.stashed_out_of_order_commits[v] - if not self.stashedCommitsForOrdering: - self.stopRepeating(self.orderStashedCommits) + if not self.stashed_out_of_order_commits: + self.stopRepeating(self.process_stashed_out_of_order_commits) def isLowestCommitInView(self, commit): - # TODO: Assumption: This assumes that at least one commit that was sent - # for any request by any node has been received in the view of this - # commit + view_no = commit.viewNo + if view_no > self.viewNo: + logger.debug('{} encountered {} which belongs to a later view' + .format(self, commit)) + return False + if view_no != self.viewNo and view_no not in self.view_ends_at: + logger.debug('{} encountered {} from past view for which dont know ' + 'the end of view'.format(self, commit)) + return False + ppSeqNos = [] for v, p in self.commits: if v == commit.viewNo: ppSeqNos.append(p) return min(ppSeqNos) == commit.ppSeqNo if ppSeqNos else True - def tryOrdering(self, commit: Commit) -> bool: - """ - Attempt to send an ORDERED request for the specified COMMIT to the - node. 
- - :param commit: the COMMIT message - """ + def doOrder(self, commit: Commit): key = (commit.viewNo, commit.ppSeqNo) - logger.debug("{} trying to order COMMIT{}".format(self, key)) - ppReq = self.getPrePrepare(*key) - assert ppReq - self.doOrder(ppReq) - return True - - def doOrder(self, pp: PrePrepare): - key = (pp.viewNo, pp.ppSeqNo) + logger.debug("{} ordering COMMIT{}".format(self, key)) + pp = self.getPrePrepare(*key) + assert pp self.addToOrdered(*key) ordered = Ordered(self.instId, pp.viewNo, @@ -1177,9 +1286,19 @@ def doOrder(self, pp: PrePrepare): req = self.requests[reqKey].finalised self.node.applyReq(req) self.stashingWhileCatchingUp.remove(key) + + for k in pp.reqIdr: + # Using discard since the key may not be present as in case of + # primary, the key was popped out while creating PRE-PREPARE. + # Or in case of node catching up, it will not validate + # PRE-PREPAREs or PREPAREs but will only validate number of COMMITs + # and their consistency with PRE-PREPARE of PREPAREs + self.requestQueues[pp.ledgerId].discard(k) + self.send(ordered, TPCStat.OrderSent) logger.debug("{} ordered request {}".format(self, key)) self.addToCheckpoint(pp.ppSeqNo, pp.digest) + return True def processCheckpoint(self, msg: Checkpoint, sender: str): logger.debug('{} received checkpoint {} from {}'. 
@@ -1310,8 +1429,6 @@ def gc(self, tillSeqNo): self.prePrepares.pop(k, None) self.prepares.pop(k, None) self.commits.pop(k, None) - # if k in self.ordered: - # self.ordered.remove(k) for k in reqKeys: self.requests[k].forwardedTo -= 1 @@ -1383,9 +1500,11 @@ def isPpSeqNoBetweenWaterMarks(self, ppSeqNo: int): def addToOrdered(self, viewNo: int, ppSeqNo: int): self.ordered.add((viewNo, ppSeqNo)) - self.lastOrderedPPSeqNo = ppSeqNo + if ppSeqNo > self.lastOrderedPPSeqNo: + self.lastOrderedPPSeqNo = ppSeqNo - def enqueuePrePrepare(self, ppMsg: PrePrepare, sender: str, nonFinReqs: Set=None): + def enqueuePrePrepare(self, ppMsg: PrePrepare, sender: str, + nonFinReqs: Set=None): if nonFinReqs: logger.debug("Queueing pre-prepares due to unavailability of finalised " "requests. PrePrepare {} from {}".format(ppMsg, sender)) @@ -1399,18 +1518,26 @@ def enqueuePrePrepare(self, ppMsg: PrePrepare, sender: str, nonFinReqs: Set=None self.prePreparesPendingPrevPP[ppMsg.viewNo, ppMsg.ppSeqNo] = (ppMsg, sender) def dequeuePrePrepares(self): + """ + Dequeue any received PRE-PREPAREs that did not have finalized requests + or the replica was missing any PRE-PREPAREs before it + :return: + """ ppsReady = [] + # Check if any requests have become finalised belonging to any stashed + # PRE-PREPAREs. 
for i, (pp, sender, reqIds) in enumerate(self.prePreparesPendingFinReqs): finalised = set() for r in reqIds: if self.requests.isFinalised(r): finalised.add(r) diff = reqIds.difference(finalised) + # All requests become finalised if not diff: ppsReady.append(i) self.prePreparesPendingFinReqs[i] = (pp, sender, diff) - for i in ppsReady: + for i in sorted(ppsReady, reverse=True): pp, sender, _ = self.prePreparesPendingFinReqs.pop(i) self.prePreparesPendingPrevPP[pp.viewNo, pp.ppSeqNo] = (pp, sender) @@ -1418,9 +1545,8 @@ def dequeuePrePrepares(self): while self.prePreparesPendingPrevPP and self.isNextPrePrepare( self.prePreparesPendingPrevPP.iloc[0][1]): _, (pp, sender) = self.prePreparesPendingPrevPP.popitem(last=False) - if pp.viewNo < self.viewNo: - self.discard(pp, - "Pre-Prepare from a previous view", + if not self.can_pp_seq_no_be_in_view(pp.viewNo, pp.ppSeqNo): + self.discard(pp, "Pre-Prepare from a previous view", logger.debug) continue self.processPrePrepare(pp, sender) @@ -1499,6 +1625,18 @@ def getReqKeyFrom3PhaseKey(self, key: ThreePhaseKey): format(key)) return reqKey + def can_pp_seq_no_be_in_view(self, view_no, pp_seq_no): + """ + Checks if the `pp_seq_no` could have been in view `view_no`. It will + return False when the `pp_seq_no` belongs to a later view than + `view_no` else will return True + :return: + """ + assert view_no <= self.viewNo + return view_no == self.viewNo or (view_no < self.viewNo and ( + view_no in self.view_ends_at and + pp_seq_no <= self.view_ends_at[view_no][0])) + @property def threePhaseState(self): # TODO: This method is incomplete @@ -1518,6 +1656,8 @@ def send(self, msg, stat=None) -> None: """ Send a message to the node on which this replica resides. 
+ :param stat: + :param rid: remote id of one recipient (sends to all recipients if None) :param msg: the message to send """ logger.display("{} sending {}".format(self, msg.__class__.__name__), @@ -1526,3 +1666,28 @@ def send(self, msg, stat=None) -> None: if stat: self.stats.inc(stat) self.outBox.append(msg) + + def caught_up_till_pp_seq_no(self, last_caught_up_pp_seq_no): + self.addToOrdered(self.viewNo, last_caught_up_pp_seq_no) + # self._remove_till_caught_up_pp_seq_no(last_caught_up_pp_seq_no) + + def _remove_till_caught_up_pp_seq_no(self, last_caught_up_pp_seq_no): + outdated_pre_prepares = set() + for key, pp in self.prePrepares.items(): + if (key[1] <= last_caught_up_pp_seq_no): + outdated_pre_prepares.add((pp.viewNo, pp.ppSeqNo, pp.ledgerId)) + self.prePrepares.pop(key, None) + self.ordered.add((pp.viewNo, pp.ppSeqNo)) + + for key in sorted(list(outdated_pre_prepares), key=itemgetter(1), reverse=True): + count, _, prevStateRoot = self.batches[key[1]] + self.batches.pop(key[1]) + self.sentPrePrepares.pop(key, None) + self.prepares.pop(key, None) + + ledger_id = key[2] + ledger = self.node.getLedger(ledger_id) + ledger.discardTxns(len(ledger.uncommittedTxns)) + + state = self.node.getState(ledger_id) + state.revertToHead(state.committedHeadHash) diff --git a/plenum/server/router.py b/plenum/server/router.py index a5a7ce2dc9..febfdc7737 100644 --- a/plenum/server/router.py +++ b/plenum/server/router.py @@ -48,7 +48,9 @@ def handleSync(self, msg: Any) -> Any: :param msg: tuple of object and callable """ - if isinstance(msg, tuple) and len(msg) == 2: + # If a plain python tuple and not a named tuple, a better alternative + # would be to create a named entity with the 3 characteristics below + if isinstance(msg, tuple) and len(msg) == 2 and not hasattr(msg, '_field_types'): return self.getFunc(msg[0])(*msg) else: return self.getFunc(msg)(msg) diff --git a/plenum/test/batching_3pc/conftest.py b/plenum/test/batching_3pc/conftest.py index 58258de369..89a0d1c661 
100644 --- a/plenum/test/batching_3pc/conftest.py +++ b/plenum/test/batching_3pc/conftest.py @@ -6,13 +6,10 @@ @pytest.fixture(scope="module") def tconf(tconf, request): oldSize = tconf.Max3PCBatchSize - oldTIme = tconf.Max3PCBatchWait - tconf.Max3PCBatchSize = 3 - tconf.Max3PCBatchWait = 5 + tconf.Max3PCBatchSize = 10 def reset(): tconf.Max3PCBatchSize = oldSize - tconf.Max3PCBatchWait = oldTIme request.addfinalizer(reset) return tconf diff --git a/plenum/test/batching_3pc/test_basic_batching.py b/plenum/test/batching_3pc/test_basic_batching.py index 4e2a83a680..d31272fa2a 100644 --- a/plenum/test/batching_3pc/test_basic_batching.py +++ b/plenum/test/batching_3pc/test_basic_batching.py @@ -31,8 +31,7 @@ def test3PCOverBatchWithThresholdReqs(tconf, looper, txnPoolNodeSet, client, :return: """ reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) - waitForSufficientRepliesForRequests(looper, client, requests=reqs, - customTimeoutPerReq=tconf.Max3PCBatchWait-1) + waitForSufficientRepliesForRequests(looper, client, requests=reqs) def test3PCOverBatchWithLessThanThresholdReqs(tconf, looper, txnPoolNodeSet, @@ -43,8 +42,7 @@ def test3PCOverBatchWithLessThanThresholdReqs(tconf, looper, txnPoolNodeSet, :return: """ reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize - 1) - waitForSufficientRepliesForRequests(looper, client, requests=reqs, - customTimeoutPerReq=tconf.Max3PCBatchWait + 1) + waitForSufficientRepliesForRequests(looper, client, requests=reqs) def testTreeRootsCorrectAfterEachBatch(tconf, looper, txnPoolNodeSet, @@ -56,14 +54,12 @@ def testTreeRootsCorrectAfterEachBatch(tconf, looper, txnPoolNodeSet, """ # Send 1 batch reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) - waitForSufficientRepliesForRequests(looper, client, requests=reqs, - customTimeoutPerReq=tconf.Max3PCBatchWait) + waitForSufficientRepliesForRequests(looper, client, requests=reqs) checkNodesHaveSameRoots(txnPoolNodeSet) # Send 2 batches reqs = 
sendRandomRequests(wallet1, client, 2 * tconf.Max3PCBatchSize) - waitForSufficientRepliesForRequests(looper, client, requests=reqs, - customTimeoutPerReq=2*tconf.Max3PCBatchWait) + waitForSufficientRepliesForRequests(looper, client, requests=reqs) checkNodesHaveSameRoots(txnPoolNodeSet) @@ -90,11 +86,11 @@ def rejectingMethod(self, req): node.doDynamicValidation = types.MethodType(rejectingMethod, node) reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) - waitForSufficientRepliesForRequests(looper, client, requests=reqs[:-1], - customTimeoutPerReq=tconf.Max3PCBatchWait) + waitForSufficientRepliesForRequests(looper, client, requests=reqs[:-1]) + with pytest.raises(AssertionError): - waitForSufficientRepliesForRequests(looper, client, requests=reqs[-1:], - customTimeoutPerReq=tconf.Max3PCBatchWait) + waitForSufficientRepliesForRequests(looper, client, requests=reqs[-1:]) + for node in txnPoolNodeSet: looper.run(eventually(checkRejectWithReason, client, 'Simulated rejection', node.clientstack.name, diff --git a/plenum/test/batching_3pc/test_batch_rejection.py b/plenum/test/batching_3pc/test_batch_rejection.py index bc1fbb8346..9e63c95054 100644 --- a/plenum/test/batching_3pc/test_batch_rejection.py +++ b/plenum/test/batching_3pc/test_batch_rejection.py @@ -41,14 +41,10 @@ def badMethod(self, ledgerId): def reverted(setup, looper): pr, otherR, oldStateRoot = setup - def chkPps(n): - assert len(pr.batches) == n - def chkStateRoot(root): for r in [pr]+otherR: r.stateRootHash(DOMAIN_LEDGER_ID, toHex=False) == root - looper.run(eventually(chkPps, 1, retryWait=1, timeout=5)) looper.run(eventually(chkStateRoot, oldStateRoot)) diff --git a/plenum/test/batching_3pc/test_batching_scenarios.py b/plenum/test/batching_3pc/test_batching_scenarios.py index 99795a8c0c..b318986b97 100644 --- a/plenum/test/batching_3pc/test_batching_scenarios.py +++ b/plenum/test/batching_3pc/test_batching_scenarios.py @@ -44,8 +44,7 @@ def specificPrePrepares(wrappedMsg): reqs = 
sendRandomRequests(wallet1, client, (ppsToDelay+1)*tconf.Max3PCBatchSize) - waitForSufficientRepliesForRequests(looper, client, requests=reqs, - customTimeoutPerReq=(ppsToDelay + 1) * tconf.Max3PCBatchWait) + waitForSufficientRepliesForRequests(looper, client, requests=reqs) checkNodesHaveSameRoots(txnPoolNodeSet) for r in otherR: diff --git a/plenum/test/batching_3pc/test_client_requests.py b/plenum/test/batching_3pc/test_client_requests.py index cbf9bdc402..57b31dee97 100644 --- a/plenum/test/batching_3pc/test_client_requests.py +++ b/plenum/test/batching_3pc/test_client_requests.py @@ -1,6 +1,11 @@ +import pytest + + +@pytest.mark.skip(reason='INDY-96. Not implemented') def testClientRequestingStateProof(): - pass + raise NotImplementedError +@pytest.mark.skip(reason='INDY-96. Not implemented') def testClientRequestingStateVariableValue(): - pass + raise NotImplementedError diff --git a/plenum/test/checkpoints/conftest.py b/plenum/test/checkpoints/conftest.py index ecbeff66ec..749d1409f8 100644 --- a/plenum/test/checkpoints/conftest.py +++ b/plenum/test/checkpoints/conftest.py @@ -2,8 +2,7 @@ from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ client1, wallet1, client1Connected - -CHK_FREQ = 5 +from plenum.test.batching_3pc.conftest import tconf @pytest.fixture(scope="module") @@ -11,8 +10,8 @@ def chkFreqPatched(tconf, request): oldChkFreq = tconf.CHK_FREQ oldLogSize = tconf.LOG_SIZE - tconf.CHK_FREQ = CHK_FREQ - tconf.LOG_SIZE = 3*tconf.CHK_FREQ + tconf.CHK_FREQ = 2 + tconf.LOG_SIZE = 2*tconf.CHK_FREQ def reset(): tconf.CHK_FREQ = oldChkFreq @@ -21,3 +20,13 @@ def reset(): request.addfinalizer(reset) return tconf + + +@pytest.fixture(scope="module") +def reqs_for_checkpoint(chkFreqPatched): + return chkFreqPatched.CHK_FREQ * chkFreqPatched.Max3PCBatchSize + + +@pytest.fixture(scope="module") +def reqs_for_logsize(chkFreqPatched): + return chkFreqPatched.LOG_SIZE * chkFreqPatched.Max3PCBatchSize diff --git 
a/plenum/test/checkpoints/test_basic_checkpointing.py b/plenum/test/checkpoints/test_basic_checkpointing.py index 6d752f3b8e..fad4eb4b8f 100644 --- a/plenum/test/checkpoints/test_basic_checkpointing.py +++ b/plenum/test/checkpoints/test_basic_checkpointing.py @@ -1,35 +1,38 @@ +import pytest + from stp_core.loop.eventually import eventually from plenum.test import waits -from plenum.test.checkpoints.conftest import CHK_FREQ from plenum.test.checkpoints.helper import chkChkpoints from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies def testCheckpointCreated(chkFreqPatched, looper, txnPoolNodeSet, client1, - wallet1, client1Connected): + wallet1, client1Connected, reqs_for_checkpoint): """ After requests less than `CHK_FREQ`, there should be one checkpoint on each replica. After `CHK_FREQ`, one checkpoint should become stable """ - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ-1, 1) + # Send one batch less so checkpoint is not created + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + reqs_for_checkpoint-(chkFreqPatched.Max3PCBatchSize), 1) # Deliberately waiting so as to verify that not more than 1 checkpoint is # created looper.runFor(2) chkChkpoints(txnPoolNodeSet, 1) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, chkFreqPatched.Max3PCBatchSize, 1) timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout)) def testOldCheckpointDeleted(chkFreqPatched, looper, txnPoolNodeSet, client1, - wallet1, client1Connected): + wallet1, client1Connected, reqs_for_checkpoint): """ Send requests more than twice of `CHK_FREQ`, there should be one new stable checkpoint on each replica. 
The old stable checkpoint should be removed """ - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2*CHK_FREQ, + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2*reqs_for_checkpoint, 1) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1) diff --git a/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py b/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py index b954a02540..6bfd9d0441 100644 --- a/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py +++ b/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py @@ -1,6 +1,5 @@ from stp_core.loop.eventually import eventually from plenum.common.types import Checkpoint -from plenum.test.checkpoints.conftest import CHK_FREQ from plenum.test.checkpoints.helper import chkChkpoints from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ checkDiscardMsg @@ -8,8 +7,9 @@ def testDiscardCheckpointMsgForStableCheckpoint(chkFreqPatched, looper, txnPoolNodeSet, client1, - wallet1, client1Connected): - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ, 1) + wallet1, client1Connected, + reqs_for_checkpoint): + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqs_for_checkpoint, 1) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1)) node1 = txnPoolNodeSet[0] rep1 = node1.replicas[0] diff --git a/plenum/test/checkpoints/test_message_outside_watermark.py b/plenum/test/checkpoints/test_message_outside_watermark.py index 2bda8ea407..c2a0b87802 100644 --- a/plenum/test/checkpoints/test_message_outside_watermark.py +++ b/plenum/test/checkpoints/test_message_outside_watermark.py @@ -1,6 +1,4 @@ from plenum.test import waits -from plenum.test.checkpoints.conftest import CHK_FREQ -from plenum.test.checkpoints.helper import chkChkpoints from plenum.test.delayers import ppDelay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ countDiscarded @@ -11,7 +9,8 @@ def 
testNonPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper, txnPoolNodeSet, client1, - wallet1, client1Connected): + wallet1, client1Connected, + reqs_for_logsize): """ A node is slow in processing PRE-PREPAREs such that lot of requests happen and the slow node has started getting 3 phase messages outside of it @@ -22,7 +21,7 @@ def testNonPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper, """ delay = 15 instId = 1 - reqsToSend = chkFreqPatched.LOG_SIZE + 2 + reqsToSend = reqs_for_logsize + 2 npr = getNonPrimaryReplicas(txnPoolNodeSet, instId) slowReplica = npr[0] slowNode = slowReplica.node diff --git a/plenum/test/checkpoints/test_message_outside_watermark1.py b/plenum/test/checkpoints/test_message_outside_watermark1.py index 537de33517..ea1b2f695d 100644 --- a/plenum/test/checkpoints/test_message_outside_watermark1.py +++ b/plenum/test/checkpoints/test_message_outside_watermark1.py @@ -1,3 +1,5 @@ +import math + from stp_core.loop.eventually import eventually from plenum.test import waits @@ -6,9 +8,13 @@ from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica +TestRunningTimeLimitSec = 300 + + def testPrimaryRecvs3PhaseMessageOutsideWatermarks(tconf, chkFreqPatched, looper, txnPoolNodeSet, client1, - wallet1, client1Connected): + wallet1, client1Connected, + reqs_for_logsize): """ One of the primary starts getting lot of requests, more than his log size and queues up requests since they will go beyond its watermarks. 
This @@ -16,9 +22,9 @@ def testPrimaryRecvs3PhaseMessageOutsideWatermarks(tconf, chkFreqPatched, looper Eventually this primary will send PRE-PREPARE for all requests and those requests will complete """ - delay = 5 + delay = 3 instId = 1 - reqsToSend = 2*chkFreqPatched.LOG_SIZE + 1 + reqsToSend = 2*reqs_for_logsize + 1 npr = getNonPrimaryReplicas(txnPoolNodeSet, instId) pr = getPrimaryReplica(txnPoolNodeSet, instId) from plenum.server.replica import TPCStat @@ -27,9 +33,14 @@ def testPrimaryRecvs3PhaseMessageOutsideWatermarks(tconf, chkFreqPatched, looper for r in npr: r.node.nodeIbStasher.delay(ppDelay(delay, instId)) + tm_exec_1_batch = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + batch_count = math.ceil(reqsToSend / tconf.Max3PCBatchSize) + total_timeout = (tm_exec_1_batch + delay) * batch_count + def chk(): - assert orderedCount + reqsToSend == pr.stats.get(TPCStat.OrderSent) + assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent) - print('Sending {} requests'.format(reqsToSend)) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, 1) - looper.run(eventually(chk, retryWait=1, timeout=tconf.TestRunningTimeLimitSec)) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, + 1, override_timeout_limit=True, + total_timeout=total_timeout) + looper.run(eventually(chk, retryWait=1, timeout=3)) diff --git a/plenum/test/checkpoints/test_stable_checkpoint.py b/plenum/test/checkpoints/test_stable_checkpoint.py index ad5db58ca9..0b98ad76aa 100644 --- a/plenum/test/checkpoints/test_stable_checkpoint.py +++ b/plenum/test/checkpoints/test_stable_checkpoint.py @@ -1,36 +1,37 @@ from stp_core.loop.eventually import eventually from plenum.test import waits -from plenum.test.checkpoints.conftest import CHK_FREQ from plenum.test.checkpoints.helper import chkChkpoints from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -def checkRequestCounts(nodes, count): +def checkRequestCounts(nodes, 
req_count, cons_count): for node in nodes: - assert len(node.requests) == count + assert len(node.requests) == req_count for r in node.replicas: - assert len(r.commits) == count - assert len(r.prepares) == count + assert len(r.commits) == cons_count + assert len(r.prepares) == cons_count def testRequestOlderThanStableCheckpointRemoved(chkFreqPatched, looper, txnPoolNodeSet, client1, - wallet1, client1Connected): + wallet1, client1Connected, + reqs_for_checkpoint): reqs = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, - CHK_FREQ-1, 1) + reqs_for_checkpoint - (chkFreqPatched.Max3PCBatchSize), 1) timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, retryWait=1, timeout=timeout)) - checkRequestCounts(txnPoolNodeSet, len(reqs)) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1) + checkRequestCounts(txnPoolNodeSet, len(reqs), chkFreqPatched.CHK_FREQ-1) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + chkFreqPatched.Max3PCBatchSize, 1) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout)) - checkRequestCounts(txnPoolNodeSet, 0) + checkRequestCounts(txnPoolNodeSet, 0, 0) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, - 3*CHK_FREQ + 1, 1) + reqs_for_checkpoint + 1, 1) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1, timeout=timeout)) - checkRequestCounts(txnPoolNodeSet, 1) + checkRequestCounts(txnPoolNodeSet, 1, 1) diff --git a/plenum/test/checkpoints/test_stable_checkpoint1.py b/plenum/test/checkpoints/test_stable_checkpoint1.py index 6492f47295..7d5f6ff26f 100644 --- a/plenum/test/checkpoints/test_stable_checkpoint1.py +++ b/plenum/test/checkpoints/test_stable_checkpoint1.py @@ -1,7 +1,6 @@ from stp_core.loop.eventually import eventually from plenum.test import waits -from plenum.test.checkpoints.conftest import CHK_FREQ from plenum.test.checkpoints.helper import chkChkpoints from 
plenum.test.delayers import ppDelay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies @@ -10,7 +9,8 @@ def testStableCheckpointWhenOneInstanceSlow(chkFreqPatched, looper, txnPoolNodeSet, client1, - wallet1, client1Connected): + wallet1, client1Connected, + reqs_for_checkpoint): delay = 5 pr = getPrimaryReplica(txnPoolNodeSet, 1) slowNode = pr.node @@ -18,7 +18,7 @@ def testStableCheckpointWhenOneInstanceSlow(chkFreqPatched, looper, for n in otherNodes: n.nodeIbStasher.delay(ppDelay(delay, 1)) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ, 1) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqs_for_checkpoint, 1) timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + delay looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout)) diff --git a/plenum/test/cli/test_cli_with_bank_req_plugin.py b/plenum/test/cli/test_cli_with_bank_req_plugin.py index e9b48cb710..b9c9416d19 100644 --- a/plenum/test/cli/test_cli_with_bank_req_plugin.py +++ b/plenum/test/cli/test_cli_with_bank_req_plugin.py @@ -22,7 +22,6 @@ def testReqForNonExistentClient(cli, loadBankReqPlugin, createAllNodes): assertNoClient(cli) -# @pytest.mark.skipif('sys.platform == "win32"', reason='SOV-457') def testTransactions(cli, loadBankReqPlugin, createAllNodes, validNodeNames): nodeCount = len(validNodeNames) diff --git a/plenum/test/cli/test_new_steward.py b/plenum/test/cli/test_new_steward.py index ca61aff69c..57a662d29a 100644 --- a/plenum/test/cli/test_new_steward.py +++ b/plenum/test/cli/test_new_steward.py @@ -11,7 +11,7 @@ def testNewStewardKeysWithSeeds(cli, validNodeNames, createAllNodes): cli.enterCmd('new client {}'.format(cName)) printeds = cli.printeds cli.looper.runFor(8) - assert False + raise NotImplementedError @pytest.mark.skip(reason="SOV-545. 
Not implemented") diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index 970560f02a..e0a1ad2daa 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -41,7 +41,7 @@ from plenum.test.helper import randomOperation, \ checkReqAck, checkLastClientReqForNode, waitForSufficientRepliesForRequests, \ waitForViewChange, requestReturnedToNode, randomText, \ - mockGetInstalledDistributions, mockImportModule + mockGetInstalledDistributions, mockImportModule, chk_all_funcs from plenum.test.node_request.node_request_helper import checkPrePrepared, \ checkPropagated, checkPrepared, checkCommitted from plenum.test.plugin.helper import getPluginPath @@ -49,7 +49,7 @@ from plenum.test.test_node import TestNode, TestNodeSet, Pool, \ checkNodesConnected, ensureElectionsDone, genNodeReg -Logger.setLogLevel(logging.DEBUG) +Logger.setLogLevel(logging.NOTSET) logger = getlogger() config = getConfig() @@ -97,11 +97,13 @@ def keyfunc(_): @pytest.fixture(scope="function", autouse=True) -def limitTestRunningTime(tconf): +def limitTestRunningTime(request, tconf): st = time.time() yield runningTime = time.time() - st - if runningTime > tconf.TestRunningTimeLimitSec: + time_limit = getValueFromModule(request, "TestRunningTimeLimitSec", + tconf.TestRunningTimeLimitSec) + if runningTime > time_limit: pytest.fail( 'The running time of each test is limited by {} sec ' '(actually the test has taken {:2.1f} sec).\n' @@ -160,9 +162,6 @@ def getValueFromModule(request, name: str, default: Any = None): PLUGIN_BASE_DIR_PATH: testPluginBaseDirPath, PLUGIN_TYPE_STATS_CONSUMER: "stats_consumer" }, - 'EnsureLedgerDurability': False, - 'Max3PCBatchSize': 1, - 'DELTA': .8 } @@ -329,8 +328,10 @@ def ensureView(nodeSet, looper, up): @pytest.fixture("module") def delayed_perf_chk(nodeSet): + d = 20 for node in nodeSet: - node.delayCheckPerformance(20) + node.delayCheckPerformance(d) + return d @pytest.fixture(scope="module") @@ -375,17 +376,19 @@ def reqAcked1(looper, nodeSet, 
client1, sent1, faultyNodes): propTimeout = waits.expectedClientToPoolRequestDeliveryTime(numerOfNodes) coros = [partial(checkLastClientReqForNode, node, sent1) for node in nodeSet] - looper.run(eventuallyAll(*coros, - totalTimeout=propTimeout, - acceptableFails=faultyNodes)) + # looper.run(eventuallyAll(*coros, + # totalTimeout=propTimeout, + # acceptableFails=faultyNodes)) + chk_all_funcs(looper, coros, acceptable_fails=faultyNodes, timeout=propTimeout) # Wait until sufficient number of acks received coros2 = [partial(checkReqAck, client1, node, sent1.identifier, sent1.reqId) for node in nodeSet] ackTimeout = waits.expectedReqAckQuorumTime() - looper.run(eventuallyAll(*coros2, - totalTimeout=ackTimeout, - acceptableFails=faultyNodes)) + # looper.run(eventuallyAll(*coros2, + # totalTimeout=ackTimeout, + # acceptableFails=faultyNodes)) + chk_all_funcs(looper, coros2, acceptable_fails=faultyNodes, timeout=ackTimeout) return sent1 diff --git a/plenum/test/delayers.py b/plenum/test/delayers.py index 8ad4fffdad..351fc1935a 100644 --- a/plenum/test/delayers.py +++ b/plenum/test/delayers.py @@ -1,8 +1,9 @@ +import random from typing import Iterable from plenum.common.types import f, Propagate, PrePrepare, \ Prepare, Commit, InstanceChange, LedgerStatus, ConsistencyProof, CatchupReq, \ - Nomination, CatchupRep + Nomination, CatchupRep, Primary, Reelection from plenum.common.constants import OP_FIELD_NAME from plenum.common.util import getCallableName from plenum.test.test_client import TestClient @@ -63,29 +64,39 @@ def inner(action_pair): return inner -def nom_delay(delay: float): +def nom_delay(delay: float, inst_id=None, sender_filter: str=None): # Delayer of NOMINATE requests - return delayerMsgTuple(delay, Nomination) + return delayerMsgTuple(delay, Nomination, instFilter=inst_id, senderFilter=sender_filter) -def ppgDelay(delay: float): +def prim_delay(delay: float, inst_id=None, sender_filter: str=None): + # Delayer of PRIMARY requests + return 
delayerMsgTuple(delay, Primary, instFilter=inst_id, senderFilter=sender_filter) + + +def rel_delay(delay: float, inst_id=None, sender_filter: str=None): + # Delayer of REELECTION requests + return delayerMsgTuple(delay, Reelection, instFilter=inst_id, senderFilter=sender_filter) + + +def ppgDelay(delay: float, sender_filter: str=None): # Delayer of PROPAGATE requests - return delayerMsgTuple(delay, Propagate) + return delayerMsgTuple(delay, Propagate, senderFilter=sender_filter) -def ppDelay(delay: float, instId: int=None): +def ppDelay(delay: float, instId: int=None, sender_filter: str=None): # Delayer of PRE-PREPARE requests from a particular instance - return delayerMsgTuple(delay, PrePrepare, instFilter=instId) + return delayerMsgTuple(delay, PrePrepare, instFilter=instId, senderFilter=sender_filter) -def pDelay(delay: float, instId: int=None): +def pDelay(delay: float, instId: int=None, sender_filter: str=None): # Delayer of PREPARE requests from a particular instance - return delayerMsgTuple(delay, Prepare, instFilter=instId) + return delayerMsgTuple(delay, Prepare, instFilter=instId, senderFilter=sender_filter) -def cDelay(delay: float, instId: int=None): +def cDelay(delay: float, instId: int=None, sender_filter: str=None): # Delayer of COMMIT requests from a particular instance - return delayerMsgTuple(delay, Commit, instFilter=instId) + return delayerMsgTuple(delay, Commit, instFilter=instId, senderFilter=sender_filter) def icDelay(delay: float): @@ -141,4 +152,36 @@ def delayNonPrimaries(nodeSet, instId, delay): from plenum.test.test_node import getNonPrimaryReplicas nonPrimReps = getNonPrimaryReplicas(nodeSet, instId) for r in nonPrimReps: - r.node.nodeIbStasher.delay(ppDelay(delay, instId)) \ No newline at end of file + r.node.nodeIbStasher.delay(ppDelay(delay, instId)) + return nonPrimReps + + +def delay_messages(typ, nodes, inst_id, delay=None, min_delay=None, max_delay=None): + if typ == 'election': + delay_meths = (nom_delay, prim_delay, rel_delay) 
+ elif typ == '3pc': + delay_meths = (ppDelay, pDelay, cDelay) + else: + RuntimeError('Unknown type') + assert delay is not None or (min_delay is not None and max_delay is not None) + for node in nodes: + if delay: + d = delay + else: + d = min_delay + random.randint(0, max_delay - min_delay) + for meth in delay_meths: + node.nodeIbStasher.delay(meth(d, inst_id)) + for other_node in [n for n in nodes if n != node]: + other_node.nodeIbStasher.delay(meth(d, inst_id, node.name)) + + +def delay_election_messages(nodes, inst_id, delay=None, min_delay=None, + max_delay=None): + # Delay election message + delay_messages('election', nodes, inst_id, delay, min_delay, max_delay) + + +def delay_3pc_messages(nodes, inst_id, delay=None, min_delay=None, + max_delay=None): + # Delay 3 phase commit message + delay_messages('3pc', nodes, inst_id, delay, min_delay, max_delay) diff --git a/plenum/test/helper.py b/plenum/test/helper.py index 0cde3fba16..52abd82e5e 100644 --- a/plenum/test/helper.py +++ b/plenum/test/helper.py @@ -86,7 +86,10 @@ def waitForSufficientRepliesForRequests(looper, requests = None, requestIds = None, fVal=None, - customTimeoutPerReq=None): + customTimeoutPerReq=None, + add_delay_to_timeout: float = 0, + override_timeout_limit=False, + total_timeout=None): """ Checks number of replies for given requests of specific client and raises exception if quorum not reached at least for one @@ -104,16 +107,17 @@ def waitForSufficientRepliesForRequests(looper, nodeCount = len(client.nodeReg) fVal = fVal or getMaxFailures(nodeCount) - timeoutPerRequest = customTimeoutPerReq or \ - waits.expectedTransactionExecutionTime(nodeCount) - - # here we try to take into account what timeout for execution - # N request - totalTimeout should be in - # timeoutPerRequest < totalTimeout < timeoutPerRequest * N - # we cannot just take (timeoutPerRequest * N) because it is so huge. 
- # (for timeoutPerRequest=5 and N=10, totalTimeout=50sec) - # lets start with some simple formula: - totalTimeout = (1 + len(requestIds) / 10) * timeoutPerRequest + if not total_timeout: + timeoutPerRequest = customTimeoutPerReq or \ + waits.expectedTransactionExecutionTime(nodeCount) + timeoutPerRequest += add_delay_to_timeout + # here we try to take into account what timeout for execution + # N request - total_timeout should be in + # timeoutPerRequest < total_timeout < timeoutPerRequest * N + # we cannot just take (timeoutPerRequest * N) because it is so huge. + # (for timeoutPerRequest=5 and N=10, total_timeout=50sec) + # lets start with some simple formula: + total_timeout = (1 + len(requestIds) / 10) * timeoutPerRequest coros = [] for requestId in requestIds: @@ -122,9 +126,13 @@ def waitForSufficientRepliesForRequests(looper, requestId, fVal)) - looper.run(eventuallyAll(*coros, - retryWait=1, - totalTimeout=totalTimeout)) + chk_all_funcs(looper, coros, retry_wait=1, timeout=total_timeout, + override_eventually_timeout=override_timeout_limit) + + # looper.run(eventuallyAll(*coros, + # retryWait=1, + # totalTimeout=total_timeout, + # override_timeout_limit=override_timeout_limit)) def sendReqsToNodesAndVerifySuffReplies(looper: Looper, @@ -132,14 +140,20 @@ def sendReqsToNodesAndVerifySuffReplies(looper: Looper, client: TestClient, numReqs: int, fVal: int=None, - customTimeoutPerReq: float=None): + customTimeoutPerReq: float=None, + add_delay_to_timeout: float=0, + override_timeout_limit=False, + total_timeout=None): nodeCount = len(client.nodeReg) fVal = fVal or getMaxFailures(nodeCount) requests = sendRandomRequests(wallet, client, numReqs) waitForSufficientRepliesForRequests(looper, client, requests=requests, + fVal=fVal, customTimeoutPerReq=customTimeoutPerReq, - fVal=fVal) + add_delay_to_timeout=add_delay_to_timeout, + override_timeout_limit=override_timeout_limit, + total_timeout=total_timeout) return requests @@ -324,12 +338,6 @@ def 
addNodeBack(nodeSet: TestNodeSet, return node -# def checkMethodCalled(node: TestNode, -# method: str, -# args: Tuple): -# assert node.spylog.getLastParams(method) == args - - def checkPropagateReqCountOfNode(node: TestNode, identifier: str, reqId: int): key = identifier, reqId assert key in node.requests @@ -365,13 +373,14 @@ def checkPrePrepareReqRecvd(replicas: Iterable[TestReplica], assert expectedRequest.reqIdr in [p['pp'].reqIdr for p in params] -def checkPrepareReqSent(replica: TestReplica, identifier: str, reqId: int): +def checkPrepareReqSent(replica: TestReplica, identifier: str, reqId: int, + view_no: int): paramsList = getAllArgs(replica, replica.canPrepare) rv = getAllReturnVals(replica, replica.canPrepare) assert [(identifier, reqId)] in \ - [p["ppReq"].reqIdr for p in paramsList] - idx = [p["ppReq"].reqIdr for p in paramsList].index([(identifier, reqId)]) + [p["ppReq"].reqIdr and p["ppReq"].viewNo == view_no for p in paramsList] + idx = [p["ppReq"].reqIdr for p in paramsList if p["ppReq"].viewNo == view_no].index([(identifier, reqId)]) assert rv[idx] @@ -599,10 +608,16 @@ def checkStateEquality(state1, state2): def check_seqno_db_equality(db1, db2): - assert db1.size == db2.size + assert db1.size == db2.size,\ + "{} != {}".format(db1.size, db2.size) assert {bytes(k): bytes(v) for k, v in db1._keyValueStorage.iter()} == \ {bytes(k): bytes(v) for k, v in db2._keyValueStorage.iter()} +def check_last_ordered_pp_seq_no(node1, node2): + master_replica_1 = node1.replicas[0] + master_replica_2 = node2.replicas[0] + assert master_replica_1.lastOrderedPPSeqNo == master_replica_2.lastOrderedPPSeqNo, \ + "{} != {}".format(master_replica_1.lastOrderedPPSeqNo, master_replica_2.lastOrderedPPSeqNo) def randomText(size): return ''.join(random.choice(string.ascii_letters) for _ in range(size)) @@ -685,4 +700,27 @@ def nodeByName(nodes, name): for node in nodes: if node.name == name: return node - raise Exception("Node with the name '{}' has not been 
found.".format(name)) \ No newline at end of file + raise Exception("Node with the name '{}' has not been found.".format(name)) + + +def chk_all_funcs(looper, funcs, acceptable_fails=0, retry_wait=None, + timeout=None, override_eventually_timeout=False): + # TODO: Move this logic to eventuallyAll + def chk(): + fails = 0 + for func in funcs: + try: + func() + except Exception: + fails += 1 + assert fails <= acceptable_fails + + kwargs = {} + if retry_wait: + kwargs['retryWait'] = retry_wait + if timeout: + kwargs['timeout'] = timeout + if override_eventually_timeout: + kwargs['override_timeout_limit'] = override_eventually_timeout + + looper.run(eventually(chk, **kwargs)) diff --git a/plenum/test/input_validation/fields_validation/__init__.py b/plenum/test/input_validation/fields_validation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/input_validation/fields_validation/test_base58_field.py b/plenum/test/input_validation/fields_validation/test_base58_field.py new file mode 100644 index 0000000000..7ed9f92861 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_base58_field.py @@ -0,0 +1,34 @@ +import pytest +import string +from plenum.common.messages.fields import Base58Field +from plenum.common.util import randomString +from plenum.test.input_validation.utils import * + + +LENGTH_LONG_MIN = 43 +LENGTH_LONG_MAX = 46 +LENGTH_SHORT_MIN = 15 +LENGTH_SHORT_MAX = 26 + +valid_base58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyzzzzaaaaa' +validator = Base58Field(short=True, long=True) + + +def test_valid_base58(): + assert not validator.validate(valid_base58[:LENGTH_LONG_MIN]) + assert not validator.validate(valid_base58[:LENGTH_SHORT_MIN]) + + +def test_empty_string(): + assert validator.validate('') + + +def test_wrong_lengths(): + assert validator.validate(valid_base58[:LENGTH_LONG_MIN - 1]) + assert validator.validate(valid_base58[:LENGTH_LONG_MAX + 1]) + assert 
validator.validate(valid_base58[:LENGTH_SHORT_MIN - 1]) + assert validator.validate(valid_base58[:LENGTH_SHORT_MAX + 1]) + + +def test_invalid_symbol(): + assert validator.validate(valid_base58[:LENGTH_LONG_MIN - 1] + '0') diff --git a/plenum/test/input_validation/fields_validation/test_hex_field.py b/plenum/test/input_validation/fields_validation/test_hex_field.py new file mode 100644 index 0000000000..13d32e37de --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_hex_field.py @@ -0,0 +1,22 @@ +import pytest +from plenum.common.messages.fields import HexField + +valid_hex_hash = "0123456789abcdefABCDEF" +validator = HexField(length=len(valid_hex_hash)) + + +def test_valid_hex(): + assert not validator.validate(valid_hex_hash) + + +def test_empty_string(): + assert validator.validate('') + + +def test_invalid_length(): + assert validator.validate(valid_hex_hash[:-1]) + assert validator.validate(valid_hex_hash + "0") + + +def test_invalid_symbol(): + assert validator.validate(valid_hex_hash[:-1] + 'X') diff --git a/plenum/test/input_validation/fields_validation/test_identifier_field.py b/plenum/test/input_validation/fields_validation/test_identifier_field.py new file mode 100644 index 0000000000..0e7c726850 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_identifier_field.py @@ -0,0 +1,29 @@ +import pytest +from plenum.common.messages.fields import IdentifierField + +validator = IdentifierField() + +valid_chars = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" + +MIN_LENGTH_SHORT = 15 +MAX_LENGTH_SHORT = 25 +MIN_LENGTH_LONG = 43 +MAX_LENGTH_LONG = 45 + + +def test_valid_identifiers(): + all_valid_length = \ + list(range(MIN_LENGTH_SHORT, MAX_LENGTH_SHORT + 1)) + \ + list(range(MIN_LENGTH_LONG, MAX_LENGTH_LONG + 1)) + for length in all_valid_length: + assert not validator.validate(valid_chars[:length]) + + +def test_invalid_char(): + invalid_identifier = valid_chars[:MIN_LENGTH_SHORT - 1] + "0" + assert 
validator.validate(invalid_identifier) + + +def test_invalid_length(): + invalid_identifier = valid_chars[:MIN_LENGTH_SHORT - 1] + assert validator.validate(invalid_identifier) diff --git a/plenum/test/input_validation/fields_validation/test_iterable_field.py b/plenum/test/input_validation/fields_validation/test_iterable_field.py new file mode 100644 index 0000000000..1b8bf3a00a --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_iterable_field.py @@ -0,0 +1,15 @@ +import pytest +from plenum.common.messages.fields import IterableField, NonNegativeNumberField + + +def test_invalid_inner_type(): + with pytest.raises(Exception): + IterableField(None) + with pytest.raises(Exception): + IterableField({}) + + +def test_valid_inner_type(): + validator = IterableField(NonNegativeNumberField()) + assert not validator.validate([1,2,3]) + assert validator.validate([1, 2, -3]) diff --git a/plenum/test/input_validation/fields_validation/test_ledger_id_field.py b/plenum/test/input_validation/fields_validation/test_ledger_id_field.py new file mode 100644 index 0000000000..431aeb862d --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_ledger_id_field.py @@ -0,0 +1,15 @@ +import pytest +from plenum.common.messages.fields import LedgerIdField +from plenum.common.types import POOL_LEDGER_ID, DOMAIN_LEDGER_ID + +validator = LedgerIdField() + + +def test_valid_ledger_id(): + assert not validator.validate(POOL_LEDGER_ID) + assert not validator.validate(DOMAIN_LEDGER_ID) + + +def test_invalid_ledger_id(): + not_existing_ledger = 100 + assert validator.validate(not_existing_ledger) diff --git a/plenum/test/input_validation/fields_validation/test_merkle_tree_root_field.py b/plenum/test/input_validation/fields_validation/test_merkle_tree_root_field.py new file mode 100644 index 0000000000..6f5b2b80c3 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_merkle_tree_root_field.py @@ -0,0 +1,26 @@ +import pytest +from 
plenum.common.messages.fields import MerkleRootField + +LENGTH_MIN = 43 +LENGTH_MAX = 45 + +valid_merkle_root = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' +validator = MerkleRootField() + + +def test_valid_merkle_root(): + assert not validator.validate(valid_merkle_root[:LENGTH_MIN]) + assert not validator.validate(valid_merkle_root[:LENGTH_MAX]) + + +def test_empty_string(): + assert validator.validate('') + + +def test_wrong_lengths(): + assert validator.validate(valid_merkle_root[:LENGTH_MIN - 1]) + assert validator.validate(valid_merkle_root[:LENGTH_MAX + 1]) + + +def test_invalid_symbol(): + assert validator.validate(valid_merkle_root[:LENGTH_MIN - 1] + '0') diff --git a/plenum/test/input_validation/fields_validation/test_non_empty_string_field.py b/plenum/test/input_validation/fields_validation/test_non_empty_string_field.py new file mode 100644 index 0000000000..1f70f6c5b6 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_non_empty_string_field.py @@ -0,0 +1,12 @@ +import pytest +from plenum.common.messages.fields import NonEmptyStringField + +validator = NonEmptyStringField() + + +def test_non_empty_string(): + assert not validator.validate("x") + + +def test_empty_string(): + assert validator.validate("") diff --git a/plenum/test/input_validation/fields_validation/test_non_negative_number_field.py b/plenum/test/input_validation/fields_validation/test_non_negative_number_field.py new file mode 100644 index 0000000000..bc18490e75 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_non_negative_number_field.py @@ -0,0 +1,20 @@ +import pytest +from plenum.common.messages.fields import NonNegativeNumberField + +validator = NonNegativeNumberField() + + +def test_positive_number(): + assert not validator.validate(1) + + +def test_negative_number(): + assert validator.validate(-1) + + +def test_zero_number(): + assert not validator.validate(0) + + +def test_not_accepts_floats(): + assert 
validator.validate(1.5) diff --git a/plenum/test/input_validation/fields_validation/test_request_identifier_field.py b/plenum/test/input_validation/fields_validation/test_request_identifier_field.py new file mode 100644 index 0000000000..29622ca4dd --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_request_identifier_field.py @@ -0,0 +1,49 @@ +import pytest +from plenum.common.messages.fields import RequestIdentifierField + +validator = RequestIdentifierField() + +valid_client_id_chars = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" + +MIN_LENGTH_SHORT = 15 +MAX_LENGTH_SHORT = 25 +MIN_LENGTH_LONG = 43 +MAX_LENGTH_LONG = 45 + +# Request id consists of client identifier (base56 string 16/32 long) and +# some number (for now it is current timestamp, but can be any number) +valid_request_id = (valid_client_id_chars[:MIN_LENGTH_SHORT], 11111) + + +def test_valid_request_id(): + all_valid_length = \ + list(range(MIN_LENGTH_SHORT, MAX_LENGTH_SHORT + 1)) + \ + list(range(MIN_LENGTH_LONG, MAX_LENGTH_LONG + 1)) + for length in all_valid_length: + assert not validator.validate((valid_client_id_chars[:length], 11111)) + + +def test_invalid_order(): + s, t = valid_request_id + assert validator.validate((t, s)) + + +def test_empty_client_id(): + assert validator.validate(("", valid_request_id[1])) + assert validator.validate((None, valid_request_id[1])) + + +def test_empty_number(): + assert validator.validate((valid_request_id[0], None)) + + +def test_invalid_char(): + invalid_client_id = valid_request_id[0][:-1] + "0" + invalid_request = (invalid_client_id, valid_request_id[1]) + assert validator.validate(invalid_request) + + +def test_invalid_length(): + invalid_client_id = valid_request_id[:-1] + invalid_request = (invalid_client_id, valid_request_id[1]) + assert validator.validate(invalid_request) diff --git a/plenum/test/input_validation/fields_validation/test_time_among_field.py 
b/plenum/test/input_validation/fields_validation/test_time_among_field.py new file mode 100644 index 0000000000..3d60309417 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_time_among_field.py @@ -0,0 +1,17 @@ +import pytest +from plenum.common.messages.fields import TieAmongField + +validator = TieAmongField() + + +def test_valid(): + assert not validator.validate(("Node1:0", 1)) + assert not validator.validate(("Node1:0", 0)) + + +def test_invalid_vote_number(): + assert validator.validate(("Node1:0", -1)) + + +def test_empty_node_id(): + assert validator.validate(("", 1)) diff --git a/plenum/test/input_validation/fields_validation/test_timestamp_field.py b/plenum/test/input_validation/fields_validation/test_timestamp_field.py new file mode 100644 index 0000000000..a763841f98 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_timestamp_field.py @@ -0,0 +1,20 @@ +import pytest +from plenum.common.messages.fields import TimestampField +from datetime import datetime + +validator = TimestampField() +timestamp = datetime.now().timestamp() + + +def test_valid_value(): + assert not validator.validate(timestamp) + + # This is needed because timestamp is usually multiplied + # by 1000 to "make it compatible to JavaScript Date()" + assert not validator.validate(round(timestamp * 1000)) + + +def test_invalid_value(): + assert validator.validate(-1) + + diff --git a/plenum/test/input_validation/utils.py b/plenum/test/input_validation/utils.py new file mode 100644 index 0000000000..4b6c7b7b00 --- /dev/null +++ b/plenum/test/input_validation/utils.py @@ -0,0 +1,8 @@ + + +def assert_valid(smth): + assert not smth + + +def assert_invalid(smth): + assert smth \ No newline at end of file diff --git a/plenum/test/instances/test_pre_prepare_digest.py b/plenum/test/instances/test_pre_prepare_digest.py index e908bacdf9..8f88f97971 100644 --- a/plenum/test/instances/test_pre_prepare_digest.py +++ 
b/plenum/test/instances/test_pre_prepare_digest.py @@ -43,10 +43,15 @@ def chkSusp(): for r in nonPrimaryReps: # Every node with non primary replicas of instance 0 should raise # suspicion + susp_code = Suspicions.PPR_DIGEST_WRONG.code + # Since the node sending bad requests might become primary of + # some backup instance after view changes, it will again send a + # PRE-PREPARE with incorrect digest, so other nodes might raise + # suspicion more than once assert len(getNodeSuspicions(r.node, - Suspicions.PPR_DIGEST_WRONG.code)) == 1 + susp_code)) >= 1 # No non primary replica should send any PREPARE - assert len(sentPrepare(r)) == 0 + assert len(sentPrepare(r, viewNo=0, ppSeqNo=1)) == 0 numOfNodes = len(primaryRep.node.nodeReg) timeout = waits.expectedTransactionExecutionTime(numOfNodes) diff --git a/plenum/test/malicious_behaviors_node.py b/plenum/test/malicious_behaviors_node.py index c47d679e17..26228097e5 100644 --- a/plenum/test/malicious_behaviors_node.py +++ b/plenum/test/malicious_behaviors_node.py @@ -14,7 +14,8 @@ from stp_core.common.log import getlogger from plenum.server.replica import TPCStat from plenum.test.helper import TestReplica -from plenum.test.test_node import TestNode, TestReplica, getPrimaryReplica +from plenum.test.test_node import TestNode, TestReplica, getPrimaryReplica, \ + getNonPrimaryReplicas from plenum.test.delayers import ppDelay logger = getlogger() @@ -181,12 +182,20 @@ def newGenerateReply(self, viewNo: int, req: Request) -> Reply: node.generateReply = types.MethodType(newGenerateReply, node) -def slow_primary(nodes, instId=0, delay=5): +def slow_primary(nodes, inst_id=0, delay=5): # make primary replica slow to send PRE-PREPAREs def ifPrePrepare(msg): if isinstance(msg, PrePrepare): return delay - pr = getPrimaryReplica(nodes, instId) + pr = getPrimaryReplica(nodes, inst_id) pr.outBoxTestStasher.delay(ifPrePrepare) return pr + + +def slow_non_primary(nodes, inst_id=0, delay=5): + # make non-primary replica slow to receive 
PRE-PREPAREs + npr = getNonPrimaryReplicas(nodes, inst_id)[0] + slow_node = npr.node + slow_node.nodeIbStasher.delay(ppDelay(delay, inst_id)) + return npr diff --git a/plenum/test/monitoring/test_monitor_reconnection.py b/plenum/test/monitoring/test_monitor_reconnection.py index 5453d5a08d..e53efb5ef3 100644 --- a/plenum/test/monitoring/test_monitor_reconnection.py +++ b/plenum/test/monitoring/test_monitor_reconnection.py @@ -1,3 +1,7 @@ +import pytest + + +@pytest.mark.skip(reason='INDY-98. Not implemented') def testMonitorReconnects(): #TODO: Add test to confirm monitor successfully reconnects if it loses connection to stats server at some point - pass + raise NotImplementedError diff --git a/plenum/test/node_catchup/helper.py b/plenum/test/node_catchup/helper.py index 2694f70c6e..5ae469a367 100644 --- a/plenum/test/node_catchup/helper.py +++ b/plenum/test/node_catchup/helper.py @@ -5,7 +5,7 @@ from stp_core.loop.eventually import eventually from stp_core.types import HA from plenum.test.helper import checkLedgerEquality, checkStateEquality, \ - check_seqno_db_equality, assertEquality + check_seqno_db_equality, assertEquality, check_last_ordered_pp_seq_no from plenum.test.test_client import TestClient from plenum.test.test_node import TestNode from plenum.test import waits @@ -21,6 +21,7 @@ def checkNodeDataForEquality(node: TestNode, *otherNodes: Iterable[TestNode]): # Checks for node's ledgers and state's to be equal for n in otherNodes: + check_last_ordered_pp_seq_no(node, n) check_seqno_db_equality(node.seqNoDB, n.seqNoDB) checkLedgerEquality(node.domainLedger, n.domainLedger) checkStateEquality(node.getState(DOMAIN_LEDGER_ID), n.getState(DOMAIN_LEDGER_ID)) diff --git a/plenum/test/node_catchup/test_catchup_demoted.py b/plenum/test/node_catchup/test_catchup_demoted.py new file mode 100644 index 0000000000..fcd12c0b7d --- /dev/null +++ b/plenum/test/node_catchup/test_catchup_demoted.py @@ -0,0 +1,46 @@ +from plenum.common.constants import ALIAS, SERVICES, 
VALIDATOR +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.conftest import whitelist +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + checkNodeDataForUnequality, checkNodeDataForEquality +from plenum.test.pool_transactions.helper import \ + updateNodeData +from stp_core.common.log import getlogger + +# Logger.setLogLevel(logging.WARNING) +logger = getlogger() + + +def test_catch_up_after_demoted(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns): + # 1. add a new node after sending some txns and check that catch-up is done (the new node is up to date) + looper, newNode, client, wallet, newStewardClient, newStewardWallet = nodeSetWithNodeAddedAfterSomeTxns + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4]) + + # 2. turn the new node off (demote) + node_data = { + ALIAS: newNode.name, + SERVICES: [] + } + updateNodeData(looper, newStewardClient, + newStewardWallet, newNode, + node_data) + + # 3. send more requests, so that the new node's state is outdated + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) + checkNodeDataForUnequality(newNode, *txnPoolNodeSet[:-1]) + + # 4. turn the new node on + node_data = { + ALIAS: newNode.name, + SERVICES: [VALIDATOR] + } + updateNodeData(looper, newStewardClient, + newStewardWallet, newNode, + node_data) + + # 5. make sure catch-up is done (the new node is up to date again) + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) + + # 6. 
send more requests and make sure that the new node participates in processing them + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10) + checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1]) diff --git a/plenum/test/node_catchup/test_discard_view_no.py b/plenum/test/node_catchup/test_discard_view_no.py index ede4dffd4f..197a984824 100644 --- a/plenum/test/node_catchup/test_discard_view_no.py +++ b/plenum/test/node_catchup/test_discard_view_no.py @@ -6,20 +6,18 @@ from stp_core.loop.eventually import eventually from plenum.common.types import Nomination, PrePrepare from plenum.common.util import randomString -from plenum.test.delayers import delayNonPrimaries -from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ - waitForViewChange, checkDiscardMsg +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test.helper import checkDiscardMsg +from plenum.test.view_change.helper import ensure_view_change from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.pool_transactions.helper import addNewStewardAndNode from plenum.test.test_node import checkNodesConnected, \ - checkProtocolInstanceSetup + checkProtocolInstanceSetup, getPrimaryReplica from plenum.test import waits whitelist = ['found legacy entry'] # warnings - -@pytest.mark.skip(reason='SOV-456') def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns, newNodeCaughtUp, tdirWithPoolTxns, @@ -32,11 +30,12 @@ def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, looper, nodeX, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns viewNo = nodeX.viewNo - # Delay processing of PRE-PREPARE from all non primary replicas of master - # so master's performance falls and view changes - delayNonPrimaries(txnPoolNodeSet, 0, 10) - sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 4) - waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo+1) + # Force two view changes: node discards msgs which 
have viewNo + # at least two less than node's. Current protocol implementation + # needs to hold messages from the previous view as well as + # from the current view. + ensure_view_change(looper, txnPoolNodeSet, client, wallet) + ensure_view_change(looper, txnPoolNodeSet, client, wallet) newStewardName = "testClientSteward" + randomString(3) nodeName = "Theta" @@ -50,23 +49,25 @@ def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, looper.run(checkNodesConnected(txnPoolNodeSet)) looper.run(client.ensureConnectedToNodes()) waitNodeDataEquality(looper, nodeTheta, *txnPoolNodeSet[:-1]) - checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1, - timeout=10) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) electMsg = Nomination(nodeX.name, 0, viewNo, nodeX.replicas[0].lastOrderedPPSeqNo) + + primaryRepl = getPrimaryReplica(txnPoolNodeSet) threePMsg = PrePrepare( 0, viewNo, 10, - wallet.defaultId, - wallet._getIdData().lastReqId+1, + time.time(), + [[wallet.defaultId, wallet._getIdData().lastReqId+1]], + 1, "random digest", - time.time() + DOMAIN_LEDGER_ID, + primaryRepl.stateRootHash(DOMAIN_LEDGER_ID), + primaryRepl.txnRootHash(DOMAIN_LEDGER_ID), ) ridTheta = nodeX.nodestack.getRemote(nodeTheta.name).uid nodeX.send(electMsg, ridTheta) - nodeX.send(threePMsg, ridTheta) - nodeX.send(electMsg, ridTheta) messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime() looper.run(eventually(checkDiscardMsg, [nodeTheta, ], electMsg, diff --git a/plenum/test/node_catchup/test_new_node_catchup.py b/plenum/test/node_catchup/test_new_node_catchup.py index 6d5172fb12..00f930b46e 100644 --- a/plenum/test/node_catchup/test_new_node_catchup.py +++ b/plenum/test/node_catchup/test_new_node_catchup.py @@ -69,7 +69,7 @@ def testDelayedLedgerStatusNotChangingState(): `participating` mode, the mode should not change to `discovered` if found the arriving `LedgerStatus` to be ok. 
""" - pass + raise NotImplementedError # TODO: This test passes but it is observed that PREPAREs are not received at diff --git a/plenum/test/node_catchup/test_node_catchup_after_disconnect.py b/plenum/test/node_catchup/test_node_catchup_after_disconnect.py index b5e37d2d69..4d262fdb89 100644 --- a/plenum/test/node_catchup/test_node_catchup_after_disconnect.py +++ b/plenum/test/node_catchup/test_node_catchup_after_disconnect.py @@ -1,6 +1,6 @@ from stp_core.common.log import getlogger from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import waitNodeDataEquality, waitNodeDataUnequality +from plenum.test.node_catchup.helper import waitNodeDataEquality, waitNodeDataUnequality, checkNodeDataForEquality from plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected, reconnect_node_and_ensure_connected # Do not remove the next import @@ -19,17 +19,25 @@ def testNodeCatchupAfterDisconnect(newNodeCaughtUp, txnPoolNodeSet, :return: """ looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns + logger.debug("Stopping node {} with pool ledger size {}". format(newNode, newNode.poolManager.txnSeqNo)) disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, newNode, stopNode=False) looper.removeProdable(newNode) + # TODO: Check if the node has really stopped processing requests? 
logger.debug("Sending requests") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) # Make sure new node got out of sync waitNodeDataUnequality(looper, newNode, *txnPoolNodeSet[:-1]) + logger.debug("Starting the stopped node, {}".format(newNode)) looper.add(newNode) reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, newNode) + logger.debug("Waiting for the node to catch up, {}".format(newNode)) waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) + + logger.debug("Sending more requests") + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10) + checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1]) diff --git a/plenum/test/node_request/node_request_helper.py b/plenum/test/node_request/node_request_helper.py index 5919d9da74..bffdd3b369 100644 --- a/plenum/test/node_request/node_request_helper.py +++ b/plenum/test/node_request/node_request_helper.py @@ -4,10 +4,12 @@ from stp_core.loop.eventually import eventuallyAll from plenum.common.types import PrePrepare, OPERATION, f from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.common.types import OPERATION, f from plenum.common.util import getMaxFailures from plenum.server.node import Node from plenum.server.replica import Replica from plenum.test import waits +from plenum.test.helper import chk_all_funcs from plenum.test.spy_helpers import getAllArgs from plenum.test.test_node import TestNode, getNonPrimaryReplicas, \ getAllReplicas, getPrimaryReplica @@ -40,10 +42,8 @@ def g(node: TestNode): numOfMsgsWithFaults) timeout = waits.expectedPropagateTime(len(nodeSet)) - coros = [partial(g, node) for node in nodeSet] - looper.run(eventuallyAll(*coros, - totalTimeout=timeout, - acceptableFails=faultyNodes)) + funcs = [partial(g, node) for node in nodeSet] + chk_all_funcs(looper, funcs, faultyNodes, timeout) def checkPrePrepared(looper, @@ -167,9 +167,10 @@ def nonPrimaryReceivesCorrectNumberOfPREPREPAREs(): primarySentsCorrectNumberOfPREPREPAREs() 
nonPrimaryReceivesCorrectNumberOfPREPREPAREs() - coros = [partial(g, instId) for instId in instIds] + funcs = [partial(g, instId) for instId in instIds] # TODO Select or create the timeout from 'waits'. Don't use constant. - looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) + # looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) + chk_all_funcs(looper, funcs, faultyNodes, timeout) def checkPrepared(looper, nodeSet, preprepared1, instIds, faultyNodes=0, @@ -275,9 +276,10 @@ def nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs(): primaryReceivesCorrectNumberOfPREPAREs() nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs() - coros = [partial(g, instId) for instId in instIds] + funcs = [partial(g, instId) for instId in instIds] # TODO Select or create the timeout from 'waits'. Don't use constant. - looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) + # looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) + chk_all_funcs(looper, funcs, faultyNodes, timeout) def checkCommitted(looper, nodeSet, prepared1, instIds, faultyNodes=0): @@ -289,6 +291,8 @@ def g(instId): allReplicas = getAllReplicas(nodeSet, instId) primaryReplica = getPrimaryReplica(nodeSet, instId) + # Question: Why 2 checks are being made, one with the data structure + # and then the spylog def replicasSeesCorrectNumOfCOMMITs(): """ num of commit messages must be = n when zero fault; @@ -343,9 +347,10 @@ def replicasReceivesCorrectNumberOfCOMMITs(): replicasReceivesCorrectNumberOfCOMMITs() replicasSeesCorrectNumOfCOMMITs() - coros = [partial(g, instId) for instId in instIds] + funcs = [partial(g, instId) for instId in instIds] # TODO Select or create the timeout from 'waits'. Don't use constant. 
- looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) + # looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) + chk_all_funcs(looper, funcs, faultyNodes, timeout) def msgCountOK(nodesSize, diff --git a/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py b/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py index d2f6059a62..e0eba05d8a 100644 --- a/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py @@ -38,8 +38,9 @@ def afterElection(setup, up): def testNumOfCommitMsgsWithFPlusOneFaults(afterElection, looper, nodeSet, prepared1, noRetryReq): with pytest.raises(AssertionError): + # To raise an error pass less than the actual number of faults checkCommitted(looper, nodeSet, prepared1, range(getNoInstances(len(nodeSet))), - faultyNodes) + faultyNodes-1) diff --git a/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py b/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py index 0b42be3f0d..1fc53f94be 100644 --- a/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py +++ b/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py @@ -42,7 +42,7 @@ def testNonPrimarySendsAPrePrepare(looper, nodeSet, setup, propagated1): remainingNpr = nonPrimaryReplicas[1:] def sendPrePrepareFromNonPrimary(): - firstNpr.requestQueues[DOMAIN_LEDGER_ID].append(propagated1) + firstNpr.requestQueues[DOMAIN_LEDGER_ID].add(propagated1) ppReq = firstNpr.create3PCBatch(DOMAIN_LEDGER_ID) firstNpr.sendPrePrepare(ppReq) return ppReq diff --git a/plenum/test/node_request/test_request_forwarding.py b/plenum/test/node_request/test_request_forwarding.py new file mode 100644 index 0000000000..4bf2ba254a --- /dev/null +++ b/plenum/test/node_request/test_request_forwarding.py @@ -0,0 
+1,62 @@ +import pytest + +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test import waits +from plenum.test.delayers import nom_delay, delay_3pc_messages +from plenum.test.helper import sendRandomRequests, \ + waitForSufficientRepliesForRequests +from plenum.test.batching_3pc.conftest import tconf +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.test_node import ensureElectionsDone +from plenum.test.view_change.helper import ensure_view_change +from stp_core.loop.eventually import eventually + +@pytest.mark.skip(reason="INDY-147") +def test_all_replicas_hold_request_keys(looper, txnPoolNodeSet, client1, + wallet1, client1Connected, tconf): + """ + All replicas whether primary or non primary hold request keys of forwarded + requests. Once requests are ordered, they request keys are removed from replica. + """ + delay_3pc_messages(txnPoolNodeSet, 0, 2) + delay_3pc_messages(txnPoolNodeSet, 1, 2) + + def chk(count): + # All replicas have same amount of forwarded request keys and all keys + # are finalised. + for node in txnPoolNodeSet: + for r in node.replicas: + if r.isPrimary is False: + assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == count + for i in range(count): + k = r.requestQueues[DOMAIN_LEDGER_ID][i] + assert r.requests[k].finalised + elif r.isPrimary is True: + assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == 0 + + reqs = sendRandomRequests(wallet1, client1, tconf.Max3PCBatchSize - 1) + # Only non primary replicas should have all request keys with them + looper.run(eventually(chk, tconf.Max3PCBatchSize - 1)) + waitForSufficientRepliesForRequests(looper, client1, requests=reqs, + add_delay_to_timeout=2) + # Replicas should have no request keys with them since they are ordered + looper.run(eventually(chk, 0)) # Need to wait since one node might not + # have processed it. 
+ + delay = 1 + for node in txnPoolNodeSet: + node.nodeIbStasher.delay(nom_delay(delay)) + + ensure_view_change(looper, txnPoolNodeSet, client1, wallet1) + reqs = sendRandomRequests(wallet1, client1, 2 * tconf.Max3PCBatchSize) + looper.run(eventually(chk, 2 * tconf.Max3PCBatchSize)) + + # Since each nomination is delayed and there will be multiple nominations + # so adding some extra time + timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + \ + len(txnPoolNodeSet)*delay + ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout) + waitForSufficientRepliesForRequests(looper, client1, requests=reqs, + add_delay_to_timeout=2) + looper.run(eventually(chk, 0)) diff --git a/plenum/test/pool_transactions/helper.py b/plenum/test/pool_transactions/helper.py index 48e859d80d..fa71aa29ac 100644 --- a/plenum/test/pool_transactions/helper.py +++ b/plenum/test/pool_transactions/helper.py @@ -76,9 +76,10 @@ def sendAddNewNode(newNodeName, stewardClient, stewardWallet, def addNewNode(looper, stewardClient, stewardWallet, newNodeName, tdir, tconf, - allPluginsPath=None, autoStart=True, nodeClass=TestNode): + allPluginsPath=None, autoStart=True, nodeClass=TestNode, + transformOpFunc=None): req, nodeIp, nodePort, clientIp, clientPort, sigseed \ - = sendAddNewNode(newNodeName, stewardClient, stewardWallet) + = sendAddNewNode(newNodeName, stewardClient, stewardWallet, transformOpFunc) waitForSufficientRepliesForRequests(looper, stewardClient, requests=[req], fVal=1) @@ -108,7 +109,7 @@ def addNewSteward(looper, tdir, def addNewStewardAndNode(looper, creatorClient, creatorWallet, stewardName, newNodeName, tdir, tconf, allPluginsPath=None, autoStart=True, nodeClass=TestNode, - clientClass=TestClient): + clientClass=TestClient, transformNodeOpFunc=None): newSteward, newStewardWallet = addNewSteward(looper, tdir, creatorClient, creatorWallet, stewardName, @@ -116,7 +117,7 @@ def addNewStewardAndNode(looper, creatorClient, creatorWallet, stewardName, newNode = 
addNewNode(looper, newSteward, newStewardWallet, newNodeName, tdir, tconf, allPluginsPath, autoStart=autoStart, - nodeClass=nodeClass) + nodeClass=nodeClass, transformOpFunc=transformNodeOpFunc) return newSteward, newStewardWallet, newNode diff --git a/plenum/test/pool_transactions/test_node_key_changed.py b/plenum/test/pool_transactions/test_node_key_changed.py new file mode 100644 index 0000000000..d407a839ff --- /dev/null +++ b/plenum/test/pool_transactions/test_node_key_changed.py @@ -0,0 +1,53 @@ +import base58 +from plenum.common.keygen_utils import initNodeKeysForBothStacks +from plenum.common.signer_simple import SimpleSigner +from plenum.common.util import randomString +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + ensureClientConnectedToNodesAndPoolLedgerSame +from plenum.test.pool_transactions.helper import changeNodeKeys +from plenum.test.test_node import TestNode, checkNodesConnected + +from stp_core.common.log import getlogger +from stp_core.types import HA + +logger = getlogger() + +# logged errors to ignore +whitelist = ['found legacy entry', "doesn't match", 'reconciling nodeReg', + 'missing', 'conflicts', 'matches', 'nodeReg', + 'conflicting address', 'unable to send message', + 'got error while verifying message'] +# Whitelisting "got error while verifying message" since a node while not have +# initialised a connection for a new node by the time the new node's message +# reaches it + + +def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, + tconf, steward1, nodeThetaAdded, + allPluginsPath=None): + newSteward, newStewardWallet, newNode = nodeThetaAdded + + newNode.stop() + looper.removeProdable(name=newNode.name) + nodeHa, nodeCHa = HA(*newNode.nodestack.ha), HA(*newNode.clientstack.ha) + sigseed = randomString(32).encode() + verkey = base58.b58encode(SimpleSigner(seed=sigseed).naclSigner.verraw) + changeNodeKeys(looper, newSteward, newStewardWallet, newNode, verkey) + initNodeKeysForBothStacks(newNode.name, 
tdirWithPoolTxns, sigseed, + override=True) + + logger.debug("{} starting with HAs {} {}".format(newNode, nodeHa, nodeCHa)) + node = TestNode(newNode.name, basedirpath=tdirWithPoolTxns, config=tconf, + ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) + looper.add(node) + # The last element of `txnPoolNodeSet` is the node Theta that was just + # stopped + txnPoolNodeSet[-1] = node + + looper.run(checkNodesConnected(stacks=txnPoolNodeSet)) + waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1]) + ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, + *txnPoolNodeSet) + ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, + *txnPoolNodeSet) + diff --git a/plenum/test/pool_transactions/test_nodes_data_changed.py b/plenum/test/pool_transactions/test_nodes_data_changed.py new file mode 100644 index 0000000000..0b49840b1a --- /dev/null +++ b/plenum/test/pool_transactions/test_nodes_data_changed.py @@ -0,0 +1,132 @@ +from plenum.common.constants import * +from plenum.common.util import randomString +from plenum.test.helper import waitRejectWithReason +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + ensureClientConnectedToNodesAndPoolLedgerSame +from plenum.test.pool_transactions.helper import addNewStewardAndNode, sendUpdateNode, \ + updateNodeDataAndReconnect +from plenum.test.test_node import checkNodesConnected + +from stp_core.common.log import getlogger +from stp_core.network.port_dispenser import genHa + +logger = getlogger() + +# logged errors to ignore +whitelist = ['found legacy entry', "doesn't match", 'reconciling nodeReg', + 'missing', 'conflicts', 'matches', 'nodeReg', + 'conflicting address', 'unable to send message', + 'got error while verifying message'] + + +# Whitelisting "got error while verifying message" since a node while not have +# initialised a connection for a new node by the time the new node's message +# reaches it + + + +def testNodePortCannotBeChangedByAnotherSteward(looper, txnPoolNodeSet, 
+ tdirWithPoolTxns, tconf, + steward1, stewardWallet, + nodeThetaAdded): + _, _, newNode = nodeThetaAdded + nodeNewHa = genHa(1) + new_port = nodeNewHa.port + node_ha = txnPoolNodeSet[0].nodeReg[newNode.name] + cli_ha = txnPoolNodeSet[0].cliNodeReg[newNode.name + CLIENT_STACK_SUFFIX] + node_data = { + ALIAS: newNode.name, + NODE_PORT: new_port, + NODE_IP: node_ha.host, + CLIENT_PORT: cli_ha.port, + CLIENT_IP: cli_ha.host, + } + + logger.debug('{} changing port to {} {}'.format(newNode, new_port, + newNode.nodestack.ha.port)) + sendUpdateNode(steward1, stewardWallet, newNode, + node_data) + + for node in txnPoolNodeSet: + waitRejectWithReason(looper, steward1, 'is not a steward of node', + node.clientstack.name) + + +def test_node_alias_cannot_be_changed(looper, txnPoolNodeSet, + tdirWithPoolTxns, + tconf, nodeThetaAdded): + """ + The node alias cannot be changed. + """ + newSteward, newStewardWallet, newNode = nodeThetaAdded + node_data = {ALIAS: 'foo'} + sendUpdateNode(newSteward, newStewardWallet, newNode, + node_data) + for node in txnPoolNodeSet: + waitRejectWithReason(looper, newSteward, + 'data has conflicts with request data', + node.clientstack.name) + + +def testNodePortChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, + tconf, steward1, stewardWallet, nodeThetaAdded): + """ + An running node's port is changed + """ + newSteward, newStewardWallet, newNode = nodeThetaAdded + nodeNewHa = genHa(1) + new_port = nodeNewHa.port + + node_ha = txnPoolNodeSet[0].nodeReg[newNode.name] + cli_ha = txnPoolNodeSet[0].cliNodeReg[newNode.name + CLIENT_STACK_SUFFIX] + node_data = { + ALIAS: newNode.name, + NODE_PORT: new_port, + NODE_IP: node_ha.host, + CLIENT_PORT: cli_ha.port, + CLIENT_IP: cli_ha.host, + } + + node = updateNodeDataAndReconnect(looper, newSteward, + newStewardWallet, newNode, + node_data, + tdirWithPoolTxns, tconf, + txnPoolNodeSet) + + waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1]) + + ensureClientConnectedToNodesAndPoolLedgerSame(looper, 
steward1, + *txnPoolNodeSet) + ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, + *txnPoolNodeSet) + + +def testAddInactiveNodeThenActivate(looper, txnPoolNodeSet, tdirWithPoolTxns, + tconf, steward1, stewardWallet, allPluginsPath): + newStewardName = "testClientSteward" + randomString(3) + newNodeName = "Kappa" + + # adding a new node without SERVICES field + # it means the node is in the inactive state + def del_services(op): del op[DATA][SERVICES] + + newSteward, newStewardWallet, newNode = \ + addNewStewardAndNode(looper, + steward1, stewardWallet, + newStewardName, newNodeName, + tdirWithPoolTxns, tconf, + allPluginsPath, + transformNodeOpFunc=del_services) + looper.run(checkNodesConnected(txnPoolNodeSet)) + + # turn the new node on + node_data = { + ALIAS: newNode.name, + SERVICES: [VALIDATOR] + } + + updateNodeDataAndReconnect(looper, newSteward, + newStewardWallet, newNode, + node_data, + tdirWithPoolTxns, tconf, + txnPoolNodeSet + [newNode]) diff --git a/plenum/test/pool_transactions/test_nodes_with_pool_txns.py b/plenum/test/pool_transactions/test_nodes_with_pool_txns.py index 8b3344289a..95f97d1d53 100644 --- a/plenum/test/pool_transactions/test_nodes_with_pool_txns.py +++ b/plenum/test/pool_transactions/test_nodes_with_pool_txns.py @@ -2,30 +2,22 @@ from copy import copy import base58 -import pytest - -from plenum.common.keygen_utils import initNodeKeysForBothStacks -from stp_core.network.port_dispenser import genHa -from stp_core.types import HA - -from stp_core.loop.eventually import eventually -from stp_core.common.log import getlogger -from plenum.common.signer_simple import SimpleSigner from plenum.common.constants import * +from plenum.common.signer_simple import SimpleSigner from plenum.common.util import getMaxFailures, randomString from plenum.test import waits from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ - checkRejectWithReason, waitReqNackWithReason, waitRejectWithReason, \ - 
waitForSufficientRepliesForRequests, waitReqNackFromPoolWithReason, \ - waitRejectFromPoolWithReason + waitRejectWithReason, \ + waitReqNackFromPoolWithReason from plenum.test.node_catchup.helper import waitNodeDataEquality, \ ensureClientConnectedToNodesAndPoolLedgerSame -from plenum.test.pool_transactions.helper import addNewClient, addNewNode, \ - updateNodeData, addNewStewardAndNode, changeNodeKeys, sendUpdateNode, \ - sendAddNewNode, updateNodeDataAndReconnect, addNewSteward -from plenum.test.test_node import TestNode, checkNodesConnected, \ +from plenum.test.pool_transactions.helper import addNewClient, addNewStewardAndNode, sendAddNewNode +from plenum.test.test_node import checkNodesConnected, \ checkProtocolInstanceSetup +from stp_core.common.log import getlogger +from stp_core.loop.eventually import eventually + logger = getlogger() # logged errors to ignore @@ -33,15 +25,13 @@ 'missing', 'conflicts', 'matches', 'nodeReg', 'conflicting address', 'unable to send message', 'got error while verifying message'] + + # Whitelisting "got error while verifying message" since a node while not have # initialised a connection for a new node by the time the new node's message # reaches it -def getNodeWithName(txnPoolNodeSet, name: str): - return next(node for node in txnPoolNodeSet if node.name == name) - - def testNodesConnect(txnPoolNodeSet): pass @@ -95,8 +85,8 @@ def _setHexVerkey(op): def testStewardCannotAddNodeWithInvalidHa(looper, tdir, - txnPoolNodeSet, - newAdHocSteward): + txnPoolNodeSet, + newAdHocSteward): """ The case: Steward accidentally sends the NODE txn with an invalid HA. @@ -120,6 +110,7 @@ def testStewardCannotAddNodeWithInvalidHa(looper, tdir, for field, value in tests: # create a transform function for each test def _tnf(op): op[DATA].update({field: value}) + sendAddNewNode(newNodeName, newSteward, newStewardWallet, transformOpFunc=_tnf) # wait NAcks with exact message. 
it does not works for just 'is invalid' @@ -129,8 +120,8 @@ def _tnf(op): op[DATA].update({field: value}) def testStewardCannotAddNodeWithOutFullFieldsSet(looper, tdir, - txnPoolNodeSet, - newAdHocSteward): + txnPoolNodeSet, + newAdHocSteward): """ The case: Steward accidentally sends the NODE txn without full fields set. @@ -153,6 +144,7 @@ def _renameNodePortField(op): for fn in (NODE_IP, CLIENT_IP, NODE_PORT, CLIENT_PORT): def _tnf(op): del op[DATA][fn] + sendAddNewNode(newNodeName, newSteward, newStewardWallet, transformOpFunc=_tnf) # wait NAcks with exact message. it does not works for just 'is missed' @@ -180,7 +172,7 @@ def testNonStewardCannotAddNode(looper, txnPoolNodeSet, client1, sendAddNewNode(newNodeName, client1, wallet1) for node in txnPoolNodeSet: waitRejectWithReason(looper, client1, 'is not a steward so cannot add a ' - 'new node', node.clientstack.name) + 'new node', node.clientstack.name) def testClientConnectsToNewNode(looper, txnPoolNodeSet, tdirWithPoolTxns, @@ -188,14 +180,14 @@ def testClientConnectsToNewNode(looper, txnPoolNodeSet, tdirWithPoolTxns, """ A client should be able to connect to a newly added node """ - newStewardName = "testClientSteward"+randomString(3) + newStewardName = "testClientSteward" + randomString(3) newNodeName = "Epsilon" oldNodeReg = copy(steward1.nodeReg) newSteward, newStewardWallet, newNode = addNewStewardAndNode(looper, - steward1, stewardWallet, - newStewardName, newNodeName, - tdirWithPoolTxns, tconf, - allPluginsPath) + steward1, stewardWallet, + newStewardName, newNodeName, + tdirWithPoolTxns, tconf, + allPluginsPath) txnPoolNodeSet.append(newNode) looper.run(checkNodesConnected(txnPoolNodeSet)) logger.debug("{} connected to the pool".format(newNode)) @@ -218,7 +210,7 @@ def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, Add 2 new nodes to trigger replica addition and primary election """ for nodeName in ("Zeta", "Eta"): - newStewardName = "testClientSteward"+randomString(3) + 
newStewardName = "testClientSteward" + randomString(3) newSteward, newStewardWallet, newNode = addNewStewardAndNode(looper, steward1, stewardWallet, @@ -242,114 +234,3 @@ def checkFValue(): timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet)) looper.run(eventually(checkFValue, retryWait=1, timeout=timeout)) checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) - - -def testNodePortCannotBeChangedByAnotherSteward(looper, txnPoolNodeSet, - tdirWithPoolTxns, tconf, - steward1, stewardWallet, - nodeThetaAdded): - _, _, newNode = nodeThetaAdded - nodeNewHa = genHa(1) - new_port = nodeNewHa.port - node_ha = txnPoolNodeSet[0].nodeReg[newNode.name] - cli_ha = txnPoolNodeSet[0].cliNodeReg[newNode.name + CLIENT_STACK_SUFFIX] - node_data = { - ALIAS: newNode.name, - NODE_PORT: new_port, - NODE_IP: node_ha.host, - CLIENT_PORT: cli_ha.port, - CLIENT_IP: cli_ha.host, - } - - logger.debug('{} changing port to {} {}'.format(newNode, new_port, - newNode.nodestack.ha.port)) - sendUpdateNode(steward1, stewardWallet, newNode, - node_data) - - for node in txnPoolNodeSet: - waitRejectWithReason(looper, steward1, 'is not a steward of node', - node.clientstack.name) - - -def test_node_alias_cannot_be_changed(looper, txnPoolNodeSet, - tdirWithPoolTxns, - tconf, nodeThetaAdded): - """ - The node alias cannot be changed. 
- """ - newSteward, newStewardWallet, newNode = nodeThetaAdded - node_data = {ALIAS: 'foo'} - sendUpdateNode(newSteward, newStewardWallet, newNode, - node_data) - for node in txnPoolNodeSet: - waitRejectWithReason(looper, newSteward, - 'data has conflicts with request data', - node.clientstack.name) - - -def testNodePortChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, - tconf, steward1, stewardWallet, nodeThetaAdded): - """ - An running node's port is changed - """ - newSteward, newStewardWallet, newNode = nodeThetaAdded - nodeNewHa = genHa(1) - new_port = nodeNewHa.port - - node_ha = txnPoolNodeSet[0].nodeReg[newNode.name] - cli_ha = txnPoolNodeSet[0].cliNodeReg[newNode.name + CLIENT_STACK_SUFFIX] - node_data = { - ALIAS: newNode.name, - NODE_PORT: new_port, - NODE_IP: node_ha.host, - CLIENT_PORT: cli_ha.port, - CLIENT_IP: cli_ha.host, - } - - node = updateNodeDataAndReconnect(looper, newSteward, - newStewardWallet, newNode, - node_data, - tdirWithPoolTxns, tconf, - txnPoolNodeSet) - - waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1]) - - ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, - *txnPoolNodeSet) - ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, - *txnPoolNodeSet) - - -def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, - tconf, steward1, nodeThetaAdded, - allPluginsPath=None): - newSteward, newStewardWallet, newNode = nodeThetaAdded - - # Since the node returned by fixture `nodeThetaAdded` was abandoned in the - # previous test, so getting node `Theta` from `txnPoolNodeSet` - newNode = getNodeWithName(txnPoolNodeSet, newNode.name) - - newNode.stop() - looper.removeProdable(name=newNode.name) - nodeHa, nodeCHa = HA(*newNode.nodestack.ha), HA(*newNode.clientstack.ha) - sigseed = randomString(32).encode() - verkey = SimpleSigner(seed=sigseed).naclSigner.verhex.decode() - changeNodeKeys(looper, newSteward, newStewardWallet, newNode, verkey) - initNodeKeysForBothStacks(newNode.name, tdirWithPoolTxns, 
sigseed, - override=True) - - logger.debug("{} starting with HAs {} {}".format(newNode, nodeHa, nodeCHa)) - node = TestNode(newNode.name, basedirpath=tdirWithPoolTxns, config=tconf, - ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) - looper.add(node) - # The last element of `txnPoolNodeSet` is the node Theta that was just - # stopped - txnPoolNodeSet[-1] = node - looper.run(checkNodesConnected(txnPoolNodeSet)) - waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1]) - ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, - *txnPoolNodeSet) - ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, - *txnPoolNodeSet) - - diff --git a/plenum/test/primary_election/test_primary_election_case1.py b/plenum/test/primary_election/test_primary_election_case1.py index 7fd4118963..6e9322f90f 100644 --- a/plenum/test/primary_election/test_primary_election_case1.py +++ b/plenum/test/primary_election/test_primary_election_case1.py @@ -22,6 +22,7 @@ delayOfNomination = 5 + @pytest.fixture() def case1Setup(startedNodes: TestNodeSet): nodes = startedNodes @@ -71,10 +72,9 @@ def testPrimaryElectionCase1(case1Setup, looper, keySharedNodes): # Node B sends multiple NOMINATE messages for Node D but only after A has # nominated itself - timeout = waits.expectedPoolNominationTimeout(nodeCount=1) + timeout = waits.expectedPoolNominationTimeout(nodeCount=len(keySharedNodes)) looper.run(eventually(checkNomination, nodeA, nodeA.name, - retryWait=.25, - timeout=timeout)) + retryWait=.25, timeout=timeout)) instId = getSelfNominationByNode(nodeA) diff --git a/plenum/test/primary_election/test_primary_forfeit.py b/plenum/test/primary_election/test_primary_forfeit.py index 783cba7418..adfb2acef1 100644 --- a/plenum/test/primary_election/test_primary_forfeit.py +++ b/plenum/test/primary_election/test_primary_forfeit.py @@ -13,4 +13,4 @@ def testPrimaryForfeit(looper, nodeSet, up, client1, wallet1): pr = getPrimaryReplica(nodeSet, instId=0) prNode = pr.node # TODO: 
Incomplete - pass + raise NotImplementedError diff --git a/plenum/test/primary_selection/test_primary_selection.py b/plenum/test/primary_selection/test_primary_selection.py index 823f50b43d..dc0b4d8fd1 100644 --- a/plenum/test/primary_selection/test_primary_selection.py +++ b/plenum/test/primary_selection/test_primary_selection.py @@ -8,7 +8,11 @@ from plenum.server.replica import Replica from plenum.test import waits from plenum.test.test_node import checkProtocolInstanceSetup, getPrimaryReplica -from plenum.test.view_change.conftest import viewNo + +# noinspection PyUnresolvedReferences +from plenum.test.view_change.conftest import viewNo, simulate_slow_master + +# noinspection PyUnresolvedReferences from plenum.test.view_change.test_view_change import viewChangeDone nodeCount = 7 diff --git a/plenum/test/replica/test_replica_reject_same_pre_prepare.py b/plenum/test/replica/test_replica_reject_same_pre_prepare.py index 110d3b504a..f21137e648 100644 --- a/plenum/test/replica/test_replica_reject_same_pre_prepare.py +++ b/plenum/test/replica/test_replica_reject_same_pre_prepare.py @@ -46,7 +46,8 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): logger.debug( "Decrementing the primary replica's pre-prepare sequence number by " "one...") - primaryRepl.lastPrePrepareSeqNo -= 1 + primaryRepl._lastPrePrepareSeqNo -= 1 + view_no = primaryRepl.viewNo request2 = sendRandomRequest(wallet1, client1) timeout = waits.expectedPrePrepareTime(len(nodeSet)) looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2, @@ -89,5 +90,6 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): npr, request2.identifier, request2.reqId, + view_no, retryWait=1, timeout=timeout)) diff --git a/plenum/test/script/test_change_node_ha.py b/plenum/test/script/test_change_node_ha.py index 2abc434678..4ca4820e12 100644 --- a/plenum/test/script/test_change_node_ha.py +++ b/plenum/test/script/test_change_node_ha.py @@ -1,18 +1,23 @@ -# TODO: 
This is failing as of now, fix it -# def testStopScriptIfNodeIsRunning(looper, txnPoolNodeSet, poolTxnData, -# poolTxnStewardData, tconf): -# nodeName = txnPoolNodeSet[0].name -# nodeSeed = poolTxnData["seeds"][nodeName].encode() -# stewardName, stewardsSeed = poolTxnStewardData -# ip, port = genHa() -# nodeStackNewHA = HA(ip, port) -# -# # the node `nodeName` is not stopped here -# -# # change HA -# with pytest.raises(Exception, message="Node '{}' must be stopped " -# "before".format(nodeName)): -# changeHA(looper, tconf, nodeName, nodeSeed, nodeStackNewHA, -# stewardName, stewardsSeed) +import pytest +from stp_core.network.port_dispenser import genHa +from stp_core.types import HA +from plenum.common.script_helper import changeHA + +@pytest.mark.skip(reason='INDY-99') +def testStopScriptIfNodeIsRunning(looper, txnPoolNodeSet, poolTxnData, + poolTxnStewardData, tconf): + nodeName = txnPoolNodeSet[0].name + nodeSeed = poolTxnData["seeds"][nodeName].encode() + stewardName, stewardsSeed = poolTxnStewardData + ip, port = genHa() + nodeStackNewHA = HA(ip, port) + + # the node `nodeName` is not stopped here + + # change HA + with pytest.raises(Exception, message="Node '{}' must be stopped " + "before".format(nodeName)): + changeHA(looper, tconf, nodeName, nodeSeed, nodeStackNewHA, + stewardName, stewardsSeed) diff --git a/plenum/test/stasher.py b/plenum/test/stasher.py index 451ff67704..859c1be849 100644 --- a/plenum/test/stasher.py +++ b/plenum/test/stasher.py @@ -45,7 +45,7 @@ def stashAll(self, age): self.delayeds.append((age + secondsToDelay, rx)) self.queue.remove(rx) - def unstashAll(self, age): + def unstashAll(self, age, ignore_age_check=False): """ Not terribly efficient, but for now, this is only used for testing. 
HasActionQueue is more efficient about knowing when to iterate through @@ -53,16 +53,24 @@ def unstashAll(self, age): :param age: seconds since Stasher started """ + unstashed = 0 for d in self.delayeds: - if age >= d[0]: + # This is in-efficient as `ignore_age_check` wont change during loop + # but its ok since its a testing util. + if ignore_age_check or age >= d[0]: + msg = '(forced)' if ignore_age_check else '({:.0f} milliseconds overdue)'\ + .format((age - d[0]) * 1000) logger.debug( - "{} unstashing message {} ({:.0f} milliseconds overdue)". - format(self.name, d[1], (age - d[0]) * 1000)) + "{} unstashing message {} {}". + format(self.name, d[1], msg)) self.queue.appendleft(d[1]) self.delayeds.remove(d) + unstashed += 1 + return unstashed def resetDelays(self): logger.debug("{} resetting delays".format(self.name)) self.delayRules = set() - + def force_unstash(self): + return self.unstashAll(0, ignore_age_check=True) diff --git a/plenum/test/test_delay.py b/plenum/test/test_delay.py index 7984e0209b..50c4137be1 100644 --- a/plenum/test/test_delay.py +++ b/plenum/test/test_delay.py @@ -44,6 +44,7 @@ def testTestNodeDelay(tdir_for_func): # reset the delay, and find another message comes quickly nodeB.nodeIbStasher.resetDelays() + nodeB.nodeIbStasher.force_unstash() looper.run(sendMessageAndCheckDelivery(nodes, nodeA, nodeB)) diff --git a/plenum/test/test_log_rotation.py b/plenum/test/test_log_rotation.py index e9badae8b0..c2585b1639 100644 --- a/plenum/test/test_log_rotation.py +++ b/plenum/test/test_log_rotation.py @@ -2,7 +2,6 @@ import logging import shutil import time -import pytest from stp_core.common.logging.TimeAndSizeRotatingFileHandler \ import TimeAndSizeRotatingFileHandler diff --git a/plenum/test/test_node.py b/plenum/test/test_node.py index faa1c8981c..3db4eb01a8 100644 --- a/plenum/test/test_node.py +++ b/plenum/test/test_node.py @@ -148,6 +148,15 @@ def resetDelays(self): for r in self.replicas: r.outBoxTestStasher.resetDelays() + def 
force_process_delayeds(self): + c = self.nodestack.force_process_delayeds() + c += self.nodeIbStasher.force_unstash() + for r in self.replicas: + c += r.outBoxTestStasher.force_unstash() + logger.debug("{} forced processing of delayed messages, {} processed in total". + format(self, c)) + return c + def whitelistNode(self, nodeName: str, *codes: int): if nodeName not in self.whitelistedClients: self.whitelistedClients[nodeName] = set() @@ -585,9 +594,11 @@ def chk(): def checkEveryProtocolInstanceHasOnlyOnePrimary(looper: Looper, nodes: Sequence[TestNode], retryWait: float = None, - timeout: float = None): + timeout: float = None, + numInstances: int = None): - coro = eventually(instances, nodes, retryWait=retryWait, timeout=timeout) + coro = eventually(instances, nodes, numInstances, + retryWait=retryWait, timeout=timeout) insts, timeConsumed = timeThis(looper.run, coro) newTimeout = timeout - timeConsumed if timeout is not None else None for instId, replicas in insts.items(): @@ -617,14 +628,16 @@ def checkAtMostOnePrim(node): def checkProtocolInstanceSetup(looper: Looper, nodes: Sequence[TestNode], retryWait: float = 1, - customTimeout: float = None): + customTimeout: float = None, + numInstances: int = None): timeout = customTimeout or waits.expectedPoolElectionTimeout(len(nodes)) checkEveryProtocolInstanceHasOnlyOnePrimary(looper=looper, nodes=nodes, retryWait=retryWait, - timeout=timeout) + timeout=timeout, + numInstances=numInstances) checkEveryNodeHasAtMostOnePrimary(looper=looper, nodes=nodes, @@ -641,12 +654,14 @@ def checkProtocolInstanceSetup(looper: Looper, def ensureElectionsDone(looper: Looper, nodes: Sequence[TestNode], retryWait: float = None, # seconds - customTimeout: float = None) -> Sequence[TestNode]: + customTimeout: float = None, + numInstances: int = None) -> Sequence[TestNode]: """ Wait for elections to be complete :param retryWait: :param customTimeout: specific timeout + :param numInstances: expected number of protocol instances :return: 
primary replica for each protocol instance """ @@ -660,7 +675,8 @@ def ensureElectionsDone(looper: Looper, looper=looper, nodes=nodes, retryWait=retryWait, - customTimeout=customTimeout) + customTimeout=customTimeout, + numInstances=numInstances) def genNodeReg(count=None, names=None) -> Dict[str, NodeDetail]: @@ -686,9 +702,6 @@ def extractCliNodeReg(self): def prepareNodeSet(looper: Looper, nodeSet: TestNodeSet): # TODO: Come up with a more specific name for this - # for n in nodeSet: - # n.startKeySharing() - # Key sharing party looper.run(checkNodesConnected(nodeSet)) @@ -719,11 +732,13 @@ def timeThis(func, *args, **kwargs): return res, time.perf_counter() - s -def instances(nodes: Sequence[Node]) -> Dict[int, List[replica.Replica]]: - instCount = getRequiredInstances(len(nodes)) +def instances(nodes: Sequence[Node], + numInstances: int = None) -> Dict[int, List[replica.Replica]]: + numInstances = (getRequiredInstances(len(nodes)) + if numInstances is None else numInstances) for n in nodes: - assert len(n.replicas) == instCount - return {i: [n.replicas[i] for n in nodes] for i in range(instCount)} + assert len(n.replicas) == numInstances + return {i: [n.replicas[i] for n in nodes] for i in range(numInstances)} def getRequiredInstances(nodeCount: int) -> int: @@ -759,6 +774,7 @@ def get_master_primary_node(nodes): if node.replicas[0].primaryName is not None: nm = TestReplica.getNodeName(node.replicas[0].primaryName) return nodeByName(nodes, nm) + raise AssertionError('No primary found for master') def primaryNodeNameForInstance(nodes, instanceId): diff --git a/plenum/test/test_node_connection.py b/plenum/test/test_node_connection.py index 12853a9980..f4ea011b2e 100644 --- a/plenum/test/test_node_connection.py +++ b/plenum/test/test_node_connection.py @@ -13,7 +13,7 @@ from plenum.test import waits from plenum.test.helper import stopNodes from plenum.test.test_node import TestNode, checkNodesConnected, \ - checkProtocolInstanceSetup + checkProtocolInstanceSetup, 
ensureElectionsDone from stp_core.network.port_dispenser import genHa logger = getlogger() @@ -48,7 +48,7 @@ def tdirAndLooper(nodeReg): yield td, looper -@pytest.mark.skip(reason='INDY-75') +@pytest.mark.skip(reason='INDY-109. Intermittent failures') def testNodesConnectsWhenOneNodeIsLate(allPluginsPath, tdirAndLooper, nodeReg): tdir, looper = tdirAndLooper @@ -64,30 +64,28 @@ def create(name): nodes.append(node) return node - for name in names[:3]: + for name in names: create(name) logger.debug("Creating keys") for node in nodes: tellKeysToOthers(node, nodes) + + for node in nodes[:3]: looper.add(node) - looper.run(checkNodesConnected(nodes)) + looper.run(checkNodesConnected(nodes[:3])) # wait for the election to complete with the first three nodes - looper.runFor(10) + ensureElectionsDone(looper, nodes[:3], numInstances=2) - # create the fourth and see that it learns who the primaries are + # start the fourth and see that it learns who the primaries are # from the other nodes - lateNode = create(names[3]) - for node in nodes[:-1]: - tellKeysToOthers(lateNode, node) - tellKeysToOthers(node, lateNode) - - looper.add(lateNode) - # TODO set timeout from 'waits' after the test enabled - checkProtocolInstanceSetup(looper, nodes, customTimeout=10) + looper.add(nodes[3]) + + # ensure election is done for updated pool + ensureElectionsDone(looper, nodes) stopNodes(nodes, looper) diff --git a/plenum/test/test_node_request.py b/plenum/test/test_node_request.py index 26408d1452..f2e83dcd42 100644 --- a/plenum/test/test_node_request.py +++ b/plenum/test/test_node_request.py @@ -44,53 +44,11 @@ async def chk(): if arg.instId == node.instances.masterId: assert result else: - assert result is None + assert result is False timeout = waits.expectedOrderingTime(nodeSet.nodes['Alpha'].instances.count) looper.run(eventually(chk, timeout=timeout)) -# noinspection PyIncorrectDocstring -@pytest.mark.skip(reason="SOV-539. 
Implementation changed") -def testRequestReturnToNodeWhenPrePrepareNotReceivedByOneNode(tdir_for_func): - """Test no T-3""" - nodeNames = genNodeNames(7) - nodeReg = genNodeReg(names=nodeNames) - with TestNodeSet(nodeReg=nodeReg, tmpdir=tdir_for_func) as nodeSet: - with Looper(nodeSet) as looper: - prepareNodeSet(looper, nodeSet) - logger.debug("Add the seven nodes back in") - # Every node except A delays self nomination so A can become primary - nodeA = addNodeBack(nodeSet, looper, nodeNames[0]) - for i in range(1, 7): - node = addNodeBack(nodeSet, looper, nodeNames[i]) - node.delaySelfNomination(15) - - nodeB = nodeSet.getNode(nodeNames[1]) - # Node B delays PREPREPARE from node A(which would be the primary) - # for a long time. - nodeB.nodeIbStasher.delay( - delayerMsgTuple(120, PrePrepare, nodeA.name)) - - # Ensure elections are done - ensureElectionsDone(looper=looper, nodes=nodeSet) - assert nodeA.hasPrimary - - instNo = nodeA.primaryReplicaNo - client1, wallet1 = setupClient(looper, nodeSet, tmpdir=tdir_for_func) - req = sendRandomRequest(wallet1, client1) - - # All nodes including B should return their ordered requests - for node in nodeSet: - # TODO set timeout from 'waits' after the test enabled - looper.run(eventually(checkRequestReturnedToNode, node, - wallet1.defaultId, req.reqId, - instNo, retryWait=1, timeout=30)) - - # Node B should not have received the PRE-PREPARE request yet - replica = nodeB.replicas[instNo] # type: Replica - assert len(replica.prePrepares) == 0 - - def testPrePrepareWhenPrimaryStatusIsUnknown(tdir_for_func): nodeNames = genNodeNames(4) nodeReg = genNodeReg(names=nodeNames) @@ -103,14 +61,13 @@ def testPrePrepareWhenPrimaryStatusIsUnknown(tdir_for_func): # Nodes C and D delays self nomination so A and B can become # primaries - nodeC.delaySelfNomination(30) - nodeD.delaySelfNomination(30) + nodeC.delaySelfNomination(10) + nodeD.delaySelfNomination(10) # Node D delays receiving PRIMARY messages from all nodes so it # will not 
know whether it is primary or not - # nodeD.nodestack.delay(delayer(20, PRIMARY)) - delayD = 20 + delayD = 5 nodeD.nodeIbStasher.delay(delayerMsgTuple(delayD, Primary)) checkPoolReady(looper=looper, nodes=nodeSet) @@ -148,13 +105,18 @@ def assertTwoPrepare(): timeout = waits.expectedPrePrepareTime(len(nodeSet)) looper.run(eventually(assertTwoPrepare, retryWait=1, timeout=timeout)) + # Its been checked above that replica stashes 3 phase messages in + # lack of primary, now avoid delay (fix the network) + nodeD.nodeIbStasher.resetDelays() + nodeD.nodeIbStasher.force_unstash() + # Node D should have no pending PRE-PREPARE, PREPARE or COMMIT # requests for reqType in [PrePrepare, Prepare, Commit]: looper.run(eventually(lambda: assertLength( getPendingRequestsForReplica(nodeD.replicas[instNo], reqType), - 0), retryWait=1, timeout=delayD)) + 0), retryWait=1, timeout=delayD)) # wait little more than delay async def checkIfPropagateRecvdFromNode(recvrNode: TestNode, @@ -171,7 +133,7 @@ async def checkIfPropagateRecvdFromNode(recvrNode: TestNode, "or implement a `stats` feature in ZStack") def testMultipleRequests(tdir_for_func): """ - Send multiple requests to the client + Send multiple requests to the node """ with TestNodeSet(count=7, tmpdir=tdir_for_func) as nodeSet: with Looper(nodeSet) as looper: diff --git a/plenum/test/test_stack.py b/plenum/test/test_stack.py index 8e34e8d4f9..9a43887dcc 100644 --- a/plenum/test/test_stack.py +++ b/plenum/test/test_stack.py @@ -44,6 +44,9 @@ async def _serviceStack(self, age): def resetDelays(self): self.stasher.resetDelays() + def force_process_delayeds(self): + return self.stasher.force_unstash() + class StackedTester: def checkIfConnectedTo(self, count=None): diff --git a/plenum/test/test_stasher.py b/plenum/test/test_stasher.py index 4666087f0f..254f87bc5a 100644 --- a/plenum/test/test_stasher.py +++ b/plenum/test/test_stasher.py @@ -40,4 +40,9 @@ def delayTwos(item): s.process() r3 = x.popleft() - assert r3 == 2 \ No newline 
at end of file + assert r3 == 2 + + x.append(2) + s.resetDelays() + s.process() + assert 2 == x.popleft() diff --git a/plenum/test/test_testable.py b/plenum/test/test_testable.py index f70d782252..72273b6f4d 100644 --- a/plenum/test/test_testable.py +++ b/plenum/test/test_testable.py @@ -23,8 +23,8 @@ def checkOneInit(self, z, params): with self.subTest( "ensure __init__ entry is there for {}".format(params)): inits = z.spylog.getAll('__init__') - self.assertEquals(len(inits), 1, "expected only 1 init entry") - self.assertEquals(inits[0][pr], (params, None), + self.assertEqual(len(inits), 1, "expected only 1 init entry") + self.assertEqual(inits[0][pr], (params, None), "expected proper init params and result") def runThroughAssertions(self, z, ovrdCornResult: str = None): @@ -39,7 +39,7 @@ def runThroughAssertions(self, z, ovrdCornResult: str = None): with self.subTest( "ensure first call of method #2 returns the proper params and result"): z.eatCorn('canned') - self.assertEquals(z.spylog.getLast('eatCorn')[pr], + self.assertEqual(z.spylog.getLast('eatCorn')[pr], ({'kind': 'canned'}, yucky)) assert z.spylog.count('eatCorn') == 1 @@ -62,13 +62,13 @@ def runThroughAssertions(self, z, ovrdCornResult: str = None): with self.subTest("ensure entries accummulate properly"): z.eatCorn('canned') z.eatCorn('whirled') - self.assertEquals(len(z.spylog), 7, + self.assertEqual(len(z.spylog), 7, "expected 7 entries in the spy log") z.eatCorn('creamed') z.eatCorn('creamed') z.eatCorn('spun') z.mymethod("hello again") - self.assertEquals(len(z.spylog), 11, + self.assertEqual(len(z.spylog), 11, "expected 11 entries in the spy log") with self.subTest("exceptions are handled gracefully (logged, etc.)"): @@ -107,15 +107,15 @@ def testSpyOnSubsetOfMethods(self): def go(methods, ec: "expected counts"): SpySubClass = spyable(methods=methods)(SubClass) z = SpySubClass('a', 'b') - self.assertEquals(len(z.spylog), ec[0], + self.assertEqual(len(z.spylog), ec[0], "expected certain number of 
entries in the spy log") z.eatCorn('canned') z.eatCorn('whirled') - self.assertEquals(len(z.spylog), ec[1], + self.assertEqual(len(z.spylog), ec[1], "expected certain number of entries in the spy log") z.eatCorn('creamed') z.mymethod("hello again") - self.assertEquals(len(z.spylog), ec[2], + self.assertEqual(len(z.spylog), ec[2], "expected certain number of entries in the spy log") with TestHelpers.subTest(self, "No subset"): @@ -138,9 +138,9 @@ def testSpyOnOverriddenClassMethod(self): z = SpySubClass('a', 'b') z.mymethod("hi") z.eatCorn("canned") - self.assertEquals(z.spylog.getLast('eatCorn')[pr], + self.assertEqual(z.spylog.getLast('eatCorn')[pr], ({'kind': 'canned'}, 'hooray!')) - self.assertEquals(z.spylog.getLast('mymethod')[pr], + self.assertEqual(z.spylog.getLast('mymethod')[pr], ({'inp': 'hi'}, None)) def testSpyOnOverriddenBaseClassMethod(self): @@ -149,8 +149,8 @@ def testSpyOnOverriddenBaseClassMethod(self): z = SpySubClass('a', 'b') z.mymethod("hi") z.eatCorn("canned") - self.assertEquals(z.spylog.getLast('eatCorn'), None) - self.assertEquals(z.spylog.getLast('mymethod')[pr], + self.assertEqual(z.spylog.getLast('eatCorn'), None) + self.assertEqual(z.spylog.getLast('mymethod')[pr], ({'inp': 'hi'}, None)) def testSpyOnCertainClass(self): @@ -161,8 +161,8 @@ def testSpyOnCertainClass(self): z = SpySubClass('a', 'b') z.mymethod("hi") z.eatCorn("canned") - self.assertEquals(z.spylog.getLast('eatCorn'), None) - self.assertEquals(z.spylog.getLast('mymethod')[pr], ({'inp': 'hi'}, + self.assertEqual(z.spylog.getLast('eatCorn'), None) + self.assertEqual(z.spylog.getLast('mymethod')[pr], ({'inp': 'hi'}, None)) diff --git a/plenum/test/view_change/conftest.py b/plenum/test/view_change/conftest.py index b8aebedf90..cf35163716 100644 --- a/plenum/test/view_change/conftest.py +++ b/plenum/test/view_change/conftest.py @@ -1,6 +1,10 @@ import pytest -from plenum.test.test_node import ensureElectionsDone +from plenum.common.util import adict +from plenum.test.delayers 
import delayNonPrimaries +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ + waitForViewChange +from plenum.test.test_node import ensureElectionsDone, get_master_primary_node @pytest.fixture() @@ -10,3 +14,23 @@ def viewNo(nodeSet): viewNos.add(n.viewNo) assert len(viewNos) == 1 return viewNos.pop() + + +@pytest.fixture() +def simulate_slow_master(nodeSet, looper, up, wallet1, client1, viewNo): + def _(): + m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + # Delay processing of PRE-PREPARE from all non primary replicas of master + # so master's performance falls and view changes + delayNonPrimaries(nodeSet, 0, 10) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) + + try: + waitForViewChange(looper, nodeSet, expectedViewNo=viewNo+1) + except AssertionError as e: + raise RuntimeError('view did not change') from e + ensureElectionsDone(looper=looper, nodes=nodeSet) + new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + return adict(old=m_primary_node, new=new_m_primary_node) + return _ diff --git a/plenum/test/view_change/helper.py b/plenum/test/view_change/helper.py index d62a9a5839..7564393b3d 100644 --- a/plenum/test/view_change/helper.py +++ b/plenum/test/view_change/helper.py @@ -2,6 +2,7 @@ from plenum.test.helper import checkViewNoForNodes, sendRandomRequests, \ sendReqsToNodesAndVerifySuffReplies +from plenum.test.test_node import get_master_primary_node from stp_core.common.log import getlogger from stp_core.loop.eventually import eventually from plenum.test import waits @@ -44,7 +45,10 @@ def provoke_and_wait_for_view_change(looper, def ensure_view_change(looper, nodes, client, wallet): - sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2) + """ + This method patches the master performance check to return False and thus + ensures that all given nodes do a view change + """ old_view_no = checkViewNoForNodes(nodes) old_meths = {} @@ -55,7 +59,10 @@ def 
ensure_view_change(looper, nodes, client, wallet): def slow_master(self): # Only allow one view change - return self.totalViewChanges == view_changes[self.name] + rv = self.totalViewChanges == view_changes[self.name] + if rv: + logger.info('{} making master look slow'.format(self)) + return rv node.monitor.isMasterDegraded = types.MethodType(slow_master, node.monitor) @@ -66,3 +73,42 @@ def slow_master(self): for node in nodes: node.monitor.isMasterDegraded = old_meths[node.name] return old_view_no + 1 + + +def check_each_node_reaches_same_end_for_view(nodes, view_no): + # Check if each node agreed on the same ledger summary and last ordered + # seq no for same view + args = {} + vals = {} + for node in nodes: + params = [e.params for e in node.replicas[0].spylog.getAll( + node.replicas[0].primary_changed.__name__) + if e.params['view_no'] == view_no] + assert params + args[node.name] = (params[0]['last_ordered_pp_seq_no'], + params[0]['ledger_summary']) + vals[node.name] = node.replicas[0].view_ends_at[view_no-1] + + arg = list(args.values())[0] + for a in args.values(): + assert a == arg + + val = list(args.values())[0] + for v in vals.values(): + assert v == val + + +def do_vc(looper, nodes, client, wallet, old_view_no=None): + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) + new_view_no = ensure_view_change(looper, nodes, client, wallet) + if old_view_no: + assert new_view_no - old_view_no >= 1 + return new_view_no + + +def disconnect_master_primary(nodes): + pr_node = get_master_primary_node(nodes) + for node in nodes: + if node != pr_node: + node.nodestack.getRemote(pr_node.nodestack.name).disconnect() + return pr_node diff --git a/plenum/test/view_change/test_disable_view_change.py b/plenum/test/view_change/test_disable_view_change.py new file mode 100644 index 0000000000..c89ef14ad8 --- /dev/null +++ b/plenum/test/view_change/test_disable_view_change.py @@ -0,0 +1,17 @@ +import pytest + + +@pytest.fixture(scope="module") +def 
disable_view_change_config(tconf): + tconf.unsafe.add('disable_view_change') + return tconf + + +def test_disable_view_change(disable_view_change_config, simulate_slow_master): + assert disable_view_change_config + assert isinstance(disable_view_change_config.unsafe, set) + assert 'disable_view_change' in disable_view_change_config.unsafe + + with pytest.raises(RuntimeError) as e_info: + simulate_slow_master() + assert e_info.value.args == ('view did not change',) diff --git a/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py b/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py index 33f25d7fed..992f226c1d 100644 --- a/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py +++ b/plenum/test/view_change/test_discard_inst_chng_msg_from_past_view.py @@ -14,7 +14,7 @@ def testDiscardInstChngMsgFrmPastView(nodeSet, looper, ensureView): curViewNo = ensureView # Send an instance change for an old instance message to all nodes - icMsg = nodeSet.Alpha._create_instance_change_msg(curViewNo - 1, 0) + icMsg = nodeSet.Alpha._create_instance_change_msg(curViewNo, 0) nodeSet.Alpha.send(icMsg) # ensure every node but Alpha discards the invalid instance change request diff --git a/plenum/test/view_change/test_instance_change_msg_checking.py b/plenum/test/view_change/test_instance_change_msg_checking.py index 07c65a0f79..838fab5b58 100644 --- a/plenum/test/view_change/test_instance_change_msg_checking.py +++ b/plenum/test/view_change/test_instance_change_msg_checking.py @@ -3,7 +3,7 @@ from plenum.test import waits from plenum.test.test_node import TestNode -DISCARD_REASON = 'viewNo has incorrect type' +DISCARD_REASON = "validation error: expected types" whitelist = [DISCARD_REASON,] @@ -11,18 +11,27 @@ def testInstanceChangeMsgTypeChecking(nodeSet, looper, up): nodeA = nodeSet.Alpha nodeB = nodeSet.Beta - + ridBeta = nodeA.nodestack.getRemote(nodeB.name).uid - badViewNo = "BAD" - icMsg = 
nodeSet.Alpha._create_instance_change_msg(badViewNo, 0) + + def createInstanceChangeMessage(): + # Creating a message this way to exclude + # client-side validation of viewNo + + goodViewNo = 1 + badViewNo = "BAD" + icMsg = nodeSet.Alpha._create_instance_change_msg(goodViewNo, 0) + icMsg._fields["viewNo"] = badViewNo + return icMsg + + icMsg = createInstanceChangeMessage() nodeA.send(icMsg, ridBeta) looper.runFor(0.2) params = nodeB.spylog.getLastParams(TestNode.discard) def chk(): - assert isinstance(params['msg'], InstanceChange) - assert DISCARD_REASON in params['reason'] + assert DISCARD_REASON in str(params['reason']) timeout = waits.expectedNodeToNodeMessageDeliveryTime() looper.run(eventually(chk, timeout=timeout)) diff --git a/plenum/test/view_change/test_view_change.py b/plenum/test/view_change/test_view_change.py index fb9dbe0b8d..fd67fd0bf9 100644 --- a/plenum/test/view_change/test_view_change.py +++ b/plenum/test/view_change/test_view_change.py @@ -1,34 +1,21 @@ import types -from functools import partial import pytest -from stp_core.loop.eventually import eventually from plenum.server.node import Node from plenum.test.delayers import delayNonPrimaries from plenum.test.helper import waitForViewChange, \ sendReqsToNodesAndVerifySuffReplies -from plenum.test.test_node import getPrimaryReplica, get_master_primary_node, \ +from plenum.test.test_node import get_master_primary_node, getPrimaryReplica, \ ensureElectionsDone -from plenum.test.test_node import getPrimaryReplica, ensureElectionsDone nodeCount = 7 -# noinspection PyIncorrectDocstring @pytest.fixture() -def viewChangeDone(nodeSet, looper, up, wallet1, client1, viewNo): - m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) - # Delay processing of PRE-PREPARE from all non primary replicas of master - # so master's performance falls and view changes - delayNonPrimaries(nodeSet, 0, 10) - - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) - - waitForViewChange(looper, 
nodeSet, expectedViewNo=viewNo+1) - ensureElectionsDone(looper=looper, nodes=nodeSet) - new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) - assert m_primary_node.name != new_m_primary_node.name +def viewChangeDone(simulate_slow_master): + primary_node = simulate_slow_master() + assert primary_node.old.name != primary_node.new.name # noinspection PyIncorrectDocstring diff --git a/plenum/test/view_change/test_view_change_happens_post_timeout.py b/plenum/test/view_change/test_view_change_happens_post_timeout.py index 076f77028e..e2f5ceed7d 100644 --- a/plenum/test/view_change/test_view_change_happens_post_timeout.py +++ b/plenum/test/view_change/test_view_change_happens_post_timeout.py @@ -1,4 +1,8 @@ +import pytest + + +@pytest.mark.skip(reason='INDY-101. Not implemented') def test_view_change_happens_post_timeout(): # TODO: # View change should not happen unless the timeout expires - pass + raise NotImplementedError diff --git a/plenum/test/view_change/test_view_change_not_gamable.py b/plenum/test/view_change/test_view_change_not_gamable.py index f7493c724f..132094ed14 100644 --- a/plenum/test/view_change/test_view_change_not_gamable.py +++ b/plenum/test/view_change/test_view_change_not_gamable.py @@ -1,5 +1,9 @@ +import pytest + + +@pytest.mark.skip('INDY-102. 
Not implemented') def test_view_change_not_gamable(): # # TODO: A malicious node should not be able to disrupt a # view change by sending a message too early, this decreasing the # available time to get enough view change messages - pass + raise NotImplementedError diff --git a/plenum/test/waits.py b/plenum/test/waits.py index 51ec77f36e..7200f10a45 100644 --- a/plenum/test/waits.py +++ b/plenum/test/waits.py @@ -67,8 +67,10 @@ def expectedPoolInterconnectionTime(nodeCount): # bug (`'str' object has no attribute 'keys'`) which supposed to be # fixed in the 3pcbatch feature # https://evernym.atlassian.net/browse/SOV-995 - return interconnectionCount * nodeConnectionTimeout + \ - KITZStack.RETRY_TIMEOUT_RESTRICTED + # multiply by 2 because we need to re-create connections which can be done on a second re-try only + # (we may send pings on some of the re-tries) + return min(90, + interconnectionCount * nodeConnectionTimeout + 2 * KITZStack.RETRY_TIMEOUT_RESTRICTED + 2) def expectedPoolDisconnectionTime(nodeCount): @@ -241,7 +243,7 @@ def expectedClientConsistencyProof(nodeCount): From: the Client is connected to the Pool To: the Client finished the consistency proof procedure """ - qN = util.getQuorum(nodeCount) + qN = util.get_strong_quorum(nodeCount) return qN * __Peer2PeerRequestExchangeTime + \ config.ConsistencyProofsTimeout @@ -251,7 +253,7 @@ def expectedClientCatchupTime(nodeCount): From: the Client finished the consistency proof procedure To: the Client finished the catchup procedure """ - qN = util.getQuorum(nodeCount) + qN = util.get_strong_quorum(nodeCount) return qN * 2 * __Peer2PeerRequestExchangeTime + \ config.CatchupTransactionsTimeout @@ -261,14 +263,10 @@ def expectedClientToPoolRequestDeliveryTime(nodeCount): From: the Client send a request To: the request is delivered to f nodes """ - qN = util.getQuorum(nodeCount) + qN = util.get_strong_quorum(nodeCount) return __Peer2PeerRequestExchangeTime * qN -def 
expectedClientToNodeMessageDeliveryTime(nodeCount): - return 1 * nodeCount - - def expectedClientRequestPropagationTime(nodeCount): """ From: The requests are sent diff --git a/plenum/test/zstack_tests/test_zstack_reconnection.py b/plenum/test/zstack_tests/test_zstack_reconnection.py index 53181dc9eb..1a30c9d145 100644 --- a/plenum/test/zstack_tests/test_zstack_reconnection.py +++ b/plenum/test/zstack_tests/test_zstack_reconnection.py @@ -10,6 +10,9 @@ logger = getlogger() +TestRunningTimeLimitSec = 300 + + @pytest.fixture(scope="module") def tconf(conf, tdirWithPoolTxns): conf.UseZStack = True @@ -29,7 +32,6 @@ def checkNodesSendingCommits(nodeSet): assert otherReplica.name in senders -@pytest.mark.skip(reason='SOV-1020') def testZStackNodeReconnection(tconf, looper, txnPoolNodeSet, client1, wallet1, tdirWithPoolTxns, client1Connected): sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) @@ -53,7 +55,7 @@ def checkFlakyConnected(conn=True): looper.runFor(1) stopNodes([nodeToCrash], looper) # TODO Select or create the timeout from 'waits'. Don't use constant. 
- looper.run(eventually(checkFlakyConnected, False, retryWait=1, timeout=35)) + looper.run(eventually(checkFlakyConnected, False, retryWait=1, timeout=60)) looper.runFor(1) node = TestNode(nodeToCrash.name, basedirpath=tdirWithPoolTxns, config=tconf, ha=nodeToCrash.nodestack.ha, cliha=nodeToCrash.clientstack.ha) diff --git a/setup.py b/setup.py index 3adf664df0..4129f76f53 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['ledger==0.2.14', 'stp==0.1.9', + install_requires=['ledger==0.2.14', 'stp==0.1.10', 'state-trie==0.1.1', 'jsonpickle', 'prompt_toolkit==0.57', 'pygments', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', From c5442af627c710d033a5ac41da8994d7efc1f1a9 Mon Sep 17 00:00:00 2001 From: Jason Law Date: Mon, 5 Jun 2017 12:42:46 -0600 Subject: [PATCH 010/100] resolved problems introduced from earlier merge Signed-off-by: Jason Law Sign-off-executed-by: toktar Approved-at: h-master --- plenum/server/models.py | 3 +- plenum/server/node.py | 32 ------------ plenum/server/propagator.py | 5 -- plenum/server/replica.py | 42 +--------------- .../test_no_forwarding_without_election.py | 50 ------------------- plenum/test/test_log_rotation.py | 4 -- 6 files changed, 3 insertions(+), 133 deletions(-) delete mode 100644 plenum/test/node_request/test_no_forwarding_without_election.py diff --git a/plenum/server/models.py b/plenum/server/models.py index 8d3582b431..a3fe87cf18 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -11,8 +11,7 @@ InsChgVotes = NamedTuple("InsChg", [ ("viewNo", int), - ("voters", Set[str]), - ('last_ordered', Dict[str, Dict[int, int]])]) + ("voters", Set[str])]) class TrackedMsgs(dict): diff --git a/plenum/server/node.py b/plenum/server/node.py index c06d8b2d66..328eddf707 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -328,15 +328,6 @@ def __init__(self, # help in voting for/against a view change. 
self.lost_primary_at = None - self._primary_replica_no = None - - # Need to keep track of the time when lost connection with primary, - # help in voting for/against a view change. - self.lost_primary_at = None - - # First view change message received for a view no - self.view_change_started_at = {} - tp = loadPlugins(self.basedirpath) logger.debug("total plugins loaded in node: {}".format(tp)) # TODO: this is already happening in `start`, why here then? @@ -415,21 +406,6 @@ def domainLedgerStatus(self): return LedgerStatus(DOMAIN_LEDGER_ID, self.domainLedger.size, self.domainLedger.root_hash) - def getLedgerRootHash(self, ledgerId, isCommitted=True): - ledgerInfo = self.ledgerManager.getLedgerInfoByType(ledgerId) - if not ledgerInfo: - raise RuntimeError('Ledger with id {} does not exist') - ledger = ledgerInfo.ledger - if isCommitted: - return ledger.root_hash - return ledger.uncommittedRootHash or ledger.root_hash - - def stateRootHash(self, ledgerId, isCommitted=True): - state = self.states.get(ledgerId) - if not state: - raise RuntimeError('State with id {} does not exist') - return state.committedHeadHash if isCommitted else state.headHash - @property def ledger_ids(self): return [POOL_LEDGER_ID, DOMAIN_LEDGER_ID] @@ -1726,14 +1702,6 @@ def do_view_change_if_possible(self, view_no): logger.debug(msg) return r - def do_view_change_if_possible(self, view_no): - if self.canViewChange(view_no): - logger.info("{} initiating a view change to {} from {}". - format(self, view_no, self.viewNo)) - self.startViewChange(view_no) - return True - return False - def checkPerformance(self): """ Check if master instance is slow and send an instance change request. 
diff --git a/plenum/server/propagator.py b/plenum/server/propagator.py index de92313cb3..c14d22c6b8 100644 --- a/plenum/server/propagator.py +++ b/plenum/server/propagator.py @@ -118,11 +118,6 @@ def digest(self, reqKey: Tuple) -> str: class Propagator: def __init__(self): self.requests = Requests() - # If the node does not have any primary and at least one protocol - # instance is missing a primary then add the request in - # `reqs_stashed_for_primary`. Note that this does not prevent the - # request from being processed as its marked as finalised - self.reqs_stashed_for_primary = deque() # noinspection PyUnresolvedReferences def propagate(self, request: Request, clientName): diff --git a/plenum/server/replica.py b/plenum/server/replica.py index c33337a87e..f8570c9be8 100644 --- a/plenum/server/replica.py +++ b/plenum/server/replica.py @@ -18,7 +18,6 @@ from plenum.common.exceptions import SuspiciousNode, \ InvalidClientMessageException, UnknownIdentifier from plenum.common.signing import serialize -from plenum.common.txn_util import reqToTxn from plenum.common.types import PrePrepare, \ Prepare, Commit, Ordered, ThreePhaseMsg, ThreePhaseKey, ThreePCState, \ CheckpointState, Checkpoint, Reject, f, InstanceChange @@ -257,42 +256,6 @@ def txnRootHash(self, ledgerId, toHex=True): root = hexlify(root).decode() return root - def stateRootHash(self, ledgerId, toHex=True): - if not self.isMaster: - return None - root = self.node.getState(ledgerId).headHash - if toHex: - root = hexlify(root).decode() - return root - - # Queues used in PRE-PREPARE for each ledger, - self.requestQueues = {} # type: Dict[int, deque] - for ledgerId in self.node.ledgerManager.ledgerRegistry: - self.requestQueues[ledgerId] = deque() - - # Batches with key as ppSeqNo of batch and value as a tuple of number - # of txns and the time as batch was created/received and the state root - # hash for the batch - self.batches = OrderedDict() # type: OrderedDict[int, Tuple[int, float, bytes]] - - # TODO: 
Need to have a timer for each ledger - self.lastBatchCreated = time.perf_counter() - - self.lastOrderedPPSeqNo = 0 - - def txnRootHash(self, ledgerId, toHex=True): - if not self.isMaster: - return None - ledger = self.node.getLedger(ledgerId) - h = ledger.uncommittedRootHash - # If no uncommittedHash since this is the beginning of the tree - # or no transactions affecting the ledger were made after the - # last changes were committed - root = h if h else ledger.tree.root_hash - if toHex: - root = hexlify(root).decode() - return root - def stateRootHash(self, ledgerId, toHex=True): if not self.isMaster: return None @@ -935,7 +898,7 @@ def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp) # A PRE-PREPARE is being sent to primary - if self.isPrimary is True: + if self.isPrimaryForMsg(pp) is True: raise SuspiciousNode(sender, Suspicions.PPR_TO_PRIMARY, pp) # A PRE-PREPARE is sent that has already been received @@ -1003,8 +966,7 @@ def validatePrepare(self, prepare: Prepare, sender: str) -> bool: :return: True if PREPARE is valid, False otherwise """ key = (prepare.viewNo, prepare.ppSeqNo) - # primaryStatus = self.isPrimaryForMsg(prepare) - primaryStatus = self.isPrimary + primaryStatus = self.isPrimaryForMsg(prepare) ppReq = self.getPrePrepare(*key) diff --git a/plenum/test/node_request/test_no_forwarding_without_election.py b/plenum/test/node_request/test_no_forwarding_without_election.py deleted file mode 100644 index 9ebd603dea..0000000000 --- a/plenum/test/node_request/test_no_forwarding_without_election.py +++ /dev/null @@ -1,50 +0,0 @@ -from plenum.test import waits -from plenum.test.delayers import nom_delay -from plenum.test.helper import sendRandomRequests, \ - waitForSufficientRepliesForRequests, sendReqsToNodesAndVerifySuffReplies -from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ - client1, wallet1, client1Connected -from plenum.test.test_node import 
ensureElectionsDone -from plenum.test.view_change.helper import ensure_view_change -from stp_core.loop.eventually import eventually - - -def test_node_stashes_requests_if_no_primary(looper, txnPoolNodeSet, client1, - wallet1, client1Connected): - """ - Node stashes requests while no primary is present, but once primary is - determined, the stashed requests are processed - """ - def chk_stashed(stashed): - for node in txnPoolNodeSet: - assert (len(node.reqs_stashed_for_primary) == 0) != stashed - - # No requests are stashed before and after sending any requests - chk_stashed(False) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2) - chk_stashed(False) - - delay = 3 - for node in txnPoolNodeSet: - node.nodeIbStasher.delay(nom_delay(delay)) - - # Ensure view change and soon as view starts, send requests - ensure_view_change(looper, txnPoolNodeSet, client1, wallet1) - - reqs = sendRandomRequests(wallet1, client1, 2) - - # The above requests must be stashed - looper.run(eventually(chk_stashed, True, retryWait=.1, - timeout=3)) - - # The elections must complete for the new view, though the election would - # take longer since nominates are delayed. The calculation below is approx. - timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + \ - delay*(len(txnPoolNodeSet)) - ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout) - - # The requests should be successful - waitForSufficientRepliesForRequests(looper, client1, requests=reqs) - - # No requests should be stashed in propagator. 
- chk_stashed(False) diff --git a/plenum/test/test_log_rotation.py b/plenum/test/test_log_rotation.py index c2585b1639..4fd7603555 100644 --- a/plenum/test/test_log_rotation.py +++ b/plenum/test/test_log_rotation.py @@ -6,7 +6,6 @@ import TimeAndSizeRotatingFileHandler - def cleanFolder(path): if os.path.exists(path): shutil.rmtree(path, ignore_errors=True) @@ -14,7 +13,6 @@ def cleanFolder(path): return path -@pytest.mark.skip(reason="SOV-950") def test_time_log_rotation(): logDirPath = cleanFolder("/tmp/plenum/test_time_log_rotation") logFile = os.path.join(logDirPath, "log") @@ -29,7 +27,6 @@ def test_time_log_rotation(): assert len(os.listdir(logDirPath)) == 4 # initial + 3 new -@pytest.mark.skip(reason="SOV-950") def test_size_log_rotation(): logDirPath = cleanFolder("/tmp/plenum/test_size_log_rotation") logFile = os.path.join(logDirPath, "log") @@ -46,7 +43,6 @@ def test_size_log_rotation(): assert len(os.listdir(logDirPath)) == 5 -@pytest.mark.skip(reason="SOV-950") def test_time_and_size_log_rotation(): logDirPath = cleanFolder("/tmp/plenum/test_time_and_size_log_rotation") logFile = os.path.join(logDirPath, "log") From 179bc685bed3fdd274761cd904ea3c6328198308 Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Thu, 15 Jun 2017 13:59:16 +0300 Subject: [PATCH 011/100] Stable (#218) * rename validateReq to validate * add config=None argument to validate method to make sub and super signatures match * rename applyReq to apply * add super initialization, update usage of super methods * update docstring of commit * remove commitReq since it does the same as commit * adding timeout to test utility method * changes for load testing * sanitising log messages * Versioning fix (#113) * changes for request handlers * [Closes SOV-905] Big Fixed: Orientdb is not installing with the latest build 0.3.16 * changing to rlp encoding * moving error codes to stp * upgrading version of stp * req_handler changes * fixing import * temp commit * Stp (#116) * use common stp 
interface for changing of node's HA * get rid of explicitly calling zmq and raet; move all transport stuff into stp interfaces. * fix tests; increment stp version * add AuthMode enum instead of auto parameter * fixed testConnectWithoutKeySharingFails test * increased test timeout * minor changes to serialization * Plenum test runner fix (#115) * Fixed a bug in plenum test runner with an incorrect test results summary pattern. Previously it had been expected that the line with the test results summary must begin with equality signs and end with them. But this line is padded with equality signs only if it is shorter than 80 characters. Now the test results summary patterns don't require equality signs. Instead they may match only the last line in the file. * Corrected test results summary patterns in plenum test runner. * updating apply of request handler * fix import of OP_FIELD_NAME * up version of stp to 0.1.24 * Agent issuer wallet restoration (#117) * supplied keys parameter as True to wallet encoding and decoding functions so that dictionary keys which are custom objects restore properly, otherwise it used to be restored as string as opposed to actual object it was before persistence * minor changes in list keyrings to show issuer wallet for agent as well * minor change to print base wallet first and then any other related wallets * up stp version to 0.1.26 * skip tests in test_new_node_catchup.py * scheduling primary selection * Skip failing tests (#122) * skip test testChangeNodeHaForNonPrimary due to SOV-941 * skip test testProtocolInstanceCannotBecomeActiveWithLessThanFourServers due to SOV-940 * leveldb HashStore * parametrize test timeouts (#120) * rename expectedWait to expectedNodeInterconnectionTime * add signature for other time expect functions * use named timeouts in conftest * move totalConnections from waits to utils * use named timeout in ensureElectionsDone * use float instead of int for seconds * use default args of ensureElectionsDone where it
is possible * use named argument for timeouts * use named timeout in helper * use names for parameters * rename 'timeoutPerReq' of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies to 'customTimeoutPerRequest' to emphasize the fact that there is a default one * use default timeout instead of custom timeout of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies where it can be used; use named argument for timeouts * add comments for two functions with similar names * rename checkSufficientRepliesRecvd to checkSufficientRepliesReceived * rename checkSufficientRepliesForRequests to waitForSufficientRepliesForRequests * add 'returns' to docstrings * fix passing of args to waits.expectedElectionTimeout * use waitForSufficientRepliesForRequests and it's default timeout instead of checkSufficientRepliesReceived everywhere it is possible * update doc lines * create waitForViewChange and use it in ensureView * replace usages of checkViewNoForNodes with eventually by waitForViewChange * add waits.expectedNodeToNodeMessageDeliveryTime and use it in sendMsgAndCheck * rename checkMsg to checkMessageReceived * rename sendMsgAndCheck to sendMessageAndCheckDelivery * add docstring to sendMessageAndCheckDelivery * remove unused helper function * add expectedGetReadyTimeout and use it in checkPoolReady * rename overrideTimeout parameter to customTimeout in checkNodesConnected * use default timeout of checkNodesConnected * create expectedPoolLedgerCheck and expectedClientConnectionTimeout waits and use them * add todo for ensureDisconnectedToNodes * update waits.expectedPoolLedgerCheck * add todo for checkNodesParticipating * add requestIds parameter for waitForSufficientRepliesForRequests * update docstring of waitForSufficientRepliesForRequests * fix waitForSufficientRepliesForRequests * remove unused imports from test_log_filtering * use named timeout in test_status_command * use waits.expectedTransactionExecutionTime in 
testTransactions * refactor testTransactions * use waitRequestSuccess and waitBalanceChange in test_cli_with_auction_plugin, move them to test.cli.helper * use named timeout in test_basic_client_commands * use named timeout in helper.checkRequest * create waitClientConnected and use it instead of checkClientConnected with eventually * add docstrings * rename checkNodeStarted to waitNodeStarted and use named timeout 'expectedNodeStartUpTimeout' in it * rename expectedGetReadyTimeout to expectedPoolGetReadyTimeout * rename checkAllNodesStarted to waitAllNodesStarted * fix default value of customTimeout of checkPoolReady * create waitAllNodesUp and use it instead of checkAllNodesUp with eventually * create waitReplyCount and use instead of checkReplyCount and eventually * use named timeouts in test_client * use named timeouts in some more tests * add basic implementation for waits.expectedCatchupTime and use it * make expectedCatchupTime get custom ConsistencyProofsTimeout as parameter * use named timeout in testNodeDiscardMessageFromUnknownView * use waits.expectedElectionTimeout and rename timeout arg to custonTimeout in checkEveryNodeHasAtMostOnePrimary * rename timeout argument of plenum/test/node_catchup/test_discard_view_no.py to customTimeout and make it used named timeouts from waits as default * update timeouts in testNodeDiscardMessageFromUnknownView * create waits.expectedRequestStashingTime and use it * add TODO to test_catchup_delayed_nodes * create waitNodeLedgersEquality and use it instead of directo usage of checkNodeLedgersEquality * use waits.expectedPoolLedgerCheck in waitNodeLedgersEquality * use named timeout in testOrderingCase2 * add waits.expectedNominationTimeout and use it * use named timeout in some more tests * add missing empty lines * update waits * add 'TODO[slow-factor]' * update timeouts in the tests * fix testTestNodeDelay and missed import * skip testMultipleRequests test * skip testStatusAfterClientAdded test * fix 
testInstChangeWithLowerRatioThanDelta * fix test_new_node_catchup.py * fix testChangeHaPersistsPostNodesRestart * fix testAdd2NewNodes * increase expectedElectionTimeout timeout * rename logger.warn => logger.warning * tune timeouts in the tests * make sections in waits.py * add --repat for runner.py * increase expectedCatchupTime timeout * improve runner.py * tune the test timeouts * skip some catchup tests * parametrize test timeouts * rm eventually.py * fix testQueueingReqFromFutureView, testNumOfPrePrepareWithFPlusOneFaults, testNumOfPrepareWithFPlusOneFaults * fix testProtocolInstanceCannotBecomeActiveWithLessThanFourServers * tune propagate and preprepare test timeouts * skip testNumOf*WithFPlusOneFaults * removing commented code and fixing bugs in pool request handler * lowering Delta in pool_transactions conftest * fix jenkins build (#123) * fixed deps in setup.py * added pypi publishing * updated deps * updated deps stp-dev -> stp-perf-imp * removed unnecessary sources * updated setup.py to hold correct deps names and self name for publishing * fixed plenum for the latest stp (#127) * fixed plenum for the latest stp * increment stp version * archive runner.py output for all agents (#107) * archive runner.py results * using env variable NODE_NAME instaed of func param for artifacts * configured archiveArtifacts to allow empty/no archive * sending last pre-prepare sequence number in consistency proof so already processed request can be ignored, re-enabling tests and re-adding proper timeouts * Feature Added: Deb build * Feature Added: Deb build * Do view change if a primary is disconnected (#128) * check whether function is a partial function in getCallableName * add tests for view change when primary goes down * start view change if primary went offline * use startViewChangeIfPrimaryWentOffline instead of doElectionIfNeeded * processing stashed ordered requests after all ledgers have caught up and applying any request if mode changed while processing and 
removing obsolete code from pool manager * Unclosed file warnings (#124) * Removed "ignore" rule for ResourceWarning about an unclosed file from the warnings filter. * Fixed some causes of ResourceWarnings about unclosed files. * - Fixed some causes of ResourceWarnings about unclosed files. - Corrected plenum.common.txn_util.updateGenesisPoolTxnFile function. * - Fixed the rest causes of ResourceWarnings about unclosed files. - Removed TimeAndSizeRotatingFileHandler class which is not used anymore (the class with the same name from stp is now used instead). * Updated stp-dev dependency to the new version. * Reverted update of stp-dev dependency. * Skipped the tests in plenum.test.test_log_rotation module since they require stp-dev dependency of a newer version (0.1.28 or higher). * refactoring, documentation and updating setup to get rid of pyorient * refactoring * Updated ledger dep * deprecating orientdb * check state equality too while checking ledger equality in tests * removing obsolete test helper method * Agent generalization (#126) * refactored test_network_setup; added some role-base helper classes for members of the network * fixed issue with wrong import and minor refactoring * more work on tests * bugfixes * interim changes during refactoring * adding init_utils * cleaning up before keys * removed type hint causing issues; this issue is fixed in Python 3.5.3 with a new typing.Coroutine * added a check in DidVerifier to check that we have a verkey * fixed unreliable tmpdir test fixture * modifications to tests to make more robust when running from command line * changed spyable to be able to monkey patch a single method * Added a run_till_quiet, waits for quiet prods This function will wait till there is a quiet period from the prodables. Dependent on prodables correctly reporting events. Will run forever if there is always activity. 
* small tweak to log format to keep | aligned * changes to fix some issues happening in end-to-end test * using same seed for creating keys * ignore log files * added a small test for verifier * removed some exception classes that were moved to STP * init Local keys for client in test network * Add default values for role and verkey in state and fix test timeouts * refactor * import logger * updated to use new API from shared lib * updated to use new API from shared lib * handle pyorient import * handle pyorient import * increment stp version * Enabled the tests in plenum.test.test_log_rotation module. (#135) * increment stp version * close files in tests * check for close method before calling it (#136) * minor fix in batch creation logic and caching primary replica no in node to avoid recomputation * fix testNumOf*WithFPlusOneFaults tests * Updated stp dep * Merge branches '3pc-batch' and 'master' of github.com:evernym/plenum into 3pc-batch # Conflicts: # plenum/common/test_network_setup.py # plenum/server/node.py # plenum/test/test_node.py # setup.py * add missing __init__ for member package * added missed __init__.py for common/member * skip temporarily a failing test * fix primary disconnection and new primary not being same as old for master instance * add more tests for primary disconnection, same primary not elected in the next view and refactor * add new test for minor network glitch with primary * add missing parameter to bootstrap test network (#143) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * minor change in a test and removing sip statements * fix the catchup tests (#140) * make tests a little robust * fix test, check for disconnections
after processing sent and received messages and little documentation * Close SOV-976 (#144) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-976] Unable to create the genesis transaction files * Merge branches '3pc-batch' and 'master' of github.com:evernym/plenum into 3pc-batch # Conflicts: # plenum/test/node_catchup/test_new_node_catchup.py # setup.py * up stp to 0.1.42 (#148) * refactor, enable test, adding timeout for view change and create remotes as soon as nodestack starts * fix broken test * [Closes SOV-981] Bug Fixed: Unable to create pool transaction file on the client machine without passing it a node number (#149) * increment stp * increment ledger * up stp to 0.1.14 * move state and kv storage to 'state' repo * get rid of orientdb code * fix tests * fix generation of trustee txns * fix genesis txns for trustee * fix generation of trustee txns (#151) * remove obsolete code * client to attempt establishing connections with nodes on start * refactor monitor.isMasterThroughputTooLow * update method for forcing view change and checking it; introduce wait method for it * use provoke_and_wait_for_view_change instead of eventually and check * remove unused dependency and skip tests - removed unused dependencies - minor fixes - skip tests that fail in master too (we have separate tickets for this) * increment ledger * increment ledger * enable debug logging in tests * using reverted changes in state interface * increment state version * Removed redundant copying of the file "pool_transactions_sandbox" to the directory "~/.plenum". (It is already done using "data_files" argument of "setup" function call.)
(#152) * speed up tests (#147) * speed up test_nodes_with_pool_txns.py tests * reduce the timeout in checkNodesConnected * speed up the pool_transactions test * fix the timeout in test_delay.py * fix the timeout in test_primary_election_case5.py * speed up monitoring tests * revert changes in test_instance_change_with_Delta.py * each test's running time is limited by TestRunningTimeLimitSec * increase pool interconnection timeout on zstack retry timeout * fix timeout in test_delay.py * fix tests, make them faster * speedup the tests, tidy test timeouts * add a timeout into waits * bump stp-dev==0.1.43 * Unskip testProtocolInstanceCannotBecomeActiveWithLessThanFourServers since it works well (#153) * unskip testProtocolInstanceCannotBecomeActiveWithLessThanFourServers since it works well * decrease number of nodes in test_instance_cannot_become_active_with_less_than_four_servers to 13 * fix change back HA, SOV-908 (#154) * Fix testChangeNodeHaForNonPrimary (#157) * unskip test_change_non_primary_node_ha * fix waits name - it was renamed * verify genesis ledgers only if such option set in config * use primaryName instead of isPrimary to check that election is done * add numOfReelections parameter to waits.expectedPoolElectionTimeout * refactor test_node.py * set custom election timeout (numOfReelections=3) in changeNodeHa * remove debug prints * check VerKey is base58 for NODE txn, SOV-988 (#158) * client using a method which makes the code more testable, update to an old test with more checks and removing async from a method where it was not needed * Fix test testChangeNodeHaForPrimary (#160) * unskip testChangeNodeHaForPrimary * simplify conditional in changeNodeHa * node which is going down should not participate in a view change * change formating in testChangeNodeHaForPrimary * refactor to make method overridable and adding an argument to a test function * updated pypi package name for state repo and its version (#159) * Squashed commit of the following: create 
LedgerInfo class and replace collections of LedgerManager by fields * rename LedgerManager.ledgers to ledgerRegistry * fix usages of LedgerManager.ledgerRegistry * use correct attribute and renamed method * bring change from master and unskip a test * use correct attribute and set state to be committed when received from catchup * formatting * up ledger-3pc-batch version to 0.2.16 * up stp-3pc-batch version to 0.1.15 * improve the check of the arguments in "generate_sovrin_pool_transactions" (#162) * check client input for the NODE txn (#161) * fix testInstChangeWithLowerRatioThanDelta - decrease number of messages in a batch * decrease timeout to fix provoke_and_wait_for_view_change * fixing test * Fix post 3pc-batch merge (#163) * remove development options from Jenkinsfile * skip some broken tests * Update versions and skip windows build steps (#164) * switch off windows related build steps * update versions of ledger and stp * fix name * up ledger and stp version * skip test testNodeRequestingTxns * using correct timeout in tests * move some enums to correct location, using ledger's string encoding methods and test for stashing requests * bump dependency version * fix timeout in tests * make order of ledger sync customizable, add tranformer for transactions before adding to ledger, update seqno map db for transactions from catchup and update tests to check this * temporarily change config variable * fix regression where node starts catchup process if sent a consistency proff * bumping dependency version * bumping dependency version * bumping dependency version * Fix redundant reconnections and refactor (#165) * refactor retryForExpected * use maintainConnections in retryForExpected instead of connect * refactor resendRequests * up stp version to 0.1.49 * replace CLIENT_REQACK_TIMEOUT by CLIENT_REPLY_TIMEOUT when calling _filterExpected for REPLYs * up ledger to 0.2.19 * Add new helper-methods for conversion base58 to hex (#166) * increment state-trie and ledger 
versions (#168) * Make changes to support ChunkedFileStore (#167) * update _defaultStore of Ledger to make it correspond to original one * remove initialization of domain ledger - now it is done by ledger and file store it uses * replace manual copying of transaction files by usage of defaultFile * increase timeout for testNodeRequestingTxns * skip test testInstChangeWithLowerRatioThanDelta because it fails intermittently * up ledger version to 0.2.28 * Bump dependency version * [Closes SOV-980] Bug Fixed: A node should start catchup process if it realises that it has lagged behind the other node (#172) * [Closes SOV-980] Bug Fixed: A node should start catchup process if it realises that it has lagged behind the other node * Fixed failing tests * Fixed unsed imports * Update conftest.py * Increased test time global limit * Reverted timeouts * Added logs * Fixed filtering CPs * Fixed filtering CPs * Fixed filtering CPs * Input validation (#170) * Input sanitization: Add base logic and test cases * Input sanitization: add NODE and NYM txn, fixes * Input sanitization: implement node and client messages * roll away node to node validation * role field is option for NYM txn * fix tests * fixes for sovrin-node * implement validation for merkle root hash * uncomment new ConsistencyProof message implementation * add "nullable" property for FieldBase * fix usage of alphabet * add JsonField * add validation on message level * use hash size range instead of one value; use base58.alphabet instead of own list * fix usage hasSizes; made error comment more verbose * uncomment new implementation LedgerStatus message * uncomment new implementation for Prepare message and fix fields types * roll away node to node checkes * uncomment some new message implementations * use HexField instead of MerkleRootHashField for state_root and rxn_root in PrePrepare * make message about hash length in MerkleRootField more verbose * limit timestamp field by 253402290000.0 * modify randomString to 
remove length limitation * add tests for merkle root field validation * make randomString work not only with indexable containers * refactor tests for merkle root * fix timestamp validation * add test for timestamp validation * update validation of timestamp * check each node connected * improve log messages * fix testAdd2NewNodes * Improvement of validation rules (#173) * fix Node txn, INDY-9 * add tests for the Node operation * add checking that number of init parameters of message is equal to number of fields declared in schema * use OrderedDict in __dict__ * add test for merkle, fix logic * use OrderedDict for _fields and update all realted methods * - Provided all the disabled tests with the ticket references. (#176) - Enabled the tests testChangeNodeHaForPrimary and testChangeNodeHaForNonPrimary on non-Windows platforms. - Commented out parametrizers at the disabled tests in plenum.test.input_validation.test_common_checks module. * more removal of op to the top since it can affect checking of a number of args * Make SERVICES field required for add NODE txn * Added another test scenario for catchup (#178) * override items, keys, values methods of Mapping in MessageBase to make it compatible * Added another test scenario for catchup fix * clean MessageBase * INDY-73: Removed a commented out @skipif decorator (#177) * Removed a commented out @skipif decorator. * Updated a ticket reference at a disabled test. * Add base58 str class; destnode; destnym; * use - as default ppSeqNo instead of -1 * overtride __ftr__ for MessageBase * comment checking of presence of SERVICE field * move alphabet validation from _specific_validation to avoid accidental overriding of it * INDY-73: Added ticket references to test stubs (#180) * Removed a commented out @skipif decorator. * Updated a ticket reference at a disabled test. * - Added ticket references to the not implemented tests which had not contained them yet. 
- Uncommented testStopScriptIfNodeIsRunning and disabled it since it fails. * Added raising NotImplementedError to not implemented tests. * Move all specific checks to from validate method * Eliminate multiple inheritance * Enabled and updated testNodesConnectsWhenOneNodeIsLate (#181) * Enabled and updated testNodesConnectsWhenOneNodeIsLate * updated assertions logic * INDY-109 (#184) * updates to test helpers and change in forwarding requests logic (#187) * Use secure random number generator for randomString() Current usage of random.choice() is not secure so replaced that with libsodium provided randombytes_uniform() which is secure and also available on all platforms where libsodium is available * Fixes for INDY-71 (#185) * Refactor check_endpoint_valid func * Fix validation, INDY-71 * Fix imports * Do not connect to a new, non-active node * Remove confusing comments * Minor refactor * Fix test testAddInactiveNodeThenActivate * Validation for view change and 3pc messages (#193) * add exception for type error and raise it in MessageValidator * make error message in ChooseField more verbose * uncomment new implementations of Reelection, Primary and InstanceChange messages * update and fix testInstanceChangeMsgTypeChecking * fix testDiscardInstChngMsgFrmPastView * use NonNegativeField instead of TimestampFied inTieAmountField for its seconds part * make name pf test for merkle root field more standard * move tests for fields to own package * add missing empty line * remove ununsed imports * refactor test for timestamp * remove todo about code duplication since it is not valid anymore * add tests for some field types * make IterableField check type of inner field and that it is not none * add test for hex field * add test for ledger id * add test for request id field * override __eq__ for MessageBase * use NonNegativeNumberField instead of TimestampField in RequestIdentifierField * use IdentifierField instead of NonEmptyStringField in RequestIdentifierField * update 
test for request identifier * add test for IdentifierField * update test for RequestIdentifier field * implement validation for IdentifierField and reimplement DestNodeField using it * uncomment new implementations for Propagate, Prepare, Commit, Ordered * Refactor base58 check; add Verkey check; dest node; dest nym; merkelroot; * move test for base58 field to field validation package * remove unused import * add checking of max length hash to merkle tree root field test * update test for identifier field * update test for request identifier field * fix formatting * fix checking of zero * remove unused commented line * rename test_non_negative_number to test_positive_number * Make verkey validator accept empty strings * Add super's __init__ params to child calsses * Improve getMatchedHelpableMsg logic (#195) * Use original batch parameters in all tests except the ones that use batches and use alternate of eventuallyAll to check a collection of functions under a timeout (#190) * updates to test helpers and change in forwarding requests logic * overriding batch params in tests * use correct timeouts in tests * accounting for nomination delay * Disable view change (#196) * repaired calls to deprecated methods * Added unsafe 'disable_view_change' for testing - Added 'unsafe' attribute to default config. "unsafe" is a set of attributes that can set certain behaviors that are not safe, for example, 'disable_view_change' disables view changes from happening. This might be useful in testing scenarios, but never in a live network. - Added 'unsafe' behavior 'disable_view_change' which causes Monitor to always return false when asked if master instance performance is degraded. - Refactored view change fixture so it could be reused to test new 'disable_view_change' behavior. 
* fixed import for missed fixture in test * Fix validation types (#197) * Enable and update testNodeDiscardMessageFromUnknownView (#194) * added equality method for MessageBase * added ability to cancel created delayers * enabled and updated testNodeDiscardMessageFromUnknownView * updated test to recent changes in node logic * reverted recent changes not necessary for now * Catch up demoted (#201) * add tests for the issues with catch-up after demotion/disconnection * updates replica's last_ordered_pp_seq_no after catch-up * disconnect a demoted node * Fixes for dead catchup (#202) * add tests for the issues with catch-up after demotion/disconnection * updates replica's last_ordered_pp_seq_no after catch-up * disconnect a demoted node * up stp version to 0.1.56 * up stp version to 0.1.57 * fixed tests * fix test * Disable the test: INDY-147. * Make random string generation efficient The current algorithm requires calling the libsodium random function 'size' times where size is the expected length of random string Changed this so that this will require at the most '(size/64)' number of calls. Also we do not need the chars list now we always use only alphabets and digits available in hex this makes it very easy and still gives us what we need. 
Every instance of usage of this function has been checked and nowhere the 'chars' parameter is used * Change to pass exact size to libsodium randombytes() function This probably is cryptographically more accurate * Fix catchup when commit (#203) * remove 3PC messages that are already ordered during catch-up * Added more tests * fix catch-up in between 3pc msgs * fix tests * update validation rule for verkey (#208) * fix an incorrect test (#207) * fix an incorrect test * refactoring test * remove un-necessary parameter (#210) * Revert unordered batches before starting catch-up (not after) (#209) * Revert unordered batches before starting catch-up (not after) * add test for revert of the state before catchup * improve the test * Do not enqueue old pre-prepares (#211) * ignore pp requests with ppSeqNo less than expected * minor refactor * Optimisations and a testing util (#212) * add todo, fixing log levels, commenting out an expensive area of code and adding capability to diable monitor in tests * an optimisation avoiding re-calculation of merkle proof * remove commented code and add test * update names * add test fils for validators * add test for commit message * add test for ConsistencyProof message * update test for Commit message * add test for InstanceChange message * add test for LedgerStatus message * add test for nomination message * add test for Ordered message * add tests for Prepare and PrePrepare messages * add test for Primary message * add test for PROPAGATE message * add test for Reelection message * remove unused imports * Election bug fix (#215) * Election bug fix if `f` nodes do not send Primary and the node is the last one to send Primary, then it was not able to decide the primary, since a node was doing "primary setting" only on receiving a Primary message, changed that to try "setting primary" after sending Primary too * update test with more checks Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- README.md 
| 2 +- plenum/common/ledger.py | 5 +- plenum/common/ledger_manager.py | 16 ++- plenum/common/messages/fields.py | 8 +- plenum/common/signer_did.py | 7 +- plenum/common/stack_manager.py | 6 +- plenum/common/util.py | 39 +++--- plenum/persistence/client_txn_log.py | 4 + plenum/persistence/util.py | 5 + plenum/server/monitor.py | 4 + plenum/server/node.py | 40 +++--- plenum/server/pool_manager.py | 9 +- plenum/server/primary_elector.py | 123 +++++++++--------- plenum/server/propagator.py | 2 +- plenum/server/replica.py | 75 ++++++----- plenum/server/req_handler.py | 2 +- plenum/test/batching_3pc/catch-up/__init__.py | 0 .../test_3pc_paused_during_catch_up.py | 20 +++ .../catch-up/test_catchup_during_3pc.py | 30 +++++ ...est_catchup_during_3pc_continue_working.py | 36 +++++ .../test_state_reverted_before_catchup.py | 70 ++++++++++ plenum/test/batching_3pc/helper.py | 97 +++++++++++++- .../checkpoints/test_stable_checkpoint.py | 9 +- plenum/test/common/test_random_string.py | 39 ++++++ plenum/test/helper.py | 54 +++++--- plenum/test/input_validation/helper.py | 6 +- .../message_validation/__init__.py | 0 .../message_validation/test_commit_message.py | 26 ++++ .../test_consistencyproof_message.py | 30 +++++ .../test_instanceChange_message.py | 25 ++++ .../test_ledgerstatus_message.py | 27 ++++ .../test_nomination_message.py | 28 ++++ .../test_ordered_message.py | 32 +++++ .../test_prepare_message.py | 31 +++++ .../test_preprepare_message.py | 35 +++++ .../test_primary_message.py | 28 ++++ .../test_propagate_message.py | 25 ++++ .../test_reelection_message.py | 28 ++++ ...come_active_with_less_than_four_servers.py | 3 + .../node_catchup/test_new_node_catchup.py | 18 --- .../node_catchup/test_new_node_catchup2.py | 80 ++++++++++++ .../test_node_catchup_after_disconnect.py | 2 +- ...test_node_catchup_after_lost_connection.py | 10 +- .../test_already_processed_request.py | 82 ++++++++++++ ...re_prepare_pp_seq_no_less_than_expected.py | 27 ++++ 
.../test_primary_election_case6.py | 64 +++++++++ plenum/test/test_node.py | 11 +- plenum/test/waits.py | 2 +- setup.py | 2 +- 49 files changed, 1118 insertions(+), 206 deletions(-) create mode 100644 plenum/test/batching_3pc/catch-up/__init__.py create mode 100644 plenum/test/batching_3pc/catch-up/test_3pc_paused_during_catch_up.py create mode 100644 plenum/test/batching_3pc/catch-up/test_catchup_during_3pc.py create mode 100644 plenum/test/batching_3pc/catch-up/test_catchup_during_3pc_continue_working.py create mode 100644 plenum/test/batching_3pc/catch-up/test_state_reverted_before_catchup.py create mode 100644 plenum/test/common/test_random_string.py create mode 100644 plenum/test/input_validation/message_validation/__init__.py create mode 100644 plenum/test/input_validation/message_validation/test_commit_message.py create mode 100644 plenum/test/input_validation/message_validation/test_consistencyproof_message.py create mode 100644 plenum/test/input_validation/message_validation/test_instanceChange_message.py create mode 100644 plenum/test/input_validation/message_validation/test_ledgerstatus_message.py create mode 100644 plenum/test/input_validation/message_validation/test_nomination_message.py create mode 100644 plenum/test/input_validation/message_validation/test_ordered_message.py create mode 100644 plenum/test/input_validation/message_validation/test_prepare_message.py create mode 100644 plenum/test/input_validation/message_validation/test_preprepare_message.py create mode 100644 plenum/test/input_validation/message_validation/test_primary_message.py create mode 100644 plenum/test/input_validation/message_validation/test_propagate_message.py create mode 100644 plenum/test/input_validation/message_validation/test_reelection_message.py create mode 100644 plenum/test/node_catchup/test_new_node_catchup2.py create mode 100644 plenum/test/node_request/test_already_processed_request.py create mode 100644 
plenum/test/node_request/test_pre_prepare/test_ignore_pre_prepare_pp_seq_no_less_than_expected.py create mode 100644 plenum/test/primary_election/test_primary_election_case6.py diff --git a/README.md b/README.md index 36023244fa..1911995fcf 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Plenum Byzantine Fault Tolerant Protocol -[![Build Status](https://jenkins.evernym.com/buildStatus/icon?job=Plenum/stable)](https://jenkins.evernym.com/job/Plenum/job/stable/) +[![Build Status](https://jenkins.evernym.com/buildStatus/icon?job=Plenum/stable)](https://jenkins.evernym.com/job/Plenum/job/stable/) Plenum makes extensive use of coroutines and the async/await keywords in Python, and as such, requires Python version 3.5.0 or later. Plenum also diff --git a/plenum/common/ledger.py b/plenum/common/ledger.py index f04678d3fc..7bc4d345e0 100644 --- a/plenum/common/ledger.py +++ b/plenum/common/ledger.py @@ -55,9 +55,10 @@ def commitTxns(self, count: int) -> Tuple[Tuple[int, int], List]: numbers of the committed txns """ committedSize = self.size + committedTxns = [] for txn in self.uncommittedTxns[:count]: - self.append(txn) - committedTxns = self.uncommittedTxns[:count] + txn.update(self.append(txn)) + committedTxns.append(txn) self.uncommittedTxns = self.uncommittedTxns[count:] if not self.uncommittedTxns: self.uncommittedTree = None diff --git a/plenum/common/ledger_manager.py b/plenum/common/ledger_manager.py index e7e11be7e8..926c371f78 100644 --- a/plenum/common/ledger_manager.py +++ b/plenum/common/ledger_manager.py @@ -29,11 +29,13 @@ class LedgerManager(HasActionQueue): def __init__(self, owner, ownedByNode: bool=True, - postAllLedgersCaughtUp:Optional[Callable]=None): + postAllLedgersCaughtUp:Optional[Callable]=None, + preCatchupClbk: Callable = None): self.owner = owner self.ownedByNode = ownedByNode self.postAllLedgersCaughtUp = postAllLedgersCaughtUp + self.preCatchupClbk = preCatchupClbk self.config = getConfig() # Needs to schedule actions. 
The owner of the manager has the # responsibility of calling its `_serviceActions` method periodically @@ -332,7 +334,8 @@ def canProcessConsistencyProof(self, proof: ConsistencyProof) -> bool: self.owner.totalNodes, ledgerInfo.state, LedgerState.not_synced)) self.setLedgerState(ledgerId, LedgerState.not_synced) - if ledgerId == DOMAIN_LEDGER_ID: + self.preCatchupClbk(ledgerId) + if ledgerId == DOMAIN_LEDGER_ID and ledgerInfo.preCatchupStartClbk: ledgerInfo.preCatchupStartClbk() return self.canProcessConsistencyProof(proof) @@ -468,9 +471,7 @@ def _processCatchupReplies(self, ledgerId, ledger: Ledger, if result: ledgerInfo = self.getLedgerInfoByType(ledgerId) for _, txn in catchUpReplies[:toBeProcessed]: - merkleInfo = ledger.add(self._transform(txn)) - txn[F.seqNo.name] = merkleInfo[F.seqNo.name] - ledgerInfo.postTxnAddedToLedgerClbk(ledgerId, txn) + self._add_txn(ledgerId, ledger, ledgerInfo, txn) self._removePrcdCatchupReply(ledgerId, nodeName, seqNo) return numProcessed + toBeProcessed + \ self._processCatchupReplies(ledgerId, ledger, @@ -489,6 +490,11 @@ def _processCatchupReplies(self, ledgerId, ledger: Ledger, return numProcessed + toBeProcessed return numProcessed + def _add_txn(self, ledgerId, ledger: Ledger, ledgerInfo, txn): + merkleInfo = ledger.add(self._transform(txn)) + txn[F.seqNo.name] = merkleInfo[F.seqNo.name] + ledgerInfo.postTxnAddedToLedgerClbk(ledgerId, txn) + def _removePrcdCatchupReply(self, ledgerId, node, seqNo): ledgerInfo = self.getLedgerInfoByType(ledgerId) for i, rep in enumerate(ledgerInfo.recvdCatchupRepliesFrm[node]): diff --git a/plenum/common/messages/fields.py b/plenum/common/messages/fields.py index 61b197305d..bc82d75e0f 100644 --- a/plenum/common/messages/fields.py +++ b/plenum/common/messages/fields.py @@ -1,8 +1,6 @@ import ipaddress import json import base58 -import re -from datetime import datetime from plenum.common.constants import DOMAIN_LEDGER_ID, POOL_LEDGER_ID @@ -269,8 +267,9 @@ class VerkeyField(FieldBase): 
_b58long = Base58Field(long=True) def _specific_validation(self, val): - if len(val) == 0: - return None + vk_error = NonEmptyStringField().validate(val) + if vk_error: + return vk_error if val.startswith('~'): #short base58 return self._b58short.validate(val[1:]) @@ -318,6 +317,7 @@ class JsonField(FieldBase): _base_types = (str,) def _specific_validation(self, val): + # TODO: Need a mechanism to ensure a non-empty JSON if needed. try: json.loads(val) except json.decoder.JSONDecodeError: diff --git a/plenum/common/signer_did.py b/plenum/common/signer_did.py index 52ae5450da..1a9cc461ab 100644 --- a/plenum/common/signer_did.py +++ b/plenum/common/signer_did.py @@ -8,10 +8,13 @@ from stp_core.crypto.signer import Signer from plenum.common.signing import serializeMsg from stp_core.types import Identifier -from plenum.common.util import rawToFriendly, friendlyToRaw +from plenum.common.util import rawToFriendly, friendlyToRaw, cryptonymToHex from plenum.common.types import f + class DidIdentity: + abbr_prfx = '~' + def __init__(self, identifier, verkey=None, rawVerkey=None): assert (verkey or rawVerkey) and not (verkey and rawVerkey) if identifier: @@ -31,7 +34,7 @@ def identifier(self) -> Identifier: @property def verkey(self) -> str: if self.abbreviated: - return '~' + self._verkey + return self.abbr_prfx + self._verkey else: return self._verkey diff --git a/plenum/common/stack_manager.py b/plenum/common/stack_manager.py index 0b8560ccbd..499fc91a20 100644 --- a/plenum/common/stack_manager.py +++ b/plenum/common/stack_manager.py @@ -4,6 +4,7 @@ from collections import OrderedDict from plenum.common.keygen_utils import initRemoteKeys +from plenum.common.signer_did import DidIdentity from stp_core.types import HA from stp_core.network.exceptions import RemoteNotFound from stp_core.common.log import getlogger @@ -88,6 +89,7 @@ def parseLedgerForHaAndKeys(ledger, returnActive=True): nodeReg[nodeName] = HA(*nHa) if cHa: cliNodeReg[clientStackName] = HA(*cHa) + # TODO: 
Need to handle abbreviated verkey verkey = cryptonymToHex(txn[TARGET_NYM]) nodeKeys[nodeName] = verkey @@ -110,7 +112,9 @@ def parseLedgerForHaAndKeys(ledger, returnActive=True): else: return nodeReg, cliNodeReg, nodeKeys, activeValidators - def connectNewRemote(self, txn, remoteName, nodeOrClientObj, addRemote=True): + def connectNewRemote(self, txn, remoteName, nodeOrClientObj, + addRemote=True): + # TODO: Need to handle abbreviated verkey verkey = cryptonymToHex(txn[TARGET_NYM]) nodeHa = (txn[DATA][NODE_IP], txn[DATA][NODE_PORT]) diff --git a/plenum/common/util.py b/plenum/common/util.py index 1c4eceb748..9c3f2714e5 100644 --- a/plenum/common/util.py +++ b/plenum/common/util.py @@ -21,7 +21,7 @@ import base58 import libnacl.secret -from libnacl import randombytes_uniform +from libnacl import randombytes, randombytes_uniform import psutil from jsonpickle import encode, decode from six import iteritems, string_types @@ -40,27 +40,33 @@ Seconds = TypeVar("Seconds", int, float) -def randomString(size: int = 20, - chars = string.ascii_letters + string.digits) -> str: +def randomString(size: int = 20) -> str: """ - Generate a random string of the specified size + Generate a random string of the specified size, + DONOT use python provided random class its a Pseudo Random Number Generator + and not secure enough for our needs :param size: size of the random string to generate - :param chars: the set of characters to use to generate the random string. Uses alphanumerics by default. 
:return: the random string generated """ - if not hasattr(chars, "__getitem__"): - # choice does not work with non indexed containers - chars = list(chars) + def randomStr(size): + assert (size > 0), "Expected random string size cannot be less than 1" + #Approach 1 + rv = randombytes(size // 2).hex() + return rv if size % 2 == 0 else rv + hex(randombytes_uniform(15))[-1] - def randomChar(): - # DONOT use random.choice its as PRNG not secure enough for our needs - # return random.choice(chars) - rn = randombytes_uniform(len(chars)) - return chars[rn] + #Approach 2 this is faster than Approach 1, but lovesh had a doubt + # that part of a random may not be truely random, so until + # we have definite proof going to retain it commented + #rstr = randombytes(size).hex() + #return rstr[:size] - return ''.join(randomChar() for _ in range(size)) + return randomStr(size) + + +def randomSeed(size=32): + return randomString(size) def mostCommonElement(elements: Iterable[T]) -> T: @@ -469,11 +475,6 @@ def isMaxCheckTimeExpired(startTime, maxCheckForMillis): return startTimeRounded + maxCheckForMillis < curTimeRounded -def randomSeed(size=32): - return ''.join(random.choice(string.hexdigits) - for _ in range(size)).encode() - - def lxor(a, b): # Logical xor of 2 items, return true when one of them is truthy and # one of them falsy diff --git a/plenum/persistence/client_txn_log.py b/plenum/persistence/client_txn_log.py index 74c1f859be..a63ad09afd 100644 --- a/plenum/persistence/client_txn_log.py +++ b/plenum/persistence/client_txn_log.py @@ -44,3 +44,7 @@ def hasTxn(self, identifier, reqId) -> bool: if key == str(reqId): return True return False + + def reset(self): + self.transactionLog.reset() + diff --git a/plenum/persistence/util.py b/plenum/persistence/util.py index 28ff0b74f4..dc53d63d64 100644 --- a/plenum/persistence/util.py +++ b/plenum/persistence/util.py @@ -3,6 +3,7 @@ from copy import deepcopy from ledger.util import F +from plenum.common.util import pop_keys def 
txnsWithSeqNo(seqNoStart, seqNoEnd, txns): @@ -24,3 +25,7 @@ def txnsWithMerkleInfo(ledger, committedTxns): mi = ledger.merkleInfo(txn.get(F.seqNo.name)) txn.update(mi) return committedTxns + + +def pop_merkle_info(txn): + pop_keys(txn, lambda k: k in (F.auditPath.name, F.rootHash.name)) \ No newline at end of file diff --git a/plenum/server/monitor.py b/plenum/server/monitor.py index bde5bd97b8..aba0840084 100644 --- a/plenum/server/monitor.py +++ b/plenum/server/monitor.py @@ -128,6 +128,10 @@ def __init__(self, name: str, Delta: float, Lambda: float, Omega: float, if 'disable_view_change' in config.unsafe: self.isMasterDegraded = lambda: False + if 'disable_monitor' in config.unsafe: + self.requestOrdered = lambda *args, **kwargs: {} + self.sendPeriodicStats = lambda: None + self.checkPerformance = lambda: None def __repr__(self): return self.name diff --git a/plenum/server/node.py b/plenum/server/node.py index 328eddf707..30ac4033da 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -479,13 +479,13 @@ def getHashStore(self, name) -> HashStore: def getLedgerManager(self) -> LedgerManager: return LedgerManager(self, ownedByNode=True, - postAllLedgersCaughtUp=self.allLedgersCaughtUp) + postAllLedgersCaughtUp=self.allLedgersCaughtUp, + preCatchupClbk=self.preLedgerCatchUp) def init_ledger_manager(self): # TODO: this and tons of akin stuff should be exterminated self.ledgerManager.addLedger(DOMAIN_LEDGER_ID, self.domainLedger, - preCatchupStartClbk=self.preDomainLedgerCatchUp, postCatchupCompleteClbk=self.postDomainLedgerCaughtUp, postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) self.on_new_ledger_added(DOMAIN_LEDGER_ID) @@ -1406,12 +1406,6 @@ def start_domain_ledger_sync(self): self.sendDomainLedgerStatus(nm) self.ledgerManager.processStashedLedgerStatuses(DOMAIN_LEDGER_ID) - def preDomainLedgerCatchUp(self): - """ - Ledger got out of sync. 
Setting node's state accordingly - :return: - """ - self.mode = Mode.syncing def postDomainLedgerCaughtUp(self, **kwargs): """ @@ -1421,6 +1415,14 @@ def postDomainLedgerCaughtUp(self, **kwargs): """ pass + def preLedgerCatchUp(self, ledger_id): + # make the node Syncing + self.mode = Mode.syncing + + # revert uncommitted txns and state for unordered requests + self.replicas[0].revert_unordered_batches(ledger_id) + + def postTxnFromCatchupAddedToLedger(self, ledgerId: int, txn: Any): self.reqsFromCatchupReplies.add((txn.get(f.IDENTIFIER.nm), txn.get(f.REQ_ID.nm))) @@ -1900,7 +1902,7 @@ def verifySignature(self, msg): req = msg.as_dict identifier = self.authNr(req).authenticate(req) - logger.display("{} authenticated {} signature on {} request {}". + logger.info("{} authenticated {} signature on {} request {}". format(self, identifier, typ, req['reqId']), extra={"cli": True, "tags": ["node-msg-processing"]}) @@ -1932,7 +1934,7 @@ def executeBatch(self, ppSeqNo: int, ppTime: float, reqs: List[Request], if committedTxns: lastTxnSeqNo = committedTxns[-1][F.seqNo.name] self.batchToSeqNos[ppSeqNo] = (ledgerId, lastTxnSeqNo) - logger.debug('{} storing ppSeqno {} for ledger {} seqNo {}'. + logger.display('{} storing ppSeqno {} for ledger {} seqNo {}'. format(self, ppSeqNo, ledgerId, lastTxnSeqNo)) if len(self.batchToSeqNos) > self.config.ProcessedBatchMapsToKeep: x = self.batchToSeqNos.popitem(last=False) @@ -1947,8 +1949,6 @@ def commitAndSendReplies(self, reqHandler, ppTime, reqs: List[Request], stateRoot, txnRoot) -> List: committedTxns = reqHandler.commit(len(reqs), stateRoot, txnRoot) self.updateSeqNoMap(committedTxns) - committedTxns = txnsWithMerkleInfo(reqHandler.ledger, - committedTxns) self.sendRepliesToClients( map(self.update_txn_with_extra_data, committedTxns), ppTime) @@ -1980,7 +1980,7 @@ def onBatchCreated(self, ledgerId, stateRoot): logger.debug('{} did not know how to handle for ledger {}'. 
format(self, ledgerId)) - def onBatchRejected(self, ledgerId, stateRoot=None): + def onBatchRejected(self, ledgerId): """ A batch of requests has been rejected, if stateRoot is None, reject the current batch. @@ -1990,9 +1990,9 @@ def onBatchRejected(self, ledgerId, stateRoot=None): """ if ledgerId == POOL_LEDGER_ID: if isinstance(self.poolManager, TxnPoolManager): - self.poolManager.reqHandler.onBatchRejected(stateRoot) + self.poolManager.reqHandler.onBatchRejected() elif ledgerId == DOMAIN_LEDGER_ID: - self.reqHandler.onBatchRejected(stateRoot) + self.reqHandler.onBatchRejected() else: logger.debug('{} did not know how to handle for ledger {}'. format(self, ledgerId)) @@ -2230,12 +2230,10 @@ def getReplyFromLedger(self, ledger, request): seqNo = self.seqNoDB.get(request.identifier, request.reqId) if seqNo: txn = ledger.getBySeqNo(int(seqNo)) - else: - txn = ledger.get(identifier=request.identifier, reqId=request.reqId) - if txn: - txn.update(ledger.merkleInfo(txn.get(F.seqNo.name))) - txn = self.update_txn_with_extra_data(txn) - return Reply(txn) + if txn: + txn.update(ledger.merkleInfo(txn.get(F.seqNo.name))) + txn = self.update_txn_with_extra_data(txn) + return Reply(txn) def update_txn_with_extra_data(self, txn): """ diff --git a/plenum/server/pool_manager.py b/plenum/server/pool_manager.py index 92ca296889..6647ef35f6 100644 --- a/plenum/server/pool_manager.py +++ b/plenum/server/pool_manager.py @@ -17,7 +17,7 @@ from plenum.common.stack_manager import TxnStackManager from plenum.common.types import NodeDetail from plenum.persistence.storage import initKeyValueStorage -from plenum.persistence.util import txnsWithMerkleInfo +from plenum.persistence.util import txnsWithMerkleInfo, pop_merkle_info from plenum.server.pool_req_handler import PoolRequestHandler from plenum.server.suspicion_codes import Suspicions from state.pruning_state import PruningState @@ -152,8 +152,11 @@ def executePoolTxnBatch(self, ppTime, reqs, stateRoot, txnRoot) -> List: committedTxns = 
self.reqHandler.commit(len(reqs), stateRoot, txnRoot) self.node.updateSeqNoMap(committedTxns) for txn in committedTxns: - self.onPoolMembershipChange(deepcopy(txn)) - committedTxns = txnsWithMerkleInfo(self.reqHandler.ledger, committedTxns) + t = deepcopy(txn) + # Since the committed transactions contain merkle info, + # try to avoid this kind of strictness + pop_merkle_info(t) + self.onPoolMembershipChange(t) self.node.sendRepliesToClients(committedTxns, ppTime) return committedTxns diff --git a/plenum/server/primary_elector.py b/plenum/server/primary_elector.py index 8c7f41442b..03074fd5e2 100644 --- a/plenum/server/primary_elector.py +++ b/plenum/server/primary_elector.py @@ -360,62 +360,7 @@ def processPrimary(self, prim: Primary, sender: str) -> None: self.primaryDeclarations[instId][sndrRep] = (prim.name, prim.ordSeqNo) - # If got more than 2f+1 primary declarations then in a position to - # decide whether it is the primary or not `2f + 1` declarations - # are enough because even when all the `f` malicious nodes declare - # a primary, we still have f+1 primary declarations from - # non-malicious nodes. One more assumption is that all the non - # malicious nodes vote for the the same primary - - # Find for which node there are maximum primary declarations. - # Cant be a tie among 2 nodes since all the non malicious nodes - # which would be greater than or equal to f+1 would vote for the - # same node - - if replica.isPrimary is not None: - logger.debug( - "{} Primary already selected; ignoring PRIMARY msg".format( - replica)) - return - - if self.hasPrimaryQuorum(instId): - if replica.isPrimary is None: - primary, seqNo = mostCommonElement( - self.primaryDeclarations[instId].values()) - logger.display("{} selected primary {} for instance {} " - "(view {})".format(replica, primary, - instId, self.viewNo), - extra={"cli": "ANNOUNCE", - "tags": ["node-election"]}) - logger.debug("{} selected primary on the basis of {}". 
- format(replica, - self.primaryDeclarations[instId]), - extra={"cli": False}) - - # If the maximum primary declarations are for this node - # then make it primary - replica.primaryChanged(primary, seqNo) - - if instId == 0: - self.previous_master_primary = None - - # If this replica has nominated itself and since the - # election is over, reset the flag - if self.replicaNominatedForItself == instId: - self.replicaNominatedForItself = None - - self.node.primary_found() - - self.scheduleElection() - else: - self.discard(prim, - "it already decided primary which is {}". - format(replica.primaryName), - logger.debug) - else: - logger.debug( - "{} received {} but does it not have primary quorum " - "yet".format(self.name, prim)) + self.select_primary(instId, prim) else: self.discard(prim, "already got primary declaration from {}". @@ -429,6 +374,66 @@ def processPrimary(self, prim: Primary, sender: str) -> None: # self.send(BlacklistMsg( # Suspicions.DUPLICATE_PRI_SENT.code, sender)) + def select_primary(self, inst_id: int, prim: Primary): + # If got more than 2f+1 primary declarations then in a position to + # decide whether it is the primary or not `2f + 1` declarations + # are enough because even when all the `f` malicious nodes declare + # a primary, we still have f+1 primary declarations from + # non-malicious nodes. One more assumption is that all the non + # malicious nodes vote for the the same primary + + # Find for which node there are maximum primary declarations. 
+ # Cant be a tie among 2 nodes since all the non malicious nodes + # which would be greater than or equal to f+1 would vote for the + # same node + + replica = self.replicas[inst_id] + + if replica.isPrimary is not None: + logger.debug( + "{} Primary already selected; ignoring PRIMARY msg".format( + replica)) + return + + if self.hasPrimaryQuorum(inst_id): + if replica.isPrimary is None: + primary, seqNo = mostCommonElement( + self.primaryDeclarations[inst_id].values()) + logger.display("{} selected primary {} for instance {} " + "(view {})".format(replica, primary, + inst_id, self.viewNo), + extra={"cli": "ANNOUNCE", + "tags": ["node-election"]}) + logger.debug("{} selected primary on the basis of {}". + format(replica, + self.primaryDeclarations[inst_id]), + extra={"cli": False}) + + # If the maximum primary declarations are for this node + # then make it primary + replica.primaryChanged(primary, seqNo) + + if inst_id == 0: + self.previous_master_primary = None + + # If this replica has nominated itself and since the + # election is over, reset the flag + if self.replicaNominatedForItself == inst_id: + self.replicaNominatedForItself = None + + self.node.primary_found() + + self.scheduleElection() + else: + self.discard(prim, + "it already decided primary which is {}". + format(replica.primaryName), + logger.debug) + else: + logger.debug( + "{} received {} but does it not have primary quorum " + "yet".format(self.name, prim)) + def processReelection(self, reelection: Reelection, sender: str): """ Process reelection requests sent by other nodes. @@ -660,8 +665,10 @@ def sendPrimary(self, instId: int, primaryName: str, logger.debug("{} declaring primary as: {} on the basis of {}". 
format(replica, primaryName, self.nominations[instId])) - self.send(Primary(primaryName, instId, self.viewNo, - lastOrderedSeqNo)) + prim = Primary(primaryName, instId, self.viewNo, + lastOrderedSeqNo) + self.send(prim) + self.select_primary(instId, prim) def sendReelection(self, instId: int, primaryCandidates: Sequence[str] = None) -> None: diff --git a/plenum/server/propagator.py b/plenum/server/propagator.py index c14d22c6b8..f906b7a48c 100644 --- a/plenum/server/propagator.py +++ b/plenum/server/propagator.py @@ -135,7 +135,7 @@ def propagate(self, request: Request, clientName): # catchup process. QUESTION: WHY? if self.isParticipating: propagate = self.createPropagate(request, clientName) - logger.display("{} propagating {} request {} from client {}". + logger.info("{} propagating {} request {} from client {}". format(self, request.identifier, request.reqId, clientName), extra={"cli": True, "tags": ["node-propagate"]}) diff --git a/plenum/server/replica.py b/plenum/server/replica.py index f8570c9be8..8d6d2980ee 100644 --- a/plenum/server/replica.py +++ b/plenum/server/replica.py @@ -555,7 +555,7 @@ def create3PCBatch(self, ledger_id): self.stateRootHash(ledger_id), self.txnRootHash(ledger_id) ) - logger.debug('{} created a PRE-PREPARE with {} requests for ledger {}' + logger.display('{} created a PRE-PREPARE with {} requests for ledger {}' .format(self, len(validReqs), ledger_id)) self.lastPrePrepareSeqNo = ppSeqNo if self.isMaster: @@ -806,7 +806,15 @@ def nonFinalisedReqs(self, reqKeys: List[Tuple[str, int]]): """ return {key for key in reqKeys if not self.requests.isFinalised(key)} - def isNextPrePrepare(self, ppSeqNo: int): + def __is_next_pre_prepare(self, ppSeqNo: int): + if ppSeqNo != self.__last_pp_seq_no + 1: + logger.debug('{} missing PRE-PREPAREs between {} and {}'. 
+ format(self, ppSeqNo, self.__last_pp_seq_no)) + return False + return True + + @property + def __last_pp_seq_no(self): lastPp = self.lastPrePrepare if lastPp: # TODO: Is it possible that lastPp.ppSeqNo is less than @@ -817,12 +825,7 @@ def isNextPrePrepare(self, ppSeqNo: int): else self.lastOrderedPPSeqNo else: lastPpSeqNo = self.lastOrderedPPSeqNo - - if ppSeqNo - lastPpSeqNo != 1: - logger.debug('{} missing PRE-PREPAREs between {} and {}'. - format(self, ppSeqNo, lastPpSeqNo)) - return False - return True + return lastPpSeqNo def revert(self, ledgerId, stateRootHash, reqCount): ledger = self.node.getLedger(ledgerId) @@ -834,7 +837,7 @@ def revert(self, ledgerId, stateRootHash, reqCount): ledger.discardTxns(reqCount) self.node.onBatchRejected(ledgerId) - def validatePrePrepare(self, pp: PrePrepare, sender: str): + def validate_pre_prepare(self, pp: PrePrepare, sender: str): """ This will apply the requests part of the PrePrepare to the ledger and state. It will not commit though (the ledger on disk will not @@ -912,18 +915,19 @@ def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: # do not make change to state or ledger return True - nonFinReqs = self.nonFinalisedReqs(pp.reqIdr) + if pp.ppSeqNo <= self.__last_pp_seq_no: + return False # ignore old pre-prepare - if nonFinReqs: - self.enqueuePrePrepare(pp, sender, nonFinReqs) - return False + non_fin_reqs = self.nonFinalisedReqs(pp.reqIdr) - if not self.isNextPrePrepare(pp.ppSeqNo): - self.enqueuePrePrepare(pp, sender) - return False + non_next_upstream_pp = pp.ppSeqNo > self.__last_pp_seq_no and \ + not self.__is_next_pre_prepare(pp.ppSeqNo) - self.validatePrePrepare(pp, sender) + if non_fin_reqs or non_next_upstream_pp: + self.enqueue_pre_prepare(pp, sender, non_fin_reqs) + return False + self.validate_pre_prepare(pp, sender) return True def addToPrePrepares(self, pp: PrePrepare) -> None: @@ -1224,7 +1228,7 @@ def isLowestCommitInView(self, commit): def doOrder(self, commit: Commit): key = 
(commit.viewNo, commit.ppSeqNo) - logger.debug("{} ordering COMMIT{}".format(self, key)) + logger.info("{} ordering COMMIT{}".format(self, key)) pp = self.getPrePrepare(*key) assert pp self.addToOrdered(*key) @@ -1391,6 +1395,7 @@ def gc(self, tillSeqNo): self.prePrepares.pop(k, None) self.prepares.pop(k, None) self.commits.pop(k, None) + self.batches.pop(k[1], None) for k in reqKeys: self.requests[k].forwardedTo -= 1 @@ -1465,8 +1470,8 @@ def addToOrdered(self, viewNo: int, ppSeqNo: int): if ppSeqNo > self.lastOrderedPPSeqNo: self.lastOrderedPPSeqNo = ppSeqNo - def enqueuePrePrepare(self, ppMsg: PrePrepare, sender: str, - nonFinReqs: Set=None): + def enqueue_pre_prepare(self, ppMsg: PrePrepare, sender: str, + nonFinReqs: Set=None): if nonFinReqs: logger.debug("Queueing pre-prepares due to unavailability of finalised " "requests. PrePrepare {} from {}".format(ppMsg, sender)) @@ -1504,7 +1509,7 @@ def dequeuePrePrepares(self): self.prePreparesPendingPrevPP[pp.viewNo, pp.ppSeqNo] = (pp, sender) r = 0 - while self.prePreparesPendingPrevPP and self.isNextPrePrepare( + while self.prePreparesPendingPrevPP and self.__is_next_pre_prepare( self.prePreparesPendingPrevPP.iloc[0][1]): _, (pp, sender) = self.prePreparesPendingPrevPP.popitem(last=False) if not self.can_pp_seq_no_be_in_view(pp.viewNo, pp.ppSeqNo): @@ -1622,34 +1627,36 @@ def send(self, msg, stat=None) -> None: :param rid: remote id of one recipient (sends to all recipients if None) :param msg: the message to send """ - logger.display("{} sending {}".format(self, msg.__class__.__name__), + logger.info("{} sending {}".format(self, msg.__class__.__name__), extra={"cli": True, "tags": ['sending']}) logger.trace("{} sending {}".format(self, msg)) if stat: self.stats.inc(stat) self.outBox.append(msg) + def revert_unordered_batches(self, ledger_id): + for key in sorted(self.batches.keys(), reverse=True): + if key > self.lastOrderedPPSeqNo: + count, _, prevStateRoot = self.batches.pop(key) + self.revert(ledger_id, 
prevStateRoot, count) + else: + break + def caught_up_till_pp_seq_no(self, last_caught_up_pp_seq_no): self.addToOrdered(self.viewNo, last_caught_up_pp_seq_no) - # self._remove_till_caught_up_pp_seq_no(last_caught_up_pp_seq_no) + self._remove_till_caught_up_pp_seq_no(last_caught_up_pp_seq_no) def _remove_till_caught_up_pp_seq_no(self, last_caught_up_pp_seq_no): outdated_pre_prepares = set() + outdated_ledger_ids = set() for key, pp in self.prePrepares.items(): if (key[1] <= last_caught_up_pp_seq_no): - outdated_pre_prepares.add((pp.viewNo, pp.ppSeqNo, pp.ledgerId)) + outdated_pre_prepares.add((pp.viewNo, pp.ppSeqNo)) + outdated_ledger_ids.add(pp.ledgerId) self.prePrepares.pop(key, None) self.ordered.add((pp.viewNo, pp.ppSeqNo)) for key in sorted(list(outdated_pre_prepares), key=itemgetter(1), reverse=True): - count, _, prevStateRoot = self.batches[key[1]] - self.batches.pop(key[1]) + self.batches.pop(key[1], None) self.sentPrePrepares.pop(key, None) - self.prepares.pop(key, None) - - ledger_id = key[2] - ledger = self.node.getLedger(ledger_id) - ledger.discardTxns(len(ledger.uncommittedTxns)) - - state = self.node.getState(ledger_id) - state.revertToHead(state.committedHeadHash) + self.prepares.pop(key, None) \ No newline at end of file diff --git a/plenum/server/req_handler.py b/plenum/server/req_handler.py index 74dd8c28e5..473471e208 100644 --- a/plenum/server/req_handler.py +++ b/plenum/server/req_handler.py @@ -64,5 +64,5 @@ def commit(self, txnCount, stateRoot, txnRoot) -> List: def onBatchCreated(self, stateRoot): pass - def onBatchRejected(self, stateRoot=None): + def onBatchRejected(self): pass diff --git a/plenum/test/batching_3pc/catch-up/__init__.py b/plenum/test/batching_3pc/catch-up/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/batching_3pc/catch-up/test_3pc_paused_during_catch_up.py b/plenum/test/batching_3pc/catch-up/test_3pc_paused_during_catch_up.py new file mode 100644 index 0000000000..dedc6022f5 --- 
/dev/null +++ b/plenum/test/batching_3pc/catch-up/test_3pc_paused_during_catch_up.py @@ -0,0 +1,20 @@ +from plenum.common.types import Prepare +from plenum.test.batching_3pc.helper import make_node_syncing, fail_on_execute_batch_on_master +from plenum.test.helper import sendRandomRequests +from plenum.test.test_node import getNonPrimaryReplicas + + +def test_no_ordering_during_syncup(tconf, looper, txnPoolNodeSet, client, wallet1): + non_primary_replica = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0] + + # Put non-primary Node to syncing state once first Prepare is recieved + make_node_syncing( + non_primary_replica, + Prepare) + + # Patch non-primary Node to fail if Order is executed + fail_on_execute_batch_on_master(non_primary_replica.node) + + # Send requests. The non-primary Node should not fail since no ordering is called while syncing + sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) + looper.runFor(5) diff --git a/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc.py b/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc.py new file mode 100644 index 0000000000..2f0f94ed9e --- /dev/null +++ b/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc.py @@ -0,0 +1,30 @@ +import pytest +from plenum.test.batching_3pc.helper import send_and_check, \ + add_txns_to_ledger_before_order +from plenum.test.helper import signed_random_requests +from plenum.test.test_node import getNonPrimaryReplicas + +@pytest.fixture(scope="module") +def tconf(tconf, request): + oldSize = tconf.Max3PCBatchSize + oldTimeout = tconf.Max3PCBatchWait + tconf.Max3PCBatchSize = 10 + tconf.Max3PCBatchWait = 1 + + def reset(): + tconf.Max3PCBatchSize = oldSize + tconf.Max3PCBatchWait = oldTimeout + + request.addfinalizer(reset) + return tconf + +def test_catchup_during_3pc(tconf, looper, txnPoolNodeSet, client, + wallet1): + reqs = signed_random_requests(wallet1, tconf.Max3PCBatchSize) + non_primary_replica = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0] + + 
# Simulate catch-up (add txns to ledger): + # add txns corresponding to the requests after we got enough COMMITs to order, but before ordering. + add_txns_to_ledger_before_order(non_primary_replica, reqs[:tconf.Max3PCBatchSize]) + + send_and_check(reqs, looper, txnPoolNodeSet, client) diff --git a/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc_continue_working.py b/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc_continue_working.py new file mode 100644 index 0000000000..d2986c78a4 --- /dev/null +++ b/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc_continue_working.py @@ -0,0 +1,36 @@ +import pytest +from plenum.test.batching_3pc.helper import send_and_check, \ + add_txns_to_ledger_before_order +from plenum.test.helper import signed_random_requests +from plenum.test.test_node import getNonPrimaryReplicas + +@pytest.fixture(scope="module") +def tconf(tconf, request): + oldSize = tconf.Max3PCBatchSize + oldTimeout = tconf.Max3PCBatchWait + tconf.Max3PCBatchSize = 10 + tconf.Max3PCBatchWait = 1 + + def reset(): + tconf.Max3PCBatchSize = oldSize + tconf.Max3PCBatchWait = oldTimeout + + request.addfinalizer(reset) + return tconf + +def test_catchup_during_3pc_continue_sending(tconf, looper, txnPoolNodeSet, client, + wallet1): + reqs = signed_random_requests(wallet1, tconf.Max3PCBatchSize + 2) + non_primary_replica = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0] + + # Simulate catch-up (add txns to ledger): + # add txns corresponding to the requests after we got enough COMMITs to order, but before ordering. 
+ add_txns_to_ledger_before_order(non_primary_replica, reqs[:tconf.Max3PCBatchSize]) + + send_and_check(reqs, looper, txnPoolNodeSet, client) + + # send another requests and check that they are received + reqs = signed_random_requests(wallet1, 2 * tconf.Max3PCBatchSize - 2) + send_and_check(reqs, looper, txnPoolNodeSet, client) + + diff --git a/plenum/test/batching_3pc/catch-up/test_state_reverted_before_catchup.py b/plenum/test/batching_3pc/catch-up/test_state_reverted_before_catchup.py new file mode 100644 index 0000000000..b8c1fc8a00 --- /dev/null +++ b/plenum/test/batching_3pc/catch-up/test_state_reverted_before_catchup.py @@ -0,0 +1,70 @@ +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test.batching_3pc.helper import send_and_check +from plenum.test.delayers import cDelay +from plenum.test.helper import signed_random_requests, sendRandomRequests, waitForSufficientRepliesForRequests +from plenum.test.test_node import getNonPrimaryReplicas + + +def test_unordered_state_reverted_before_catchup(tconf, looper, txnPoolNodeSet, client, + wallet1): + """ + Check that unordered state is reverted before starting catchup: + - save the initial state on a node + - slow down processing of COMMITs + - send requests + - wait until other nodes come to consensus + - call start of catch-up + - check that the state of the slow node is reverted and equal to the initial one. 
+ """ + # CONFIG + + ledger_id = DOMAIN_LEDGER_ID + non_primary_node = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0].node + non_primary_ledger = non_primary_node.getLedger(ledger_id) + non_primary_state = non_primary_node.getState(ledger_id) + + # send reqs and make sure we are at the same state + reqs = signed_random_requests(wallet1, 10) + send_and_check(reqs, looper, txnPoolNodeSet, client) + + # the state of the node before + committed_ledger_before = non_primary_ledger.tree.root_hash + uncommitted_ledger_before = non_primary_ledger.uncommittedRootHash + committed_state_before = non_primary_state.committedHeadHash + uncommitted_state_before = non_primary_state.headHash + + # EXECUTE + + # Delay commit requests on the node + delay_c = 60 + non_primary_node.nodeIbStasher.delay(cDelay(delay_c)) + + # send requests + reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) + waitForSufficientRepliesForRequests(looper, client, requests=reqs, total_timeout=40) + + committed_ledger_during_3pc = non_primary_node.getLedger(ledger_id).tree.root_hash + uncommitted_ledger_during_3pc = non_primary_node.getLedger(ledger_id).uncommittedRootHash + committed_state_during_3pc = non_primary_node.getState(ledger_id).committedHeadHash + uncommitted_state_during_3pc = non_primary_node.getState(ledger_id).headHash + + # start catchup + non_primary_node.ledgerManager.preCatchupClbk(ledger_id) + + committed_ledger_reverted = non_primary_ledger.tree.root_hash + uncommitted_ledger_reverted = non_primary_ledger.uncommittedRootHash + committed_state_reverted = non_primary_state.committedHeadHash + uncommitted_state_reverted = non_primary_state.headHash + + # CHECK + + # check that initial uncommitted state differs from the state during 3PC but committed does not + assert committed_ledger_before == committed_ledger_during_3pc + assert uncommitted_ledger_before != uncommitted_ledger_during_3pc + assert committed_state_before == committed_state_during_3pc + assert 
uncommitted_state_before != uncommitted_state_during_3pc + + assert committed_ledger_before == committed_ledger_reverted + assert uncommitted_ledger_before == uncommitted_ledger_reverted + assert committed_state_before == committed_state_reverted + assert uncommitted_state_before == uncommitted_state_reverted diff --git a/plenum/test/batching_3pc/helper.py b/plenum/test/batching_3pc/helper.py index b337ebfa13..45e6517ddd 100644 --- a/plenum/test/batching_3pc/helper.py +++ b/plenum/test/batching_3pc/helper.py @@ -1,18 +1,37 @@ +import types from binascii import hexlify -from stp_core.loop.eventually import eventually from plenum.common.constants import DOMAIN_LEDGER_ID -from plenum.test.helper import waitForSufficientRepliesForRequests +from plenum.common.startable import Mode +from plenum.common.txn_util import reqToTxn +from plenum.common.types import ThreePhaseType +from plenum.test.helper import waitForSufficientRepliesForRequests, send_signed_requests def checkNodesHaveSameRoots(nodes, checkUnCommitted=True, - checkCommitted=True): + checkCommitted=True, + checkLastOrderedPpSeqNo=True, + checkSeqNoDb=True): def addRoot(root, collection): if root: collection.add(hexlify(root)) else: collection.add(root) + if checkLastOrderedPpSeqNo: + ppSeqNos = set() + for node in nodes: + ppSeqNos.add(node.replicas[0].lastOrderedPPSeqNo) + + assert len(ppSeqNos) == 1 + + if checkSeqNoDb: + seqNoSizes = set() + for node in nodes: + seqNoSizes.add(node.seqNoDB.size) + + assert len(seqNoSizes) == 1 + if checkUnCommitted: stateRoots = set() txnRoots = set() @@ -35,3 +54,75 @@ def addRoot(root, collection): assert len(stateRoots) == 1 assert len(txnRoots) == 1 + + +def send_and_check(signed_reqs, looper, txnPoolNodeSet, client): + reqs = send_signed_requests(client, signed_reqs) + waitForSufficientRepliesForRequests(looper, client, requests=reqs) + checkNodesHaveSameRoots(txnPoolNodeSet) + + +def add_txns_to_ledger_before_order(replica, reqs): + added = False + origMethod = 
replica.tryOrder + + def tryOrderAndAddTxns(self, commit): + nonlocal added + canOrder, _ = self.canOrder(commit) + node = replica.node + if not added and canOrder: + + ledger_manager = node.ledgerManager + ledger_id = DOMAIN_LEDGER_ID + ledger = ledger_manager.ledgerRegistry[ledger_id].ledger + ledgerInfo = ledger_manager.getLedgerInfoByType(ledger_id) + + ledger_manager.preCatchupClbk(ledger_id) + for req in reqs: + ledger_manager._add_txn(ledger_id, ledger, ledgerInfo, reqToTxn(req)) + ledger_manager.catchupCompleted(DOMAIN_LEDGER_ID, commit.ppSeqNo) + + added = True + + return origMethod(commit) + + replica.tryOrder = types.MethodType(tryOrderAndAddTxns, replica) + +def start_precatchup_before_order(replica): + called = False + origMethod = replica.tryOrder + + def tryOrderAndAddTxns(self, commit): + nonlocal called + canOrder, _ = self.canOrder(commit) + + if not called and canOrder: + ledger_manager = replica.node.ledgerManager + ledger_manager.preCatchupClbk(DOMAIN_LEDGER_ID) + called = True + + return origMethod(commit) + + replica.tryOrder = types.MethodType(tryOrderAndAddTxns, replica) + +def make_node_syncing(replica, three_phase_type: ThreePhaseType): + added = False + + def specificPrePrepares(wrappedMsg): + msg, sender = wrappedMsg + nonlocal added + node = replica.node + if isinstance(msg, three_phase_type) and not added: + node.mode = Mode.syncing + added = True + return 0 + + replica.node.nodeIbStasher.delay(specificPrePrepares) + + +def fail_on_execute_batch_on_master(node): + def fail_process_ordered(self, ordered): + if ordered.instId == 0: + raise Exception('Should not process Ordered at this point') + + node.processOrdered = types.MethodType(fail_process_ordered, node) diff --git a/plenum/test/checkpoints/test_stable_checkpoint.py b/plenum/test/checkpoints/test_stable_checkpoint.py index 0b98ad76aa..c919ecbf93 100644 --- a/plenum/test/checkpoints/test_stable_checkpoint.py +++ b/plenum/test/checkpoints/test_stable_checkpoint.py @@ -4,12 +4,13 
@@ from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -def checkRequestCounts(nodes, req_count, cons_count): +def checkRequestCounts(nodes, req_count, cons_count, batches_count): for node in nodes: assert len(node.requests) == req_count for r in node.replicas: assert len(r.commits) == cons_count assert len(r.prepares) == cons_count + assert len(r.batches) == batches_count def testRequestOlderThanStableCheckpointRemoved(chkFreqPatched, looper, @@ -21,17 +22,17 @@ def testRequestOlderThanStableCheckpointRemoved(chkFreqPatched, looper, timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, retryWait=1, timeout=timeout)) - checkRequestCounts(txnPoolNodeSet, len(reqs), chkFreqPatched.CHK_FREQ-1) + checkRequestCounts(txnPoolNodeSet, len(reqs), chkFreqPatched.CHK_FREQ-1, 1) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, chkFreqPatched.Max3PCBatchSize, 1) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout)) - checkRequestCounts(txnPoolNodeSet, 0, 0) + checkRequestCounts(txnPoolNodeSet, 0, 0, 0) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqs_for_checkpoint + 1, 1) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1, timeout=timeout)) - checkRequestCounts(txnPoolNodeSet, 1, 1) + checkRequestCounts(txnPoolNodeSet, 1, 1, 1) diff --git a/plenum/test/common/test_random_string.py b/plenum/test/common/test_random_string.py new file mode 100644 index 0000000000..bbeba828d4 --- /dev/null +++ b/plenum/test/common/test_random_string.py @@ -0,0 +1,39 @@ +import time +from plenum.common.util import randomString + +# Checks if the function randomString() is returning correct +# length random string for various lengths +def test_random_string1(): + assert (len(randomString(3)) == 3), \ + "Function randomString(3) did not return string of len 3 characters" + assert (len(randomString(20)) == 20), \ + "Function 
randomString() did not return string of default len 20 characters" + assert (len(randomString(32)) == 32), \ + "Function call randomString(32) did not return string of len 32 characters" + assert (len(randomString(128)) == 128), \ + "Function randomString(128) did not return string of len 128 characters" + assert (len(randomString(12800)) == 12800), \ + "Function randomString(12800) did not return string of len 12800 characters" + + +# Checks if there is a collision of the returned random strings +# If we generate a random string with fewer number of characters collision will happen sooner +# Testing several times has shown numbers less than 5 will cause collision 100% +# times if tested for about 1000 iterations +def test_random_string2(): + test_iterations = 1000 + rss = [] + for i in range(test_iterations): + rs = randomString(20) + assert rs not in rss, "random string # %d exists in list, we have a collision" % i + rss.append(rs) + +# Test to check the performance of randomString() function +# def test_random_string3(): +# t0 = time.time() # start time +# test_iterations = 1000000 +# for i in range(test_iterations): +# rs = randomString(5) +# # the code to time goes here +# t1 = time.time() # end time +# print("The time take is: %f" % (t1 - t0)) \ No newline at end of file diff --git a/plenum/test/helper.py b/plenum/test/helper.py index 52abd82e5e..58d74a87ca 100644 --- a/plenum/test/helper.py +++ b/plenum/test/helper.py @@ -12,8 +12,6 @@ List, Any, Sequence from typing import Union -from psutil import Popen - from plenum.client.client import Client from plenum.client.wallet import Wallet from plenum.common.constants import REPLY, REQACK, REQNACK, REJECT, OP_FIELD_NAME @@ -30,6 +28,7 @@ from plenum.test.test_client import TestClient, genTestClient from plenum.test.test_node import TestNode, TestReplica, TestNodeSet, \ checkNodesConnected, ensureElectionsDone, NodeRef +from psutil import Popen from stp_core.common.log import getlogger from 
stp_core.loop.eventually import eventuallyAll, eventually from stp_core.loop.looper import Looper @@ -69,8 +68,8 @@ def checkSufficientRepliesReceived(receivedMsgs: Iterable, logger.debug("received replies for reqId {}: {}". format(reqId, receivedReplies)) assert len(receivedReplies) > fValue, "Received {} replies but expected " \ - "at-least {} for reqId {}".\ - format(len(receivedReplies), fValue+1, reqId) + "at-least {} for reqId {}". \ + format(len(receivedReplies), fValue + 1, reqId) result = checkIfMoreThanFSameItems([reply[f.RESULT.nm] for reply in receivedReplies], fValue) assert result @@ -83,8 +82,8 @@ def checkSufficientRepliesReceived(receivedMsgs: Iterable, def waitForSufficientRepliesForRequests(looper, client, *, # To force usage of names - requests = None, - requestIds = None, + requests=None, + requestIds=None, fVal=None, customTimeoutPerReq=None, add_delay_to_timeout: float = 0, @@ -193,8 +192,8 @@ def getPendingRequestsForReplica(replica: TestReplica, requestType: Any): def assertLength(collection: Iterable[Any], expectedLength: int): assert len( - collection) == expectedLength, "Observed length was {} but " \ - "expected length was {}".\ + collection) == expectedLength, "Observed length was {} but " \ + "expected length was {}". 
\ format(len(collection), expectedLength) @@ -272,14 +271,29 @@ def randomOperation(): } +def random_requests(count): + return [{ + "type": "buy", + "amount": random.randint(10, 100) + } for _ in range(count)] + + +def signed_random_requests(wallet, count): + reqs = random_requests(count) + return [wallet.signOp(req) for req in reqs] + + +def send_signed_requests(client: Client, signed_reqs: Sequence): + return client.submitReqs(*signed_reqs) + + def sendRandomRequest(wallet: Wallet, client: Client): return sendRandomRequests(wallet, client, 1)[0] def sendRandomRequests(wallet: Wallet, client: Client, count: int): - logger.debug('{} random requests will be sent'.format(count)) - reqs = [wallet.signOp(randomOperation()) for _ in range(count)] - return client.submitReqs(*reqs) + return send_signed_requests(client, + signed_random_requests(wallet, count)) def buildCompletedTxnFromReply(request, reply: Reply) -> Dict: @@ -298,7 +312,7 @@ async def msgAll(nodes: TestNodeSet): async def sendMessageAndCheckDelivery(nodes: TestNodeSet, frm: NodeRef, to: NodeRef, - msg: Optional[Tuple]=None, + msg: Optional[Tuple] = None, customTimeout=None): """ Sends message from one node to another and checks that it was delivered @@ -345,7 +359,7 @@ def checkPropagateReqCountOfNode(node: TestNode, identifier: str, reqId: int): def requestReturnedToNode(node: TestNode, identifier: str, reqId: int, - instId: int): + instId: int): params = getAllArgs(node, node.processOrdered) # Skipping the view no and time from each ordered request recvdOrderedReqs = [(p['ordered'].instId, *p['ordered'].reqIdr[0]) for p in params] @@ -380,7 +394,7 @@ def checkPrepareReqSent(replica: TestReplica, identifier: str, reqId: int, replica.canPrepare) assert [(identifier, reqId)] in \ [p["ppReq"].reqIdr and p["ppReq"].viewNo == view_no for p in paramsList] - idx = [p["ppReq"].reqIdr for p in paramsList if p["ppReq"].viewNo == view_no].index([(identifier, reqId)]) + idx = [p["ppReq"].reqIdr for p in paramsList if 
p["ppReq"].viewNo == view_no].index([(identifier, reqId)]) assert rv[idx] @@ -401,7 +415,7 @@ def checkSufficientCommitReqRecvd(replicas: Iterable[TestReplica], viewNo: int, assert received > minimum -def checkReqAck(client, node, idr, reqId, update: Dict[str, str]=None): +def checkReqAck(client, node, idr, reqId, update: Dict[str, str] = None): rec = {OP_FIELD_NAME: REQACK, f.REQ_ID.nm: reqId, f.IDENTIFIER.nm: idr} if update: rec.update(update) @@ -412,7 +426,7 @@ def checkReqAck(client, node, idr, reqId, update: Dict[str, str]=None): assert client.inBox.count(expected) > 0 -def checkReqNack(client, node, idr, reqId, update: Dict[str, str]=None): +def checkReqNack(client, node, idr, reqId, update: Dict[str, str] = None): rec = {OP_FIELD_NAME: REQNACK, f.REQ_ID.nm: reqId, f.IDENTIFIER.nm: idr} if update: rec.update(update) @@ -443,7 +457,7 @@ def wait_for_replies(looper, client, idr, reqId, count, custom_timeout=None): def checkReqNackWithReason(client, reason: str, sender: str): found = False for msg, sdr in client.inBox: - if msg[OP_FIELD_NAME] == REQNACK and reason in msg.get(f.REASON.nm, "")\ + if msg[OP_FIELD_NAME] == REQNACK and reason in msg.get(f.REASON.nm, "") \ and sdr == sender: found = True break @@ -467,7 +481,7 @@ def waitReqNackWithReason(looper, client, reason: str, sender: str): def checkRejectWithReason(client, reason: str, sender: str): found = False for msg, sdr in client.inBox: - if msg[OP_FIELD_NAME] == REJECT and reason in msg.get(f.REASON.nm, "")\ + if msg[OP_FIELD_NAME] == REJECT and reason in msg.get(f.REASON.nm, "") \ and sdr == sender: found = True break @@ -496,7 +510,7 @@ def waitReqNackFromPoolWithReason(looper, nodes, client, reason): def waitRejectFromPoolWithReason(looper, nodes, client, reason): for node in nodes: waitRejectWithReason(looper, client, reason, - node.clientstack.name) + node.clientstack.name) def checkViewNoForNodes(nodes: Iterable[TestNode], expectedViewNo: int = None): @@ -520,7 +534,7 @@ def 
checkViewNoForNodes(nodes: Iterable[TestNode], expectedViewNo: int = None): return vNo -def waitForViewChange(looper, nodeSet, expectedViewNo=None, customTimeout = None): +def waitForViewChange(looper, nodeSet, expectedViewNo=None, customTimeout=None): """ Waits for nodes to come to same view. Raises exception when time is out diff --git a/plenum/test/input_validation/helper.py b/plenum/test/input_validation/helper.py index bd44bf0e2b..e03a145b92 100644 --- a/plenum/test/input_validation/helper.py +++ b/plenum/test/input_validation/helper.py @@ -219,8 +219,10 @@ def positive_test_cases(self): class VerkeyField(NonEmptyStringField): - # TODO implement - pass + # TODO implement positive cases + @property + def negative_test_cases(self): + return ['', ] class RoleField(TestFieldBase): diff --git a/plenum/test/input_validation/message_validation/__init__.py b/plenum/test/input_validation/message_validation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/input_validation/message_validation/test_commit_message.py b/plenum/test/input_validation/message_validation/test_commit_message.py new file mode 100644 index 0000000000..4631617d85 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_commit_message.py @@ -0,0 +1,26 @@ +import pytest +from plenum.common.types import Commit +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField + + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("instId", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), + ("ppSeqNo", NonNegativeNumberField), +]) + + +def test_hash_expected_type(): + assert Commit.typename == "COMMIT" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Commit.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(Commit.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert 
isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_consistencyproof_message.py b/plenum/test/input_validation/message_validation/test_consistencyproof_message.py new file mode 100644 index 0000000000..984327ea3e --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_consistencyproof_message.py @@ -0,0 +1,30 @@ +import pytest +from plenum.common.types import ConsistencyProof +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ + LedgerIdField, MerkleRootField, IterableField + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("ledgerId", LedgerIdField), + ("seqNoStart", NonNegativeNumberField), + ("seqNoEnd", NonNegativeNumberField), + ("ppSeqNo", NonNegativeNumberField), + ("oldMerkleRoot", MerkleRootField), + ("newMerkleRoot", MerkleRootField), + ("hashes", IterableField), +]) + + +def test_hash_expected_type(): + assert ConsistencyProof.typename == "CONSISTENCY_PROOF" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(ConsistencyProof.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(ConsistencyProof.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_instanceChange_message.py b/plenum/test/input_validation/message_validation/test_instanceChange_message.py new file mode 100644 index 0000000000..d9625456b3 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_instanceChange_message.py @@ -0,0 +1,25 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField +from plenum.common.types import InstanceChange + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("viewNo", NonNegativeNumberField), + ("reason", NonNegativeNumberField) +]) + + +def 
test_hash_expected_type(): + assert InstanceChange.typename == "INSTANCE_CHANGE" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(InstanceChange.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(InstanceChange.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_ledgerstatus_message.py b/plenum/test/input_validation/message_validation/test_ledgerstatus_message.py new file mode 100644 index 0000000000..d2806fcaa4 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_ledgerstatus_message.py @@ -0,0 +1,27 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ + LedgerIdField, MerkleRootField +from plenum.common.types import LedgerStatus + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("ledgerId", LedgerIdField), + ("txnSeqNo", NonNegativeNumberField), + ("merkleRoot", MerkleRootField), +]) + + +def test_hash_expected_type(): + assert LedgerStatus.typename == "LEDGER_STATUS" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(LedgerStatus.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(LedgerStatus.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_nomination_message.py b/plenum/test/input_validation/message_validation/test_nomination_message.py new file mode 100644 index 0000000000..0ca3d2e12e --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_nomination_message.py @@ -0,0 +1,28 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ 
+ NonEmptyStringField +from plenum.common.types import Nomination + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("name", NonEmptyStringField), + ("instId", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), + ("ordSeqNo", NonNegativeNumberField), +]) + + +def test_hash_expected_type(): + assert Nomination.typename == "NOMINATE" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Nomination.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(Nomination.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_ordered_message.py b/plenum/test/input_validation/message_validation/test_ordered_message.py new file mode 100644 index 0000000000..f62021c80e --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_ordered_message.py @@ -0,0 +1,32 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ + LedgerIdField, IterableField, TimestampField, HexField +from plenum.common.types import Ordered + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("instId", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), + ("reqIdr", IterableField), + ("ppSeqNo", NonNegativeNumberField), + ("ppTime", TimestampField), + ("ledgerId", LedgerIdField), + ("stateRootHash", HexField), + ("txnRootHash", HexField), +]) + + +def test_hash_expected_type(): + assert Ordered.typename == "ORDERED" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Ordered.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(Ordered.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git 
a/plenum/test/input_validation/message_validation/test_prepare_message.py b/plenum/test/input_validation/message_validation/test_prepare_message.py new file mode 100644 index 0000000000..5ed19ae96d --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_prepare_message.py @@ -0,0 +1,31 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ + NonEmptyStringField, \ + HexField +from plenum.common.types import Prepare + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("instId", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), + ("ppSeqNo", NonNegativeNumberField), + ("digest", NonEmptyStringField), + ("stateRootHash", HexField), + ("txnRootHash", HexField), +]) + + +def test_hash_expected_type(): + assert Prepare.typename == "PREPARE" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Prepare.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(Prepare.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_preprepare_message.py b/plenum/test/input_validation/message_validation/test_preprepare_message.py new file mode 100644 index 0000000000..6f86c3cb8b --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_preprepare_message.py @@ -0,0 +1,35 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ + LedgerIdField, IterableField, NonEmptyStringField, \ + TimestampField, HexField +from plenum.common.types import PrePrepare + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("instId", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), + ("ppSeqNo", NonNegativeNumberField), + ("ppTime", TimestampField), + ("reqIdr", IterableField), + ("discarded", 
NonNegativeNumberField), + ("digest", NonEmptyStringField), + ("ledgerId", LedgerIdField), + ("stateRootHash", HexField), + ("txnRootHash", HexField), +]) + + +def test_hash_expected_type(): + assert PrePrepare.typename == "PREPREPARE" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(PrePrepare.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(PrePrepare.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_primary_message.py b/plenum/test/input_validation/message_validation/test_primary_message.py new file mode 100644 index 0000000000..678162bb4c --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_primary_message.py @@ -0,0 +1,28 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ + NonEmptyStringField +from plenum.common.types import Primary + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("name", NonEmptyStringField), + ("instId", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), + ("ordSeqNo", NonNegativeNumberField), +]) + + +def test_hash_expected_type(): + assert Primary.typename == "PRIMARY" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Primary.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(Primary.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_propagate_message.py b/plenum/test/input_validation/message_validation/test_propagate_message.py new file mode 100644 index 0000000000..ebe578edd2 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_propagate_message.py @@ 
-0,0 +1,25 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonEmptyStringField +from plenum.common.types import Propagate, ClientMessageValidator + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("request", ClientMessageValidator), + ("senderClient", NonEmptyStringField), +]) + + +def test_hash_expected_type(): + assert Propagate.typename == "PROPAGATE" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Propagate.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(Propagate.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_reelection_message.py b/plenum/test/input_validation/message_validation/test_reelection_message.py new file mode 100644 index 0000000000..d4b3231d3a --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_reelection_message.py @@ -0,0 +1,28 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ + IterableField +from plenum.common.types import Reelection + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("instId", NonNegativeNumberField), + ("round", NonNegativeNumberField), + ("tieAmong", IterableField), + ("viewNo", NonNegativeNumberField), +]) + + +def test_hash_expected_type(): + assert Reelection.typename == "REELECTION" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Reelection.schema).keys() + assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + + +def test_has_expected_validators(): + schema = dict(Reelection.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py 
b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py index cf7bbb840b..a5b06d7814 100644 --- a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py +++ b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py @@ -17,6 +17,9 @@ logger = getlogger() +@pytest.fixture(scope="function", autouse=True) +def limitTestRunningTime(): + return 200 # noinspection PyIncorrectDocstring def testProtocolInstanceCannotBecomeActiveWithLessThanFourServers( diff --git a/plenum/test/node_catchup/test_new_node_catchup.py b/plenum/test/node_catchup/test_new_node_catchup.py index 00f930b46e..ed0cf20a7d 100644 --- a/plenum/test/node_catchup/test_new_node_catchup.py +++ b/plenum/test/node_catchup/test_new_node_catchup.py @@ -154,21 +154,3 @@ def send_and_chk(ledger_state): # cons_proof = updateNamedTuple(cons_proof, seqNoEnd=cons_proof.seqNoStart, # seqNoStart=cons_proof.seqNoEnd) # send_and_chk(LedgerState.synced) - - -def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet, - nodeSetWithNodeAddedAfterSomeTxns): - """ - A new node that joins after some transactions should stash new transactions - until it has caught up - :return: - """ - looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns - sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) - - for node in txnPoolNodeSet[:4]: - for replica in node.replicas: - for commit in replica.commits.values(): - assert newNode.name not in commit.voters - for prepare in replica.prepares.values(): - assert newNode.name not in prepare.voters diff --git a/plenum/test/node_catchup/test_new_node_catchup2.py b/plenum/test/node_catchup/test_new_node_catchup2.py new file mode 100644 index 0000000000..4d31a4db61 --- /dev/null +++ b/plenum/test/node_catchup/test_new_node_catchup2.py @@ -0,0 +1,80 @@ +from itertools import chain +from time import perf_counter + +import pytest + +from plenum.common.constants import 
DOMAIN_LEDGER_ID, LedgerState +from plenum.common.util import updateNamedTuple +from plenum.test.delayers import cqDelay, cr_delay +from plenum.test.test_node import ensureElectionsDone +from stp_zmq.zstack import KITZStack + +from stp_core.common.log import getlogger +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + check_ledger_state + +# Do not remove the next import +from plenum.test.node_catchup.conftest import whitelist + +logger = getlogger() +txnCount = 5 +catchup_delay = 20 + + +@pytest.fixture(scope="module") +def nodes_slow_to_process_catchup_reqs(txnPoolNodeSet): + """ + This will make the new node slow to complete the catchup and hence will + not send any 3PC messages till catchup is complete + """ + for node in txnPoolNodeSet: + node.nodeIbStasher.delay(cqDelay(catchup_delay)) + + +def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet, + nodes_slow_to_process_catchup_reqs, + nodeCreatedAfterSomeTxns): + """ + A new node that joins after some transactions should stash new transactions + until it has caught up + :return: + """ + + looper, new_node, client, wallet, newStewardClient, newStewardWallet = \ + nodeCreatedAfterSomeTxns + txnPoolNodeSet.append(new_node) + old_nodes = txnPoolNodeSet[:-1] + ensureElectionsDone(looper, txnPoolNodeSet) + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) + new_node_replica_names = {r.instId: r.name for r in new_node.replicas} + + def chk_commits_prepares_recvd(count): + counts = {} + for node in old_nodes: + for replica in node.replicas: + if replica.instId not in counts: + counts[replica.instId] = 0 + nm = new_node_replica_names[replica.instId] + for commit in replica.commits.values(): + counts[replica.instId] += int(nm in commit.voters) + for prepare in replica.prepares.values(): + counts[replica.instId] += int(nm in prepare.voters) + for c in counts.values(): + assert count == c + + 
chk_commits_prepares_recvd(0) + + for node in old_nodes: + node.resetDelays() + node.force_process_delayeds() + + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1]) + + looper.runFor(20) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2) + + # Commits and Prepares are received by all old nodes + chk_commits_prepares_recvd(2 * (len(old_nodes))) + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4]) diff --git a/plenum/test/node_catchup/test_node_catchup_after_disconnect.py b/plenum/test/node_catchup/test_node_catchup_after_disconnect.py index 4d262fdb89..7ec2bb8185 100644 --- a/plenum/test/node_catchup/test_node_catchup_after_disconnect.py +++ b/plenum/test/node_catchup/test_node_catchup_after_disconnect.py @@ -40,4 +40,4 @@ def testNodeCatchupAfterDisconnect(newNodeCaughtUp, txnPoolNodeSet, logger.debug("Sending more requests") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10) - checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1]) + checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1]) \ No newline at end of file diff --git a/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py b/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py index f74e733c66..9b44681991 100644 --- a/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py +++ b/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py @@ -1,7 +1,7 @@ from stp_core.common.log import getlogger from plenum.test.test_node import ensure_node_disconnected from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import waitNodeDataEquality, waitNodeDataUnequality +from plenum.test.node_catchup.helper import waitNodeDataEquality, waitNodeDataUnequality, checkNodeDataForEquality # Do not remove the next import from plenum.test.node_catchup.conftest import whitelist @@ -22,14 +22,22 @@ def testNodeCatchupAfterLostConnection(newNodeCaughtUp, txnPoolNodeSet, 
logger.debug("Stopping node {} with pool ledger size {}". format(newNode, newNode.poolManager.txnSeqNo)) looper.removeProdable(newNode) + # TODO: Check if the node has really stopped processing requests? logger.debug("Sending requests") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) # Make sure new node got out of sync waitNodeDataUnequality(looper, newNode, *txnPoolNodeSet[:-1]) + logger.debug("Ensure node {} gets disconnected".format(newNode)) ensure_node_disconnected(looper, newNode, txnPoolNodeSet[:-1]) + logger.debug("Starting the stopped node, {}".format(newNode)) looper.add(newNode) + logger.debug("Waiting for the node to catch up, {}".format(newNode)) waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) + + logger.debug("Sending more requests") + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10) + checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1]) diff --git a/plenum/test/node_request/test_already_processed_request.py b/plenum/test/node_request/test_already_processed_request.py new file mode 100644 index 0000000000..ae6de19476 --- /dev/null +++ b/plenum/test/node_request/test_already_processed_request.py @@ -0,0 +1,82 @@ +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ + send_signed_requests, waitForSufficientRepliesForRequests +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.spy_helpers import getAllReturnVals + + +def test_all_replicas_hold_request_keys(looper, txnPoolNodeSet, client1, + wallet1, client1Connected): + """ + Client re-sending request and checking that nodes picked the reply from + ledger and did not process the request again + """ + + def get_method_call_count(method): + counts = set() + for node in txnPoolNodeSet: + c = node.spylog.count(method) + counts.add(c) + assert len(counts) == 1 + return counts.pop() + + def get_getReplyFromLedger_call_count(): + return 
get_method_call_count(next(iter(txnPoolNodeSet)).getReplyFromLedger) + + def get_recordAndPropagate_call_count(): + return get_method_call_count(next(iter(txnPoolNodeSet)).recordAndPropagate) + + def get_last_returned_val(): + rvs = [] + for node in txnPoolNodeSet: + rv = getAllReturnVals(node, node.getReplyFromLedger) + rvs.append(rv[0]) + # All items are same in the list + assert rvs.count(rvs[0]) == len(txnPoolNodeSet) + return rvs[0] + + # Send a request + rlc1 = get_getReplyFromLedger_call_count() + rpc1 = get_recordAndPropagate_call_count() + req1, = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) + rlc2 = get_getReplyFromLedger_call_count() + rpc2 = get_recordAndPropagate_call_count() + assert rlc2 - rlc1 == 1 # getReplyFromLedger was called + assert rpc2 - rpc1 == 1 # recordAndPropagate was called + r1 = get_last_returned_val() + assert r1 is None # getReplyFromLedger returned None since had not seen request + + req2, = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) + assert req2.reqId != req1.reqId + rlc3 = get_getReplyFromLedger_call_count() + rpc3 = get_recordAndPropagate_call_count() + assert rlc3 - rlc2 == 1 # getReplyFromLedger was called again + assert rpc3 - rpc2 == 1 # recordAndPropagate was called again + r2 = get_last_returned_val() + assert r2 is None # getReplyFromLedger returned None since had not seen request + + # Reply for the first request, which is going to be sent again + rep1 = client1.getReply(req1.identifier, req1.reqId) + + # Clear the client so that test waits for client getting reply + client1.inBox.clear() + client1.txnLog.reset() + + # Client re-sending request + req3, = send_signed_requests(client1, [req1, ]) + waitForSufficientRepliesForRequests(looper, client1, requests=[req3,]) + assert req3.reqId == req1.reqId + rlc4 = get_getReplyFromLedger_call_count() + rpc4 = get_recordAndPropagate_call_count() + assert rlc4 - rlc3 == 1 # getReplyFromLedger was called again + assert rpc4 - rpc3 == 0 # 
recordAndPropagate was not called + r3 = get_last_returned_val() + assert r3 is not None # getReplyFromLedger did not return None this time since had seen request + rep3 = client1.getReply(req3.identifier, req3.reqId) + + # Since txnTime is not stored in ledger and reading from ledger return + # all possible fields from transactions + rep3 = {k:v for k,v in rep3[0].items() if v is not None} + rep1 = {k:v for k,v in rep1[0].items() if k in rep3} + + assert rep3 == rep1 # The reply client got is same as the previous one diff --git a/plenum/test/node_request/test_pre_prepare/test_ignore_pre_prepare_pp_seq_no_less_than_expected.py b/plenum/test/node_request/test_pre_prepare/test_ignore_pre_prepare_pp_seq_no_less_than_expected.py new file mode 100644 index 0000000000..e309821b6c --- /dev/null +++ b/plenum/test/node_request/test_pre_prepare/test_ignore_pre_prepare_pp_seq_no_less_than_expected.py @@ -0,0 +1,27 @@ +import pytest + +from plenum.common.util import adict +from plenum.test.helper import sendRandomRequests, waitForSufficientRepliesForRequests +from plenum.test.test_node import getNonPrimaryReplicas + + +def test_ignore_pre_prepare_pp_seq_no_less_than_expected(looper, + nodeSet, up, + wallet1, client1): + """ + A node should NOT pend a pre-prepare request which + has ppSeqNo less than expected. 
+ + https://jira.hyperledger.org/browse/INDY-159, + https://jira.hyperledger.org/browse/INDY-160 + + """ + replica = getNonPrimaryReplicas(nodeSet, instId=0)[0] + replica.lastOrderedPPSeqNo = 10 + + requests = sendRandomRequests(wallet1, client1, 1) + waitForSufficientRepliesForRequests(looper, client1, + requests=requests) + assert len(replica.prePreparesPendingPrevPP) == 0, \ + "the pending request buffer is empty" + diff --git a/plenum/test/primary_election/test_primary_election_case6.py b/plenum/test/primary_election/test_primary_election_case6.py new file mode 100644 index 0000000000..739cdebacf --- /dev/null +++ b/plenum/test/primary_election/test_primary_election_case6.py @@ -0,0 +1,64 @@ +import pytest + +from plenum.common.types import Primary, Nomination, Reelection +from plenum.test.delayers import delay +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.test_node import checkNodesConnected, \ + checkProtocolInstanceSetup +from stp_core.loop.eventually import eventually + + +@pytest.fixture(scope='module') +def case_6_setup(startedNodes): + A, B, C, D = startedNodes.nodes.values() + + # A will get Nomination, Primary, Reelection from after elections get over + for m in (Nomination, Primary, Reelection): + delay(m, frm=B, to=A, howlong=120) + + # A will get Primary earlier than Nominates + delay(Nomination, frm=(C, D), to=A, howlong=10) + + +# noinspection PyIncorrectDocstring +@pytest.fixture(scope='module') +def elections_done(case_6_setup, looper, keySharedNodes): + # Make sure elections are done successfully + nodeSet = keySharedNodes + A, B, C, D = nodeSet.nodes.values() + looper.run(checkNodesConnected(nodeSet)) + + inst_ids = (0, 1) + + def chk(): + # Check that each Primary is received by A before A has sent any Primary + primary_recv_times = { + i: [entry.starttime for entry in A.elector.spylog.getAll( + A.elector.processPrimary) if entry.params['prim'].instId == i] + for i in inst_ids + } + primary_send_times = 
{ + i: [entry.starttime for entry in A.elector.spylog.getAll( + A.elector.sendPrimary) if entry.params['instId'] == 0] + for i in inst_ids + } + + for i in inst_ids: + assert primary_send_times[i][0] > max(primary_recv_times[i]) + + looper.run(eventually(chk, retryWait=1, timeout=15)) + checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1) + + for i in inst_ids: + assert B.replicas[i].name not in A.elector.nominations[i] + assert B.replicas[i].name not in A.elector.primaryDeclarations[i] + + +def test_primary_election_case6(elections_done, looper, client1, wallet1): + """ + A is disconnected with B so A does not get any Nomination/Primary from + B (simulated by a large delay). A gets Nominations delayed due to which is + sends Primary only after it has received Primary from other 2 nodes. + A should still be able to select a primary and the pool should function. + """ + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) diff --git a/plenum/test/test_node.py b/plenum/test/test_node.py index 3db4eb01a8..dda3a2ce86 100644 --- a/plenum/test/test_node.py +++ b/plenum/test/test_node.py @@ -236,7 +236,9 @@ def getDomainReqHandler(self): Node.checkPerformance, Node.processStashedOrderedReqs, Node.lost_master_primary, - Node.propose_view_change + Node.propose_view_change, + Node.getReplyFromLedger, + Node.recordAndPropagate ]) class TestNode(TestNodeCore, Node): @@ -262,11 +264,14 @@ def clientStackClass(self): def getLedgerManager(self): return TestLedgerManager(self, ownedByNode=True, - postAllLedgersCaughtUp=self.allLedgersCaughtUp) + postAllLedgersCaughtUp=self.allLedgersCaughtUp, + preCatchupClbk=self.preLedgerCatchUp) @spyable(methods=[ - PrimaryElector.discard + PrimaryElector.discard, + PrimaryElector.processPrimary, + PrimaryElector.sendPrimary ]) class TestPrimaryElector(PrimaryElector): def __init__(self, *args, **kwargs): diff --git a/plenum/test/waits.py b/plenum/test/waits.py index 7200f10a45..a51142155a 100644 --- 
a/plenum/test/waits.py +++ b/plenum/test/waits.py @@ -69,7 +69,7 @@ def expectedPoolInterconnectionTime(nodeCount): # https://evernym.atlassian.net/browse/SOV-995 # multiply by 2 because we need to re-create connections which can be done on a second re-try only # (we may send pings on some of the re-tries) - return min(90, + return min(0.8 * config.TestRunningTimeLimitSec, interconnectionCount * nodeConnectionTimeout + 2 * KITZStack.RETRY_TIMEOUT_RESTRICTED + 2) diff --git a/setup.py b/setup.py index 4129f76f53..b9be59663a 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['ledger==0.2.14', 'stp==0.1.10', + install_requires=['ledger==0.2.15', 'stp==0.1.11', 'state-trie==0.1.1', 'jsonpickle', 'prompt_toolkit==0.57', 'pygments', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', From 22c3b7a63bd9b6c334c5e1ea0148e7b1b0734068 Mon Sep 17 00:00:00 2001 From: Devin Fisher Date: Wed, 21 Jun 2017 12:22:00 -0600 Subject: [PATCH 012/100] Stable build (#227) * add super initialization, update usage of super methods * update docstring of commit * remove commitReq since it does the same as commit * adding timeout to test utility method * changes for load testing * sanitising log messages * Versioning fix (#113) * changes for request handlers * [Closes SOV-905] Big Fixed: Orientdb is not installing with the latest build 0.3.16 * changing to rlp encoding * moving error codes to stp * upgrading version of stp * req_handler changes * fixing import * temp commit * Stp (#116) * use common stp interface for changing of node's HA * get rid of explicitly calling zmq and raet; move all transport stuff into stp interfaces. 
* fix tests; increment stp version * add AuthMode enum instead of auto parameter * fixed testConnectWithoutKeySharingFails test * increased test timeout * minor changes to serialization * Plenum test runner fix (#115) * Fixed a bug in plenum test runner with an incorrect test results summary pattern. Previously it had been expected that the line with the test results summary must begin with equality signs and end with them. But this line is padded with equality signs only if it is shorter than 80 characters. Now the test results summary patterns don't require equality signs. Instead they may match only the last line in the file. * Corrected test results summary patterns in plenum test runner. * updating apply of request handler * fix import of OP_FIELD_NAME * up version of stp to 0.1.24 * Agent issuer wallet restoration (#117) * supplied keys parameter as True to wallet encoding and decoding functions so that dictionary keys which are custom objects restore properly, otherwise it used to be restored as string as opposed to the actual object it was before persistence * minor changes in list keyrings to show issuer wallet for agent as well * minor change to print base wallet first and then any other related wallets * up stp version to 0.1.26 * skip tests in test_new_node_catchup.py * scheduling primary selection * Skip failing tests (#122) * skip test testChangeNodeHaForNonPrimary due to SOV-941 * skip test testProtocolInstanceCannotBecomeActiveWithLessThanFourServers due to SOV-940 * leveldb HashStore * parametrize test timeouts (#120) * rename expectedWait to expectedNodeInterconnectionTime * add signature for other time expect functions * use named timeouts in conftest * move totalConnections from waits to utils * use named timeout in ensureElectionsDone * use float instead of int for seconds * use default args of ensureElectionsDone where it is possible * use named argument for timeouts * use named timeout in helper * use names for parameters * rename 'timeoutPerReq' 
of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies to 'customTimeoutPerRequest' to emphasize the fact that there is a default one * use default timeout instead of custom timeout of checkSufficientRepliesForRequests and sendReqsToNodesAndVerifySuffReplies where it can be used; use named argument for timeouts * add comments for two functions with similar names * rename checkSufficientRepliesRecvd to checkSufficientRepliesReceived * rename checkSufficientRepliesForRequests to waitForSufficientRepliesForRequests * add 'returns' to docstrings * fix passing of args to waits.expectedElectionTimeout * use waitForSufficientRepliesForRequests and it's default timeout instead of checkSufficientRepliesReceived everywhere it is possible * update doc lines * create waitForViewChange and use it in ensureView * replace usages of checkViewNoForNodes with eventually by waitForViewChange * add waits.expectedNodeToNodeMessageDeliveryTime and use it in sendMsgAndCheck * rename checkMsg to checkMessageReceived * rename sendMsgAndCheck to sendMessageAndCheckDelivery * add docstring to sendMessageAndCheckDelivery * remove unused helper function * add expectedGetReadyTimeout and use it in checkPoolReady * rename overrideTimeout parameter to customTimeout in checkNodesConnected * use default timeout of checkNodesConnected * create expectedPoolLedgerCheck and expectedClientConnectionTimeout waits and use them * add todo for ensureDisconnectedToNodes * update waits.expectedPoolLedgerCheck * add todo for checkNodesParticipating * add requestIds parameter for waitForSufficientRepliesForRequests * update docstring of waitForSufficientRepliesForRequests * fix waitForSufficientRepliesForRequests * remove unused imports from test_log_filtering * use named timeout in test_status_command * use waits.expectedTransactionExecutionTime in testTransactions * refactor testTransactions * use waitRequestSuccess and waitBalanceChange in test_cli_with_auction_plugin, move them to 
test.cli.helper * use named timeout in test_basic_client_commands * use named timeout in helper.checkRequest * create waitClientConnected and use it instead of checkClientConnected with eventually * add docstrings * rename checkNodeStarted to waitNodeStarted and use named timeout 'expectedNodeStartUpTimeout' in it * rename expectedGetReadyTimeout to expectedPoolGetReadyTimeout * rename checkAllNodesStarted to waitAllNodesStarted * fix default value of customTimeout of checkPoolReady * create waitAllNodesUp and use it instead of checkAllNodesUp with eventually * create waitReplyCount and use instead of checkReplyCount and eventually * use named timeouts in test_client * use named timeouts in some more tests * add basic implementation for waits.expectedCatchupTime and use it * make expectedCatchupTime get custom ConsistencyProofsTimeout as parameter * use named timeout in testNodeDiscardMessageFromUnknownView * use waits.expectedElectionTimeout and rename timeout arg to custonTimeout in checkEveryNodeHasAtMostOnePrimary * rename timeout argument of plenum/test/node_catchup/test_discard_view_no.py to customTimeout and make it used named timeouts from waits as default * update timeouts in testNodeDiscardMessageFromUnknownView * create waits.expectedRequestStashingTime and use it * add TODO to test_catchup_delayed_nodes * create waitNodeLedgersEquality and use it instead of directo usage of checkNodeLedgersEquality * use waits.expectedPoolLedgerCheck in waitNodeLedgersEquality * use named timeout in testOrderingCase2 * add waits.expectedNominationTimeout and use it * use named timeout in some more tests * add missing empty lines * update waits * add 'TODO[slow-factor]' * update timeouts in the tests * fix testTestNodeDelay and missed import * skip testMultipleRequests test * skip testStatusAfterClientAdded test * fix testInstChangeWithLowerRatioThanDelta * fix test_new_node_catchup.py * fix testChangeHaPersistsPostNodesRestart * fix testAdd2NewNodes * increase 
expectedElectionTimeout timeout * rename logger.warn => logger.warning * tune timeouts in the tests * make sections in waits.py * add --repat for runner.py * increase expectedCatchupTime timeout * improve runner.py * tune the test timeouts * skip some catchup tests * parametrize test timeouts * rm eventually.py * fix testQueueingReqFromFutureView, testNumOfPrePrepareWithFPlusOneFaults, testNumOfPrepareWithFPlusOneFaults * fix testProtocolInstanceCannotBecomeActiveWithLessThanFourServers * tune propagate and preprepare test timeouts * skip testNumOf*WithFPlusOneFaults * removing commented code and fixing bugs in pool request handler * lowering Delta in pool_transactions conftest * fix jenkins build (#123) * fixed deps in setup.py * added pypi publishing * updated deps * updated deps stp-dev -> stp-perf-imp * removed unnecessary sources * updated setup.py to hold correct deps names and self name for publishing * fixed plenum for the latest stp (#127) * fixed plenum for the latest stp * increment stp version * archive runner.py output for all agents (#107) * archive runner.py results * using env variable NODE_NAME instaed of func param for artifacts * configured archiveArtifacts to allow empty/no archive * sending last pre-prepare sequence number in consistency proof so already processed request can be ignored, re-enabling tests and re-adding proper timeouts * Feature Added: Deb build * Feature Added: Deb build * Do view change if a primary is disconnected (#128) * check whether function is a partial function in getCallableName * add tests for view change when primary goes down * start view change if primary went offline * use startViewChangeIfPrimaryWentOffline instead of doElectionIfNeeded * processing stashed ordered requests after all ledgers have caught up and applying any request if mode changed while processing and removing obsolete code from pool manager * Unclosed file warnings (#124) * Removed "ignore" rule for ResourceWarning about an unclosed file from the 
warnings filter. * Fixed some causes of ResourceWarnings about unclosed files. * - Fixed some causes of ResourceWarnings about unclosed files. - Corrected plenum.common.txn_util.updateGenesisPoolTxnFile function. * - Fixed the rest causes of ResourceWarnings about unclosed files. - Removed TimeAndSizeRotatingFileHandler class which is not used anymore (the class with the same name from stp is now used instead). * Updated stp-dev dependency to the new version. * Reverted update of stp-dev dependency. * Skipped the tests in plenum.test.test_log_rotation module since they require stp-dev dependency of a newer version (0.1.28 or higher). * refactoring, documentation and updating setup to get rid of pyorient * refactoring * Updated ledger dep * deprecating orientdb * check state equality too while checking ledger equality in tests * removing obsolete test helper method * Agent generalization (#126) * refactored test_network_setup; added some role-base helper classes for members of the network * fixed issue with wrong import and minor refactoring * more work on tests * bugfixes * interim changes during refactoring * adding init_utils * cleaning up before keys * removed type hint causing issues; this issue is fixed in Python 3.5.3 with a new typing.Coroutine * added a check in DidVerifier to check that we have a verkey * fixed unreliable tmpdir test fixture * modifications to tests to make more robust when running from command line * changed spyable to be able to monkey patch a single method * Added a run_till_quiet, waits for quiet prods This function will wait till there is a quiet period from the prodables. Dependent on prodables correctly reporting events. Will run forever if there is always activity. 
* small tweak to log format to keep | aligned * changes to fix some issues happening in end-to-end test * using same seed for creating keys * ignore log files * added a small test for verifier * removed some exception classes that were moved to STP * init Local keys for client in test network * Add default values for role and verkey in state and fix test timeouts * refactor * import logger * updated to use new API from shared lib * updated to use new API from shared lib * handle pyorient import * handle pyorient import * increment stp version * Enabled the tests in plenum.test.test_log_rotation module. (#135) * increment stp version * close files in tests * check for close method before calling it (#136) * minor fix in batch creation logic and caching primary replica no in node to avoid recomputation * fix testNumOf*WithFPlusOneFaults tests * Updated stp dep * Merge branches '3pc-batch' and 'master' of github.com:evernym/plenum into 3pc-batch # Conflicts: # plenum/common/test_network_setup.py # plenum/server/node.py # plenum/test/test_node.py # setup.py * add missing __init__ for member package * added missed __init__.py for common/member * skip temporarily a failing test * fix primary disconnection and new primary not being same as old for master instance * add more tests for primary disconnection, same primary not elected in the next view and refactor * add new test for minor network glitch with primary * add missing parameter to bootstrap test network (#143) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * minor change in a test and removing sip statements * fix the catchup tests (#140) * make tests a little robust * fix test, check for disconnections 
after processing sent and received messages and little documentation * Close SOV-976 (#144) * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-947] Sovrin-node test testTrusteeCannotChangeVerkey fails on Linux * add missing parameter to bootstrap test network * add missing parameter to bootstrap test network * [Closes SOV-976] Unable to create the genesis transaction files * Merge branches '3pc-batch' and 'master' of github.com:evernym/plenum into 3pc-batch # Conflicts: # plenum/test/node_catchup/test_new_node_catchup.py # setup.py * up stp to 0.1.42 (#148) * refactor, enable test, adding timeout for view change and create remotes as soon as nodestack starts * fix broken test * [Closes SOV-981] Bug Fixed: Unable to create pool transaction file on the client machine without passing it a node number (#149) * increment stp * increment ledger * up stp to 0.1.14 * move state and kv storage to 'state' repo * get rid of orientdb code * fix tests * fix generation of trustee txns * fix genesis txns for trustee * fix generation of trustee txns (#151) * remove obsolete code * client to attempt establishing connections with nodes on start * refactor monitor.isMasterThroughputTooLow * update method for forcing view change cna checking it; intoduce wait method for it * use provoke_and_wait_for_view_change instead of eventually and check * remove unused dependency and skip tests - removed unused dependencies - minor fixes - skip tests that fail in master too (we have separate tickets for this) * increment ledger * increment ledger * enable debug logging in tests * using reverted changes in state interface * increment state version * Removed redundant copying of the file "pool_transactions_sandbox" to the directory "~/.plenum". (It is already done using "data_files" argument of "setup" function call.) 
(#152) * speed up tests (#147) * speed up test_nodes_with_pool_txns.py tests * reduce the timeout in checkNodesConnected * speed up the pool_transactions test * fix the timeout in test_delay.py * fix the timeout in test_primary_election_case5.py * speed up monitoring tests * revert changes in test_instance_change_with_Delta.py * each test's running time is limited by TestRunningTimeLimitSec * increase pool interconnection timeout on zstack retry timeout * fix timeout in test_delay.py * fix tests, make them faster * speedup the tests, tidy test timeouts * add a timeout into waits * bump stp-dev==0.1.43 * Unskip testProtocolInstanceCannotBecomeActiveWithLessThanFourServers since it works well (#153) * unskip testProtocolInstanceCannotBecomeActiveWithLessThanFourServers since it works well * decrease number of nodes in test_instance_cannot_become_active_with_less_than_four_servers to 13 * fix change back HA, SOV-908 (#154) * Fix testChangeNodeHaForNonPrimary (#157) * unskip test_change_non_primary_node_ha * fix waits name - it was renamed * verify genesis ledgers only if such option set in config * use primaryName instead of isPrimary to check that election is done * add numOfReelections parameter to waits.expectedPoolElectionTimeout * refactor test_node.py * set custom election timeout (numOfReelections=3) in changeNodeHa * remove debug prints * check VerKey is base58 for NODE txn, SOV-988 (#158) * client using a method which makes the code more testable, update to an old test with more checks and removing async from a method where it was not needed * Fix test testChangeNodeHaForPrimary (#160) * unskip testChangeNodeHaForPrimary * simplify conditional in changeNodeHa * node which is going down should not participate in a view change * change formating in testChangeNodeHaForPrimary * refactor to make method overridable and adding an argument to a test function * updated pypi package name for state repo and its version (#159) * Squashed commit of the following: create 
LedgerInfo class and replace collections of LedgerManager by fields * rename LedgerManager.ledgers to ledgerRegistry * fix usages of LedgerManager.ledgerRegistry * use correct attribute and renamed method * bring change from master and unskip a test * use correct attribute and set state to be committed when received from catchup * formatting * up ledger-3pc-batch version to 0.2.16 * up stp-3pc-batch version to 0.1.15 * improve the check of the arguments in "generate_sovrin_pool_transactions" (#162) * check client input for the NODE txn (#161) * fix testInstChangeWithLowerRatioThanDelta - decrease number of messages in a batch * decrease timeout to fix provoke_and_wait_for_view_change * fixing test * Fix post 3pc-batch merge (#163) * remove development options from Jenkinsfile * skip some broken tests * Update versions and skip windows build steps (#164) * switch off windows related build steps * update versions of ledger and stp * fix name * up ledger and stp version * skip test testNodeRequestingTxns * using correct timeout in tests * move some enums to correct location, using ledger's string encoding methods and test for stashing requests * bump dependency version * fix timeout in tests * make order of ledger sync customizable, add tranformer for transactions before adding to ledger, update seqno map db for transactions from catchup and update tests to check this * temporarily change config variable * fix regression where node starts catchup process if sent a consistency proff * bumping dependency version * bumping dependency version * bumping dependency version * Fix redundant reconnections and refactor (#165) * refactor retryForExpected * use maintainConnections in retryForExpected instead of connect * refactor resendRequests * up stp version to 0.1.49 * replace CLIENT_REQACK_TIMEOUT by CLIENT_REPLY_TIMEOUT when calling _filterExpected for REPLYs * up ledger to 0.2.19 * Add new helper-methods for conversion base58 to hex (#166) * increment state-trie and ledger 
versions (#168) * Make changes to support ChunkedFileStore (#167) * update _defaultStore of Ledger to make it correspond to original one * remove initialization of domain ledger - now it is done by ledger and file store it uses * replace manual copying of transaction files by usage of defaultFile * increase timeout for testNodeRequestingTxns * skip test testInstChangeWithLowerRatioThanDelta because it fails intermittently * up ledger version to 0.2.28 * Bump dependency version * [Closes SOV-980] Bug Fixed: A node should start catchup process if it realises that it has lagged behind the other node (#172) * [Closes SOV-980] Bug Fixed: A node should start catchup process if it realises that it has lagged behind the other node * Fixed failing tests * Fixed unused imports * Update conftest.py * Increased test time global limit * Reverted timeouts * Added logs * Fixed filtering CPs * Fixed filtering CPs * Fixed filtering CPs * Input validation (#170) * Input sanitization: Add base logic and test cases * Input sanitization: add NODE and NYM txn, fixes * Input sanitization: implement node and client messages * roll away node to node validation * role field is optional for NYM txn * fix tests * fixes for sovrin-node * implement validation for merkle root hash * uncomment new ConsistencyProof message implementation * add "nullable" property for FieldBase * fix usage of alphabet * add JsonField * add validation on message level * use hash size range instead of one value; use base58.alphabet instead of own list * fix usage hasSizes; made error comment more verbose * uncomment new implementation LedgerStatus message * uncomment new implementation for Prepare message and fix fields types * roll away node to node checks * uncomment some new message implementations * use HexField instead of MerkleRootHashField for state_root and rxn_root in PrePrepare * make message about hash length in MerkleRootField more verbose * limit timestamp field by 253402290000.0 * modify randomString to 
remove length limitation * add tests for merkle root field validation * make randomString work not only with indexable containers * refactor tests for merkle root * fix timestamp validation * add test for timestamp validation * update validation of timestamp * check each node connected * improve log messages * fix testAdd2NewNodes * Improvement of validation rules (#173) * fix Node txn, INDY-9 * add tests for the Node operation * add checking that number of init parameters of message is equal to number of fields declared in schema * use OrderedDict in __dict__ * add test for merkle, fix logic * use OrderedDict for _fields and update all realted methods * - Provided all the disabled tests with the ticket references. (#176) - Enabled the tests testChangeNodeHaForPrimary and testChangeNodeHaForNonPrimary on non-Windows platforms. - Commented out parametrizers at the disabled tests in plenum.test.input_validation.test_common_checks module. * more removal of op to the top since it can affect checking of a number of args * Make SERVICES field required for add NODE txn * Added another test scenario for catchup (#178) * override items, keys, values methods of Mapping in MessageBase to make it compatible * Added another test scenario for catchup fix * clean MessageBase * INDY-73: Removed a commented out @skipif decorator (#177) * Removed a commented out @skipif decorator. * Updated a ticket reference at a disabled test. * Add base58 str class; destnode; destnym; * use - as default ppSeqNo instead of -1 * overtride __ftr__ for MessageBase * comment checking of presence of SERVICE field * move alphabet validation from _specific_validation to avoid accidental overriding of it * INDY-73: Added ticket references to test stubs (#180) * Removed a commented out @skipif decorator. * Updated a ticket reference at a disabled test. * - Added ticket references to the not implemented tests which had not contained them yet. 
- Uncommented testStopScriptIfNodeIsRunning and disabled it since it fails. * Added raising NotImplementedError to not implemented tests. * Move all specific checks to from validate method * Eliminate multiple inheritance * Enabled and updated testNodesConnectsWhenOneNodeIsLate (#181) * Enabled and updated testNodesConnectsWhenOneNodeIsLate * updated assertions logic * INDY-109 (#184) * updates to test helpers and change in forwarding requests logic (#187) * Use secure random number generator for randomString() Current usage of random.choice() is not secure so replaced that with libsodium provided randombytes_uniform() which is secure and also available on all platforms where libsodium is available * Fixes for INDY-71 (#185) * Refactor check_endpoint_valid func * Fix validation, INDY-71 * Fix imports * Do not connect to a new, non-active node * Remove confusing comments * Minor refactor * Fix test testAddInactiveNodeThenActivate * Validation for view change and 3pc messages (#193) * add exception for type error and raise it in MessageValidator * make error message in ChooseField more verbose * uncomment new implementations of Reelection, Primary and InstanceChange messages * update and fix testInstanceChangeMsgTypeChecking * fix testDiscardInstChngMsgFrmPastView * use NonNegativeField instead of TimestampFied inTieAmountField for its seconds part * make name pf test for merkle root field more standard * move tests for fields to own package * add missing empty line * remove ununsed imports * refactor test for timestamp * remove todo about code duplication since it is not valid anymore * add tests for some field types * make IterableField check type of inner field and that it is not none * add test for hex field * add test for ledger id * add test for request id field * override __eq__ for MessageBase * use NonNegativeNumberField instead of TimestampField in RequestIdentifierField * use IdentifierField instead of NonEmptyStringField in RequestIdentifierField * update 
test for request identifier * add test for IdentifierField * update test for RequestIdentifier field * implement validation for IdentifierField and reimplement DestNodeField using it * uncomment new implementations for Propagate, Prepare, Commit, Ordered * Refactor base58 check; add Verkey check; dest node; dest nym; merkelroot; * move test for base58 field to field validation package * remove unused import * add checking of max length hash to merkle tree root field test * update test for identifier field * update test for request identifier field * fix formatting * fix checking of zero * remove unused commented line * rename test_non_negative_number to test_positive_number * Make verkey validator accept empty strings * Add super's __init__ params to child calsses * Improve getMatchedHelpableMsg logic (#195) * Use original batch parameters in all tests except the ones that use batches and use alternate of eventuallyAll to check a collection of functions under a timeout (#190) * updates to test helpers and change in forwarding requests logic * overriding batch params in tests * use correct timeouts in tests * accounting for nomination delay * Disable view change (#196) * repaired calls to deprecated methods * Added unsafe 'disable_view_change' for testing - Added 'unsafe' attribute to default config. "unsafe" is a set of attributes that can set certain behaviors that are not safe, for example, 'disable_view_change' disables view changes from happening. This might be useful in testing scenarios, but never in a live network. - Added 'unsafe' behavior 'disable_view_change' which causes Monitor to always return false when asked if master instance performance is degraded. - Refactored view change fixture so it could be reused to test new 'disable_view_change' behavior. 
* fixed import for missed fixture in test * Fix validation types (#197) * Enable and update testNodeDiscardMessageFromUnknownView (#194) * added equality method for MessageBase * added ability to cancel created delayers * enabled and updated testNodeDiscardMessageFromUnknownView * updated test to recent changes in node logic * reverted recent changes not necessary for now * Catch up demoted (#201) * add tests for the issues with catch-up after demotion/disconnection * updates replica's last_ordered_pp_seq_no after catch-up * disconnect a demoted node * Fixes for dead catchup (#202) * add tests for the issues with catch-up after demotion/disconnection * updates replica's last_ordered_pp_seq_no after catch-up * disconnect a demoted node * up stp version to 0.1.56 * up stp version to 0.1.57 * fixed tests * fix test * Disable the test: INDY-147. * Make random string generation efficient The current algorithm requires calling the libsodium random function 'size' times where size is the expected length of random string Changed this so that this will require at the most '(size/64)' number of calls. Also we do not need the chars list now we always use only alphabets and digits available in hex this makes it very easy and still gives us what we need. 
Every instance of usage of this function has been checked and nowhere the 'chars' parameter is used * Change to pass exact size to libsodium randombytes() function This probably is cryptographically more accurate * Fix catchup when commit (#203) * remove 3PC messages that are already ordered during catch-up * Added more tests * fix catch-up in between 3pc msgs * fix tests * update validation rule for verkey (#208) * fix an incorrect test (#207) * fix an incorrect test * refactoring test * remove un-necessary parameter (#210) * Revert unordered batches before starting catch-up (not after) (#209) * Revert unordered batches before starting catch-up (not after) * add test for revert of the state before catchup * improve the test * Do not enqueue old pre-prepares (#211) * ignore pp requests with ppSeqNo less than expected * minor refactor * Optimisations and a testing util (#212) * add todo, fixing log levels, commenting out an expensive area of code and adding capability to disable monitor in tests * an optimisation avoiding re-calculation of merkle proof * remove commented code and add test * update names * add test files for validators * add test for commit message * add test for ConsistencyProof message * update test for Commit message * add test for InstanceChange message * add test for LedgerStatus message * add test for nomination message * add test for Ordered message * add tests for Prepare and PrePrepare messages * add test for Primary message * add test for PROPAGATE message * add test for Reelection message * remove unused imports * Election bug fix (#215) * Election bug fix if `f` nodes do not send Primary and the node is the last one to send Primary, then it was not able to decide the primary, since a node was doing "primary setting" only on receiving a Primary message, changed that to try "setting primary" after sending Primary too * update test with more checks * removed obsolete test testStopScriptIfNodeIsRunning (#220) * Fix processing Propagate msg, 
INDY-245 (#221) * Bugfix/non human readable error (#222) * handling case with odd-length verkey * updated exception message, check it in test * Stable release Signed-off-by: Devin Fisher Sign-off-executed-by: toktar Approved-at: h-master --- plenum/common/exceptions.py | 5 +++++ plenum/common/verifier.py | 7 ++++++- plenum/server/node.py | 3 ++- plenum/test/common/test_verifier.py | 13 +++++++++++++ plenum/test/script/test_change_node_ha.py | 23 ----------------------- 5 files changed, 26 insertions(+), 25 deletions(-) delete mode 100644 plenum/test/script/test_change_node_ha.py diff --git a/plenum/common/exceptions.py b/plenum/common/exceptions.py index 4aeb677bd1..2fa11ce156 100644 --- a/plenum/common/exceptions.py +++ b/plenum/common/exceptions.py @@ -113,6 +113,11 @@ class KeysNotFoundException(Exception): 'To generate them run script ' +class InvalidKey(Exception): + code = 142 + reason = 'invalid key' + + class SuspiciousNode(BaseExc): def __init__(self, node: str, suspicion: Suspicion, offendingMsg): node = node.decode() if isinstance(node, bytes) else node diff --git a/plenum/common/verifier.py b/plenum/common/verifier.py index 3989f25006..6f5d412ef8 100644 --- a/plenum/common/verifier.py +++ b/plenum/common/verifier.py @@ -4,6 +4,7 @@ from base58 import b58decode, b58encode from plenum.common.signing import serializeMsg +from plenum.common.exceptions import InvalidKey from stp_core.crypto.nacl_wrappers import Verifier as NaclVerifier @@ -19,6 +20,7 @@ def verifyMsg(self, sig, msg: Dict): class DidVerifier(Verifier): def __init__(self, verkey, identifier=None): + _verkey = verkey self._verkey = None self._vr = None if identifier: @@ -30,7 +32,10 @@ def __init__(self, verkey, identifier=None): if verkey[0] == '~': # abbreviated verkey = b58encode(b58decode(identifier) + b58decode(verkey[1:])) - self.verkey = verkey + try: + self.verkey = verkey + except Exception as ex: + raise InvalidKey("verkey {}".format(_verkey)) from ex @property def verkey(self): diff 
--git a/plenum/server/node.py b/plenum/server/node.py index 30ac4033da..8f65adccb3 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -1582,7 +1582,8 @@ def processPropagate(self, msg: Propagate, frm): logger.debug("Node {} received propagated request: {}". format(self.name, msg)) reqDict = msg.request - request = SafeRequest(**reqDict) + + request = self._client_request_class(**reqDict) clientName = msg.senderClient diff --git a/plenum/test/common/test_verifier.py b/plenum/test/common/test_verifier.py index 8bf21f35ce..a657953765 100644 --- a/plenum/test/common/test_verifier.py +++ b/plenum/test/common/test_verifier.py @@ -1,10 +1,23 @@ +import pytest + +from plenum.common.exceptions import InvalidKey from plenum.common.verifier import DidVerifier SAMPLE_ABBR_VERKEY = '~8zH9ZSyZTFPGJ4ZPL5Rvxx' SAMPLE_IDENTIFIER = '99BgFBg35BehzfSADV5nM4' EXPECTED_VERKEY = '5SMfqc4NGeQM21NMx3cB9sqop6KCFFC1TqoGKGptdock' +ODD_LENGTH_VERKEY = 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' def test_create_verifier(): verifier = DidVerifier(SAMPLE_ABBR_VERKEY, identifier=SAMPLE_IDENTIFIER) assert verifier.verkey == EXPECTED_VERKEY + + +def test_create_verifier_with_odd_length_verkey(): + with pytest.raises( + InvalidKey, + message="invalid verkey {} accepted".format( + ODD_LENGTH_VERKEY)) as excinfo: + verifier = DidVerifier(ODD_LENGTH_VERKEY) + excinfo.match(r'verkey {}'.format(ODD_LENGTH_VERKEY)) diff --git a/plenum/test/script/test_change_node_ha.py b/plenum/test/script/test_change_node_ha.py deleted file mode 100644 index 4ca4820e12..0000000000 --- a/plenum/test/script/test_change_node_ha.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest -from stp_core.network.port_dispenser import genHa -from stp_core.types import HA - -from plenum.common.script_helper import changeHA - - -@pytest.mark.skip(reason='INDY-99') -def testStopScriptIfNodeIsRunning(looper, txnPoolNodeSet, poolTxnData, - poolTxnStewardData, tconf): - nodeName = txnPoolNodeSet[0].name - nodeSeed = 
poolTxnData["seeds"][nodeName].encode() - stewardName, stewardsSeed = poolTxnStewardData - ip, port = genHa() - nodeStackNewHA = HA(ip, port) - - # the node `nodeName` is not stopped here - - # change HA - with pytest.raises(Exception, message="Node '{}' must be stopped " - "before".format(nodeName)): - changeHA(looper, tconf, nodeName, nodeSeed, nodeStackNewHA, - stewardName, stewardsSeed) From a2731bcec4dee54a0bef967f50767f4166c2a133 Mon Sep 17 00:00:00 2001 From: Devin Fisher Date: Thu, 6 Jul 2017 15:05:25 -0600 Subject: [PATCH 013/100] re-pin indy dependencies Signed-off-by: Devin Fisher Sign-off-executed-by: toktar Approved-at: h-master --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b9be59663a..23478f0a93 100644 --- a/setup.py +++ b/setup.py @@ -58,8 +58,8 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['ledger==0.2.15', 'stp==0.1.11', - 'state-trie==0.1.1', 'jsonpickle', + install_requires=['ledger==0.2.16', 'stp==0.1.12', + 'state-trie==0.1.3', 'jsonpickle', 'prompt_toolkit==0.57', 'pygments', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', 'sortedcontainers==1.5.7', 'psutil', 'pip'], From 7c33e84e4d9a13ffa73085eb98804e0b35d69e14 Mon Sep 17 00:00:00 2001 From: Andrei Goncharov Date: Tue, 18 Jul 2017 15:11:43 +0300 Subject: [PATCH 014/100] Stable (#285) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add test for SerializedValueField * process stashed ledger status only once * calling correct method in force_ordered * add documentation for method * added common constants for input validation, tests for verkeys * optimized error output for incorrect b58 chars * updated tests for fields input validation * renamed decodedLengthConstraints to byte_lengths * revert uncommitted ledger on setting primary of master replica * checking for exception in case of ledger not present * fixed PR comments * reformat code and 
comment in a test * made API to get b58 strings with specified byte length * fixed pep8 warnings * fix primary selection * Add pool_upgrade force field; allow to send forced requests without waiting for sufficient number of nodes; (#228) * fix test * Commented out raet logging * Add test for verkey check (#230) * add new test that checks requests from different ledgers interleave, skip an probably incorrect test and refactoring * fix test * add log to test helper * Set commit quorum N-f Ref: testNodeRejectingInvalidTxns Fix: testNodeRejectingInvalidTxns Remove excess 'sendRandomRequests' Ref: add quorums, propagate Ref: use quorums in replica class Ref: quorums election Ref: remove old quorum code * Ref: remove useless condition * Ref: quorums for client * Ref: propagator Ref: minor Fix ReqState class * Fix: do not count node's propagate for quorum * Fix testNonPrimarySendsAPrePrepare * Fix propagator * workaround for byte sender names * fix removing name of node from senders * Fix testNodeDoesNotParticipateUntilCaughtUp * Skip test_primary_election_case6 * small clean-ups in ledger manager * use Base58Field for CONS_PROOF and MapField ionstead of IterableField for txns * create StringifiedNonNegativeNumberField and use it in CatchupRep * fix a bug with processing commit and catchup req * Change view_change, prepare quorums * view_change N-f * prepare N-f-1 ('-1' - except the primary) * update fixture to work even if catchup was tried again * Merge branches 'master' and 'simple-election' of https://github.com/evernym/plenum into simple-election * Merge branches 'master' and 'simple-election' of https://github.com/evernym/plenum into simple-election * small cleaning in fields.py * adding get_by_rank in test util and update a new test * fix test * dump * change schema of ViewChangeDone message and send acceptabted view change * reformat code and comment in a test * fix test * fix intermediately failing tests Catchup can be marked completed if got all txns from 
stashed ordered requests * fix test * add a test for catchup requests not needed and add ability to process specific delayed messages * remove unused imports and add a update test * re-factor slow_node tests: make them independent * remove debug print * update error message for validation * add missing blank line * update logic in CatchupOperationField * remove redundant blank line * changes for validation * create a build from a branch * create a build from a branch: disable tests temporarily * enable tests back * set name for exception for debug purpose * During view change, cathcup only certain number of times if catchup did not result in any new txns * bump dependency version * remove commented code, propagate regardless of participation, use correct wallet for node and some refactoring * add CatchupOperationField and use it in CatchupRep * use ClientMessageValidator instead of CatchupOperationField - it is possible because transactions in txn field are reformated back to request format * uncomment digest field of client request * remove fields from 'operation' when rcopying them to root level of document * remove CatchupOperationField, it is not needed anymore * test * add support of not strict schema * add field 'optional' to FieldValidator since all its successors have it * fix testNodeRejectingInvalidTxns * fix tests in test_client_node_op.py * fix tests in test_catchuprep_message.py * Validation for transaction sequence number and sha256 hash (#233) * add input validation for transaction sequence number field * adding sha256 validation * splitting test * adding TRUSTEE as an allowed role * adding perf tests * Added LimitedLengthStringField (#234) * Added LimitedLengthStringField * Code review fixes * Code review fixes * Code review fixes * update doc * refactoring * Add MessageFactory * Add node_message_factory * Fix the validation error messages, make "_raise_invalid*" static * Ref: MessageFactory * Add TestMsg to the message factory * Use 
node_message_factory for client, refactoring * fix timeout in tests * Make rest messages MessageBase * Fix testClientNotRetryRequestWhenReqnackReceived * Remove TaggedTuples * Ref: input validation * move node messages into a separate file * remove TaggedTuple * Add update_schemas_by_field_type method * Validation for transaction sequence number and sha256 hash (#233) * add input validation for transaction sequence number field * adding sha256 validation * splitting test * adding TRUSTEE as an allowed role * Added LimitedLengthStringField (#234) * Added LimitedLengthStringField * Code review fixes * Code review fixes * Code review fixes * adding perf tests * update doc * rename file * Fix reqToTxn, roll back catchup_rep validation * Update setup.py * Fix test notes from indy-111 * Fix test_has_expected_validators * Remove runner.py * fix jenkinsfile * fix test name and remove commented skip (#240) * Version update * fix tests * ZStack takes config (#237) * pass config to ZStacks and move utility to stp * remove unused imports * Test fix * Roll back f.TXN_ROOT validation for PrePrepares * Improve validation error massages * Fix tests * Fix tests and merge issues * Update readme for new home in Hyperledger. 
* Ledger using generators (#246) * pass config to ZStacks and move utility to stp * remove unused imports * midcommit * ledger iteration is done using generators * bump dependency version * add test for state re-creation from ledger * Added predicates constant * log level change, deleting useless test and updating tests (#247) * log level change, deleting useless test and updating tests * bum dependency version * Rearrange tests * Reverted unusued changes * handle corrupt pool ledger files (INDY-150) * Fix test testInstanceChangeMsgTypeChecking * Make stateRootHash and txnRootHash fields MerkleRootField type * update test with descriptive name * Make stateRootHash and txnRootHash fields nullable * Fix update_schemas_by_field_type, node message factory * Cli did support (#239) * added some tests for genesis pool files, showing some brittlness * Switched CLI to use DID instead of Cryptonyms Required a few changes to some signers to handle some of the use case in the CLI * Added output for new Signer to include verfication keys in addtion to identifiers * fixed test to match current CLI output * made signer_did handle case where verkey is not passed a little better * changed veriable to non-keyword * changed veriable to not be a keyword * add test for full_verkey * Version fieldvalidator (#243) * Add validator for vrsion field * CHnages based on code review * Add check spaces in the midle of version * Add more testcases; fix conditions; * Simplify the version component check * Moved handleGetTnxReq function in plenum * Corrected params in getReplyFromLedger function * Fix test_message_factory tests * Add runner.py back * GET_TXN request handling * Fix memory leaks in Replica (#248) * refactor creation of inBoxRouter * remove view_ends_at * add test for cleaning of ordered * add removal of outdated ordereds * add cleaning for primaryNames * rename test_ordered_cleaning to test_buffers_cleaning * remove stashed checkpoints when they were processed * remove 
self.stashedRecvdCheckpoints[key] if it is empty * move calling of self.compact_ordered() to gc() * do gc() manually * Fixed comments * Changed GET_TXN response * manage permissions for keyring files and directories INDY-323 (#232) * helper class to manage keyrings and tests for it * fixed tests * allow save wallet by absolute path, updated tests * default values for files and dirs permissions in keyring * added logging about saved wallet * switch cli to use new API to save wallets, added test * removed saveGivenWallet API as obsolete * added loadWallet api and tests * swicthed wallet loading logic to new API, removed old one * added TODO comment * replace TypeError with more appropriate ValueError * improved style: variable name for imported class * pathlib onstead of os for WalletStorageHelper * updated exception message check * delayed walletSaver initialization * Added test that resu; of get_txn transaction same as expected * Ability to turn off spyables for benchmarking and memory tracking utils (#254) * print size of objects * add method to inspect collection size of objects and add option to disable spies for benchmarking * add more options to memory usage methods * do checkpointing frequently * Update imports from stp (#256) * fix imports * up stp to 0.2.42 * reduce delay in test_view_change_in_between_3pc_all_nodes_random_delays_long_delay because it runs for too long * Support GET_TXN transaction (#249) * test * Added predicates constant * Reverted unusued changes * Moved handleGetTnxReq function in plenum * Corrected params in getReplyFromLedger function * GET_TXN request handling * Fixed comments * Changed GET_TXN response * Added test that resu; of get_txn transaction same as expected * Add capability to request messages (#251) * log level change, deleting useless test and updating tests * bum dependency version * Add capability to request messages A node can request certain kinds of messages from other nodes. 
Currently PRE-PREPARE, LEDGER_STATUS and CONSISTENCY_PROOF are supported types. The old way of LEDGER_STATUS and CONSISTENCY_PROOF is removed. A replica can request PRE-PREPAREs from other nodes if it is missing them but has sufficient PREPAREs * remove commented code and commits are queued even when prepares missing * add test for checking malicious behavior while requesting pre-prepare * refactor * adding more tests * rename objects * add new test for view change and use fixture in a logging test * fix broken merge * update imports * add comment to revisit * Bump dependencies * Refactor: Use correct error code for instance change after timeout, use correct method name for get transaction handler, separate method to send acknowledgement and minor improvement in test utility (#258) * log level change, deleting useless test and updating tests * bum dependency version * Add capability to request messages A node can request certain kinds of messages from other nodes. Currently PRE-PREPARE, LEDGER_STATUS and CONSISTENCY_PROOF are supported types. The old way of LEDGER_STATUS and CONSISTENCY_PROOF is removed. 
A replica can request PRE-PREPAREs from other nodes if it is missing them but has sufficient PREPAREs * remove commented code and commits are queued even when prepares missing * add test for checking malicious behavior while requesting pre-prepare * refactor * adding more tests * rename objects * add new test for view change and use fixture in a logging test * fix broken merge * update imports * add comment to revisit * Refactor: Use correct error code for instance change after timeout, use correct method name for get transaction handler, separate method to send acknowledgement and minor improvement in test utility * update test to check if pool is functional * let GET_TXN handler return reply * removing unused config variables and imports (#263) * removing unused config variables and imports * remove commented code * Feature Added: Repo consolidation (#262) * [WIP] Repo merge Merged in ledger * [WIP] Repo merge Merged in state, stp * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * Update Jenkinsfile * Update setup.py * Fix consensus tests; add catchup consensus to Quorums (#260) * Fix consensus tests; add catchup consensus to Quorums - fixed consensus tests - added new tests - added catchup consensus values to Quorums - added checkpoint consensus values to Quorums * add catchup consensus to Quorums * add ledger_status quorum * Checkpoint bug (#264) * removing unused config variables and imports * remove commented code * fix bug with checkpoint and gc 
after view change * Feature Added: Repo consolidation (#262) * [WIP] Repo merge Merged in ledger * [WIP] Repo merge Merged in state, stp * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * Update Jenkinsfile * Update setup.py * update test * Corrected GET_TXN Responce type * Corrected get_Txn response * Test network did (#261) * added some tests for genesis pool files, showing some brittlness * Changed test_network_setup to use DIDs test_network_setup used cryptonym but we are switching over to DID style Ids. Cli no longer easily support cryptonym so we need the test genisus txn to also use DID. * Signature is not required for GET_TXN transaction. 
Corrected GET_TXN tets * Fix serialization of messages when ujson < 1.35 is used (#266) * add test for serialization of messages * override __dir__ method for MessageBase to exclude service fields * add test for dir over message * Corrected checking * Node requests Propagates if needed (#269) * removing unused config variables and imports * remove commented code * fix bug with checkpoint and gc after view change * Feature Added: Repo consolidation (#262) * [WIP] Repo merge Merged in ledger * [WIP] Repo merge Merged in state, stp * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * [WIP] Repo merge Test fixes * Update Jenkinsfile * Update setup.py * update test * Node requests PROPAGATEs If a node receives a PRE-PREPARE but does not enough finalised requests, it requests PROPAGATEs * use fixture params in test and update test with more checks * use correct type * add and update tests * Deleted reference to sovrin_common * Corrected GET_TXN Response type (#265) * test * Added predicates constant * Reverted unusued changes * Moved handleGetTnxReq function in plenum * Corrected params in getReplyFromLedger function * GET_TXN request handling * Fixed comments * Changed GET_TXN response * Added test that resu; of get_txn transaction same as expected * Corrected GET_TXN Responce type * Corrected get_Txn response * Signature is not required for GET_TXN transaction. 
Corrected GET_TXN tests * Corrected checking * Deleted reference to sovrin_common * HashStore is used to recover when the ledger starts, fixed the calcu… (#271) * Hash store is used to recover when the ledger starts, fixed the calculation for consistency Recovering from transaction log is slow as each transaction is applied again to the tree. * change log level * put imports at top and add a property to HashStore * fix ujson version to have the same serialization of composite messages (#272) * fix ujson version to have the same serialization of composite messages * fix serialization test * Set consensus for catchup and ledger_status to f + 1 * use epoch for time in PRE-PREPARE and PREPARE * Authenticator will not authenticate cryptonyms not on ledger - INDY 213 (#267) * CRYPTONYMS WILL NOW CHECK ON LEDGER When a cryptonym was given that was not on the ledger, the authenticator would not verify and use the credentials supplied by the client. * CRYPTONYMS WILL NOW CHECK ON LEDGER - INDY 213 When a cryptonym not on the ledger is attempted to be authenticated, it should check the ledger and not depend on the credentials provided by the client. 
When this occurs, the authenticator should throw an exception (because the verkey is None) * Removed unittest, switched for proper pytest.raises * Feature/indy 399 (#274) * Feature Added: Package renaming support * Feature Added: Package renaming support * Feature Added: Package renaming support * Feature Added: Package renaming support * Merge preparations * Update Jenkinsfile * Update Jenkinsfile * first working draft of timestamps * refactor, clear previous view checkpoint and ignore checkpoint digest… (#273) * refactor, clear previous view checkpoint and ignore checkpoint digest check for non-master * rename method, fix spelling and indentation * Catchup if 3pc messages are missed, INDY-335 * start the catchup procedure if a node is going to stash second CHECKPOINT * minor refactor * add TODOs * Do not start catchup if already started * Do not start catchup for primary or non-master Start the catchup procedure only for master non-primary replicas * Fix stashed_checkpoints_with_quorum * Updated ci helpers (#276) * storing time in ledger * add error log to global whitelist and handle exception while processing PRE-PREPARE * rename and add missed parameter * update test * Corrected GET_TXN response * more tests * not ignoring txn time * Message for notifying lagged nodes about previously selected primary (#277) * add test for CurrentState message * create CurrentState message * send CurrentState to lagged nodes * make message tests check order of fields correctly * refactor creation of router for node messages * handle received CurrentState message * update name of method for sending current state * update CurrentState message * add CurrentState message to whitelist * add test for propagation of current state * change type of field in CurrentState * add methods of node related to CurrentState message to spylog * add debug log message for processing of current state * add verbose message for assertion * unwrap ViewChangeDone message from LedgerStatus and send it to 
elector * remove commented sendElectionMsgsToLaggingNode method * add MessageField field validator * make _validate_message instane method * discard LedgerStatus if cannot parse internal messages * set todo about changing validation of internal messages * Add explicit log message on successful Notifier Plugin Import (#281) * adding item to quorums * Stable Signed-off-by: Andrei Goncharov Sign-off-executed-by: toktar Approved-at: h-master --- .gitignore | 31 + Jenkinsfile | 147 ++- README.md | 33 +- ci/ubuntu.dockerfile | 22 + ledger/__init__.py | 0 ledger/compact_merkle_tree.py | 284 +++++ ledger/error.py | 156 +++ ledger/immutable_store.py | 22 + ledger/ledger.py | 221 ++++ ledger/merkle_tree.py | 74 ++ ledger/merkle_verifier.py | 262 +++++ ledger/serializers/__init__.py | 0 ledger/serializers/compact_serializer.py | 77 ++ ledger/serializers/field.py | 10 + ledger/serializers/json_serializer.py | 75 ++ ledger/serializers/mapping_serializer.py | 7 + ledger/stores/__init__.py | 0 ledger/stores/binary_file_store.py | 47 + ledger/stores/chunked_file_store.py | 308 +++++ ledger/stores/directory_store.py | 54 + ledger/stores/file_hash_store.py | 113 ++ ledger/stores/file_store.py | 224 ++++ ledger/stores/hash_store.py | 176 +++ ledger/stores/memory_hash_store.py | 49 + ledger/stores/store_utils.py | 11 + ledger/stores/text_file_store.py | 32 + ledger/test/__init__.py | 7 + ledger/test/__main__.py | 3 + ledger/test/conftest.py | 11 + ledger/test/helper.py | 45 + ledger/test/merkle_test.py | 505 ++++++++ ledger/test/test_chunked_file_store.py | 153 +++ ledger/test/test_compact_serializer.py | 207 ++++ ledger/test/test_fields.py | 34 + ledger/test/test_file_hash_store.py | 100 ++ ledger/test/test_file_store_perf.py | 32 + ledger/test/test_file_stores_equailty.py | 56 + ledger/test/test_json_serializer.py | 43 + ledger/test/test_ledger.py | 222 ++++ ledger/test/test_ledger_chunked_store.py | 77 ++ ledger/test/test_merkle_proof.py | 349 ++++++ 
ledger/test/test_txn_persistence.py | 47 + ledger/tree_hasher.py | 76 ++ ledger/util.py | 56 + plenum/__metadata__.py | 2 +- plenum/cli/cli.py | 46 +- plenum/client/client.py | 46 +- plenum/client/pool_manager.py | 3 +- plenum/client/wallet.py | 132 +++ plenum/common/batched.py | 5 +- plenum/common/config_util.py | 6 +- plenum/common/constants.py | 10 +- plenum/common/ledger.py | 12 + plenum/common/ledger_info.py | 38 +- plenum/common/ledger_manager.py | 454 ++++---- plenum/common/message_processor.py | 5 +- plenum/common/messages/client_request.py | 46 +- plenum/common/messages/fields.py | 272 ++++- plenum/common/messages/message_base.py | 60 +- .../common/messages/node_message_factory.py | 112 ++ plenum/common/messages/node_messages.py | 334 ++++++ plenum/common/perf_util.py | 33 + plenum/common/request.py | 12 +- plenum/common/signer_did.py | 31 +- plenum/common/signer_simple.py | 4 +- plenum/common/stack_manager.py | 31 +- plenum/common/stacks.py | 20 +- plenum/common/startable.py | 20 +- plenum/common/test_network_setup.py | 27 +- plenum/common/transaction_store.py | 3 +- plenum/common/transactions.py | 1 + plenum/common/txn_util.py | 44 +- plenum/common/types.py | 409 +------ plenum/common/util.py | 132 +-- plenum/config.py | 17 +- plenum/persistence/leveldb_hash_store.py | 4 + plenum/persistence/storage.py | 2 +- plenum/server/client_authn.py | 4 + plenum/server/domain_req_handler.py | 15 +- plenum/server/message_req_processor.py | 267 +++++ plenum/server/models.py | 14 +- plenum/server/monitor.py | 4 +- plenum/server/msg_filter.py | 43 + plenum/server/node.py | 917 ++++++++++----- plenum/server/notifier_plugin_manager.py | 4 +- plenum/server/pool_manager.py | 129 ++- plenum/server/pool_req_handler.py | 4 +- plenum/server/primary_decider.py | 123 +- plenum/server/primary_elector.py | 145 +-- plenum/server/primary_selector.py | 356 +++++- plenum/server/propagator.py | 135 ++- plenum/server/quorums.py | 29 + plenum/server/replica.py | 1016 ++++++++++++----- 
plenum/server/req_handler.py | 10 +- plenum/server/router.py | 18 +- plenum/server/suspicion_codes.py | 24 +- plenum/server/view_change/__init__.py | 0 .../view_change/view_change_msg_filter.py | 38 + plenum/test/README.md | 10 +- plenum/test/__init__.py | 6 + .../test_3pc_paused_during_catch_up.py | 2 +- .../catch-up/test_catchup_during_3pc.py | 1 + .../test_state_reverted_before_catchup.py | 3 +- plenum/test/batching_3pc/conftest.py | 4 +- plenum/test/batching_3pc/helper.py | 11 +- .../test/batching_3pc/test_batch_rejection.py | 4 +- .../batching_3pc/test_batching_scenarios.py | 2 +- ..._blacklist_node_on_multiple_nominations.py | 2 +- ...t_node_on_multiple_primary_declarations.py | 2 +- plenum/test/checkpoints/conftest.py | 3 +- plenum/test/checkpoints/helper.py | 12 + .../checkpoints/test_basic_checkpointing.py | 9 +- .../test_discard_old_checkpoint_messages.py | 2 +- .../test_message_outside_watermark.py | 9 +- .../test_message_outside_watermark1.py | 34 +- .../checkpoints/test_stable_checkpoint.py | 22 +- .../test_view_change_after_checkpoint.py | 83 ++ plenum/test/cli/conftest.py | 2 +- plenum/test/cli/helper.py | 33 +- plenum/test/cli/test_save_wallet.py | 25 + plenum/test/client/test_client.py | 4 +- plenum/test/client/test_client_authn.py | 20 +- plenum/test/client/test_client_retry.py | 6 +- plenum/test/common/test_parse_ledger.py | 38 + plenum/test/common/test_signers.py | 30 + plenum/test/conftest.py | 7 +- plenum/test/delayers.py | 47 +- plenum/test/helper.py | 207 +++- plenum/test/input_validation/constants.py | 15 + .../fields_validation/test_base58_field.py | 67 +- .../fields_validation/test_bool_field.py | 19 + .../test_identifier_field.py | 33 +- .../fields_validation/test_ledger_id_field.py | 2 +- .../test_limited_length_string_field.py | 22 + .../test_merkle_tree_root_field.py | 26 +- .../fields_validation/test_message_field.py | 15 + .../test_request_identifier_field.py | 23 +- .../test_serializedvalue_field.py | 20 + 
.../test_sha256_hex_field.py | 50 + .../fields_validation/test_timestamp_field.py | 10 +- .../test_txn_seq_no_field.py | 12 + .../fields_validation/test_verkey_field.py | 36 + .../fields_validation/test_version_field.py | 53 + .../message_validation/test_batch_message.py | 27 + .../test_catchuprep_message.py | 26 + .../test_catchupreq_message.py | 28 + .../test_checkpoint_message.py | 29 + .../message_validation/test_commit_message.py | 4 +- .../test_consistencyproof_message.py | 5 +- .../test_currentstate_message.py | 25 + .../test_instanceChange_message.py | 4 +- .../test_ledgerstatus_message.py | 6 +- .../test_nomination_message.py | 4 +- .../test_ordered_message.py | 10 +- .../test_prepare_message.py | 16 +- .../test_preprepare_message.py | 14 +- .../test_primary_message.py | 6 +- .../test_propagate_message.py | 7 +- .../test_reelection_message.py | 6 +- .../test_threepcstate_message.py | 25 + .../test_viewchangedone_messsage.py | 25 + plenum/test/input_validation/messages.py | 15 +- plenum/test/input_validation/stub_messages.py | 43 + .../test_client_get_txn_op.py | 33 + .../input_validation/test_client_node_op.py | 10 +- .../input_validation/test_client_nym_op.py | 53 + .../input_validation/test_message_factory.py | 94 ++ .../test_message_serialization.py | 42 + plenum/test/input_validation/utils.py | 14 +- plenum/test/instances/helper.py | 12 +- ...come_active_with_less_than_four_servers.py | 5 + .../test_msgs_from_slow_instances.py | 2 +- plenum/test/instances/test_multiple_commit.py | 11 +- .../test_multiple_instance_change_msgs.py | 2 +- .../instances/test_multiple_pre_prepare.py | 2 +- .../test/instances/test_multiple_prepare.py | 4 +- .../test/instances/test_pre_prepare_digest.py | 3 +- plenum/test/instances/test_prepare_digest.py | 2 +- plenum/test/malicious_behaviors_node.py | 34 +- plenum/test/monitoring/test_avg_latency.py | 1 - .../test_instance_change_with_Delta.py | 4 +- .../test_instance_change_with_req_Lambda.py | 25 +- 
plenum/test/monitoring/test_throughput.py | 1 - plenum/test/msgs.py | 19 +- plenum/test/node_catchup/conftest.py | 42 +- plenum/test/node_catchup/helper.py | 79 +- .../test_catchup_delayed_nodes.py | 1 - .../test/node_catchup/test_catchup_demoted.py | 29 +- .../node_catchup/test_catchup_f_plus_one.py | 53 + .../test_catchup_inlcuding_3PC.py | 69 ++ .../node_catchup/test_catchup_scenarios.py | 2 +- ...test_catchup_while_new_request_incoming.py | 7 +- .../test/node_catchup/test_discard_view_no.py | 58 +- .../node_catchup/test_new_node_catchup.py | 61 +- .../node_catchup/test_new_node_catchup2.py | 52 +- .../test_no_catchup_if_got_from_3pc.py | 70 ++ .../test_node_catchup_after_checkpoints.py | 83 ++ .../test_node_catchup_after_disconnect.py | 8 +- ...test_node_catchup_after_lost_connection.py | 25 +- ...ode_catchup_and_view_change_after_start.py | 1 + ..._node_reject_invalid_txn_during_catchup.py | 108 +- .../test_node_request_consistency_proof.py | 56 +- .../test_node_request_missing_transactions.py | 31 +- .../test_revert_during_catchup.py | 122 ++ .../node_request/message_request/__init__.py | 0 .../node_request/message_request/conftest.py | 30 + .../node_request/message_request/helper.py | 20 + .../test_node_requests_missing_preprepare.py | 158 +++ .../test_preprepare_request.py | 107 ++ .../test_requested_preprepare_handling.py | 66 ++ .../test_valid_message_request.py | 155 +++ .../test/node_request/node_request_helper.py | 86 +- .../test_already_processed_request.py | 4 +- .../test_commit/test_commits_recvd_first.py | 33 + .../test_commits_without_prepares.py | 29 + .../test_num_commit_with_2_of_6_faulty.py | 33 + ...st_num_of_commit_with_f_plus_one_faults.py | 13 +- .../test_num_of_sufficient_commit.py | 12 +- ...est_different_ledger_request_interleave.py | 70 ++ .../test_discard_3pc_for_ordered.py | 63 + ..._ordering_when_pre_prepare_not_received.py | 37 +- .../test_order/test_request_ordering_2.py | 2 +- ...re_prepare_pp_seq_no_less_than_expected.py | 4 +- 
.../test_non_primary_sends_a_pre_prepare.py | 10 +- ...m_of_pre_prepare_with_f_plus_one_faults.py | 13 +- .../test_num_of_pre_prepare_with_one_fault.py | 6 +- .../test_num_of_sufficient_preprepare.py | 8 +- ...st_primary_sends_preprepare_of_high_num.py | 5 +- ...t_num_of_prepare_with_f_plus_one_faults.py | 19 +- .../test_num_of_prepare_with_one_fault.py | 12 +- .../test_num_of_sufficient_prepare.py | 13 +- .../test_num_prepare_with_2_of_6_faulty.py | 33 + .../test_node_lacks_finalised_requests.py | 65 ++ ..._propagate_with_f_plus_one_faulty_nodes.py | 15 +- .../test_num_of_propagate_with_one_fault.py | 4 +- .../test_num_of_sufficient_propagate.py | 13 +- .../node_request/test_quorum_disconnected.py | 38 + .../test/node_request/test_quorum_faulty.py | 53 + .../node_request/test_request_forwarding.py | 6 +- .../node_request/test_timestamp/__init__.py | 0 .../node_request/test_timestamp/conftest.py | 3 + .../node_request/test_timestamp/helper.py | 29 + .../test_timestamp/test_3pc_timestamp.py | 67 ++ .../test_timestamp/test_clock_disruption.py | 70 ++ .../test_timestamp/test_timestamp_new_node.py | 48 + .../test_timestamp_post_view_change.py | 73 ++ plenum/test/pool_transactions/conftest.py | 6 +- .../test/pool_transactions/get_txn_request.py | 75 ++ plenum/test/pool_transactions/helper.py | 54 +- ...t_change_ha_persists_post_nodes_restart.py | 1 - .../test_node_key_changed.py | 48 +- .../test_nodes_data_changed.py | 11 +- .../test_nodes_with_pool_txns.py | 229 ++-- plenum/test/primary_election/helpers.py | 6 +- .../test_primary_election_case1.py | 3 +- .../test_primary_election_case2.py | 4 +- .../test_primary_election_case4.py | 3 +- .../test_primary_election_case5.py | 3 +- .../test_primary_election_case6.py | 6 +- .../test_primary_election_contested.py | 3 +- ...test_primary_election_with_clear_winner.py | 1 + .../test_primary_election_with_tie.py | 3 +- plenum/test/primary_selection/conftest.py | 17 + plenum/test/primary_selection/helper.py | 42 + 
.../test_catchup_after_view_change.py | 106 ++ .../test_catchup_multiple_rounds.py | 77 ++ .../test_catchup_needed_check.py | 77 ++ .../test_primary_selection.py | 39 +- .../test_primary_selection_pool_txn.py | 53 + .../test_primary_selection_routes.py | 8 + .../test_primary_selector.py | 185 +++ plenum/test/propagate/helper.py | 2 +- .../test_propagate_recvd_after_request.py | 4 +- .../test_propagate_recvd_before_request.py | 2 +- plenum/test/replica/test_buffers_cleaning.py | 66 ++ ...y_marked_suspicious_for_sending_prepare.py | 16 +- .../test_replica_reject_same_pre_prepare.py | 65 +- plenum/test/signing/test_signing.py | 2 +- plenum/test/spy_helpers.py | 19 +- plenum/test/stasher.py | 54 +- plenum/test/test_bootstrapping.py | 6 +- plenum/test/test_client.py | 4 +- plenum/test/test_current_state_propagation.py | 42 + plenum/test/test_delay.py | 4 +- plenum/test/test_ledger_manager.py | 13 +- plenum/test/test_log_rotation.py | 19 +- plenum/test/test_node.py | 203 +++- plenum/test/test_node_basic.py | 3 +- plenum/test/test_node_genesis.py | 148 +++ plenum/test/test_node_request.py | 36 +- plenum/test/test_performance.py | 300 +++++ .../test_round_trip_with_one_faulty_node.py | 22 +- .../test_state_regenerated_from_ledger.py | 55 + plenum/test/test_util.py | 38 +- plenum/test/testable.py | 13 + plenum/test/view_change/conftest.py | 24 - plenum/test/view_change/helper.py | 86 +- .../test/view_change/slow_nodes/__init__.py | 0 .../test/view_change/slow_nodes/conftest.py | 8 + ...ew_change_2_of_4_nodes_with_new_primary.py | 38 + ...ew_change_2_of_4_nodes_with_non_primary.py | 33 + ...e_2_of_4_nodes_with_old_and_new_primary.py | 34 + ...ew_change_2_of_4_nodes_with_old_primary.py | 36 + .../slow_nodes/test_view_change_all_nodes.py | 25 + ...test_view_change_all_nodes_random_delay.py | 31 + .../slow_nodes/test_view_change_complex.py | 46 + .../test_3pc_msgs_during_view_change.py | 66 ++ .../view_change/test_disable_view_change.py | 12 +- 
.../test_elections_after_view_change.py | 40 - .../test_inst_chng_msg_throttling.py | 2 +- .../test_instance_change_msg_checking.py | 8 +- ..._master_primary_different_from_previous.py | 14 +- ...detecting_lag_from_view_change_messages.py | 115 ++ .../test_pp_seq_no_starts_from_1.py | 45 + .../test_queueing_req_from_future_view.py | 87 +- plenum/test/view_change/test_view_change.py | 92 +- .../test_view_change_done_delayed.py | 66 ++ .../test_view_change_happens_post_timeout.py | 44 +- .../test_view_change_max_catchup_rounds.py | 67 ++ .../test_view_change_on_master_degraded.py | 80 ++ .../view_change/test_view_change_timeout.py | 73 ++ ..._changes_if_master_primary_disconnected.py | 23 +- ..._changed_if_backup_primary_disconnected.py | 2 +- ...ew_not_changed_when_short_disconnection.py | 1 - plenum/test/waits.py | 22 +- .../test/wallet/test_wallet_storage_helper.py | 258 +++++ .../zstack_tests/test_zstack_reconnection.py | 28 +- runner.py | 177 --- setup.cfg | 3 + setup.py | 17 +- state/__init__.py | 0 state/db/__init__.py | 0 state/db/db.py | 16 + state/db/persistent_db.py | 29 + state/db/refcount_db.py | 157 +++ state/kv/__init__.py | 0 state/kv/kv_in_memory.py | 51 + state/kv/kv_store.py | 57 + state/kv/kv_store_leveldb.py | 65 ++ state/kv/kv_store_rocksdb.py | 19 + state/pruning_state.py | 110 ++ state/state.py | 60 + state/test/__init__.py | 0 state/test/bench.py | 1 + state/test/conftest.py | 6 + state/test/kv/__init__.py | 0 state/test/kv/test_kv_leveldb.py | 118 ++ state/test/kv/test_kv_memory.py | 126 ++ state/test/test_state_in_memory.py | 226 ++++ state/test/test_state_leveldb.py | 239 ++++ state/test/trie/__init__.py | 0 .../test_trie_values_at_different_roots.py | 80 ++ state/trie/__init__.py | 0 state/trie/pruning_trie.py | 955 ++++++++++++++++ state/util/__init__.py | 0 state/util/fast_rlp.py | 112 ++ state/util/utils.py | 522 +++++++++ stp_core/__init__.py | 0 stp_core/common/__init__.py | 0 stp_core/common/config/__init__.py | 0 
stp_core/common/config/util.py | 22 + stp_core/common/error.py | 16 + stp_core/common/log.py | 162 +++ .../logging/TimeAndSizeRotatingFileHandler.py | 40 + stp_core/common/logging/__init__.py | 0 stp_core/common/logging/handlers.py | 76 ++ stp_core/common/temp_file_util.py | 18 + stp_core/common/util.py | 47 + stp_core/config.py | 43 + stp_core/crypto/__init__.py | 0 stp_core/crypto/encoding.py | 29 + stp_core/crypto/nacl_wrappers.py | 514 +++++++++ stp_core/crypto/signer.py | 27 + stp_core/crypto/util.py | 90 ++ stp_core/error_codes.py | 3 + stp_core/loop/__init__.py | 0 stp_core/loop/eventually.py | 196 ++++ stp_core/loop/exceptions.py | 7 + stp_core/loop/looper.py | 305 +++++ stp_core/loop/motor.py | 86 ++ stp_core/loop/startable.py | 45 + stp_core/network/__init__.py | 0 stp_core/network/auth_mode.py | 10 + stp_core/network/exceptions.py | 53 + stp_core/network/keep_in_touch.py | 147 +++ stp_core/network/network_interface.py | 216 ++++ stp_core/network/port_dispenser.py | 77 ++ stp_core/network/util.py | 81 ++ stp_core/ratchet.py | 91 ++ stp_core/test/__init__.py | 0 stp_core/test/conftest.py | 19 + stp_core/test/helper.py | 136 +++ stp_core/types.py | 6 + stp_raet/__init__.py | 0 stp_raet/rstack.py | 709 ++++++++++++ stp_raet/test/__init__.py | 0 stp_raet/test/conftest.py | 41 + stp_raet/test/helper.py | 55 + stp_raet/test/stack_message_loss.py | 83 ++ stp_raet/test/test_communication.py | 197 ++++ stp_raet/test/test_kitrstack.py | 52 + stp_raet/test/test_raet_comm_with_one_key.py | 104 ++ stp_raet/test/test_rstack.py | 50 + stp_raet/util.py | 78 ++ stp_zmq/__init__.py | 0 stp_zmq/authenticator.py | 102 ++ stp_zmq/kit_zstack.py | 108 ++ stp_zmq/remote.py | 157 +++ stp_zmq/simple_zstack.py | 37 + stp_zmq/test/__init__.py | 0 stp_zmq/test/conftest.py | 51 + stp_zmq/test/helper.py | 116 ++ stp_zmq/test/test_heartbeats.py | 111 ++ stp_zmq/test/test_kitzstack.py | 27 + stp_zmq/test/test_large_messages.py | 65 ++ stp_zmq/test/test_quotas.py | 70 ++ 
stp_zmq/test/test_reconnect.py | 138 +++ stp_zmq/test/test_utils.py | 8 + stp_zmq/test/test_zstack.py | 167 +++ stp_zmq/test/test_zstack_communication.py | 63 + stp_zmq/util.py | 149 +++ stp_zmq/zstack.py | 938 +++++++++++++++ terminology.md | 3 + 423 files changed, 24375 insertions(+), 3063 deletions(-) create mode 100644 ledger/__init__.py create mode 100644 ledger/compact_merkle_tree.py create mode 100644 ledger/error.py create mode 100644 ledger/immutable_store.py create mode 100644 ledger/ledger.py create mode 100644 ledger/merkle_tree.py create mode 100644 ledger/merkle_verifier.py create mode 100644 ledger/serializers/__init__.py create mode 100644 ledger/serializers/compact_serializer.py create mode 100644 ledger/serializers/field.py create mode 100644 ledger/serializers/json_serializer.py create mode 100644 ledger/serializers/mapping_serializer.py create mode 100644 ledger/stores/__init__.py create mode 100644 ledger/stores/binary_file_store.py create mode 100644 ledger/stores/chunked_file_store.py create mode 100644 ledger/stores/directory_store.py create mode 100644 ledger/stores/file_hash_store.py create mode 100644 ledger/stores/file_store.py create mode 100644 ledger/stores/hash_store.py create mode 100644 ledger/stores/memory_hash_store.py create mode 100644 ledger/stores/store_utils.py create mode 100644 ledger/stores/text_file_store.py create mode 100644 ledger/test/__init__.py create mode 100644 ledger/test/__main__.py create mode 100644 ledger/test/conftest.py create mode 100644 ledger/test/helper.py create mode 100644 ledger/test/merkle_test.py create mode 100644 ledger/test/test_chunked_file_store.py create mode 100644 ledger/test/test_compact_serializer.py create mode 100644 ledger/test/test_fields.py create mode 100644 ledger/test/test_file_hash_store.py create mode 100644 ledger/test/test_file_store_perf.py create mode 100644 ledger/test/test_file_stores_equailty.py create mode 100644 ledger/test/test_json_serializer.py create mode 100644 
ledger/test/test_ledger.py create mode 100644 ledger/test/test_ledger_chunked_store.py create mode 100644 ledger/test/test_merkle_proof.py create mode 100644 ledger/test/test_txn_persistence.py create mode 100644 ledger/tree_hasher.py create mode 100644 ledger/util.py create mode 100644 plenum/common/messages/node_message_factory.py create mode 100644 plenum/common/messages/node_messages.py create mode 100644 plenum/server/message_req_processor.py create mode 100644 plenum/server/msg_filter.py create mode 100644 plenum/server/quorums.py create mode 100644 plenum/server/view_change/__init__.py create mode 100644 plenum/server/view_change/view_change_msg_filter.py create mode 100644 plenum/test/checkpoints/test_view_change_after_checkpoint.py create mode 100644 plenum/test/cli/test_save_wallet.py create mode 100644 plenum/test/common/test_parse_ledger.py create mode 100644 plenum/test/common/test_signers.py create mode 100644 plenum/test/input_validation/constants.py create mode 100644 plenum/test/input_validation/fields_validation/test_bool_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_limited_length_string_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_message_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_serializedvalue_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_sha256_hex_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_txn_seq_no_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_verkey_field.py create mode 100644 plenum/test/input_validation/fields_validation/test_version_field.py create mode 100644 plenum/test/input_validation/message_validation/test_batch_message.py create mode 100644 plenum/test/input_validation/message_validation/test_catchuprep_message.py create mode 100644 plenum/test/input_validation/message_validation/test_catchupreq_message.py create mode 
100644 plenum/test/input_validation/message_validation/test_checkpoint_message.py create mode 100644 plenum/test/input_validation/message_validation/test_currentstate_message.py create mode 100644 plenum/test/input_validation/message_validation/test_threepcstate_message.py create mode 100644 plenum/test/input_validation/message_validation/test_viewchangedone_messsage.py create mode 100644 plenum/test/input_validation/stub_messages.py create mode 100644 plenum/test/input_validation/test_client_get_txn_op.py create mode 100644 plenum/test/input_validation/test_client_nym_op.py create mode 100644 plenum/test/input_validation/test_message_factory.py create mode 100644 plenum/test/input_validation/test_message_serialization.py create mode 100644 plenum/test/node_catchup/test_catchup_f_plus_one.py create mode 100644 plenum/test/node_catchup/test_catchup_inlcuding_3PC.py create mode 100644 plenum/test/node_catchup/test_no_catchup_if_got_from_3pc.py create mode 100644 plenum/test/node_catchup/test_node_catchup_after_checkpoints.py create mode 100644 plenum/test/node_catchup/test_node_catchup_and_view_change_after_start.py create mode 100644 plenum/test/node_catchup/test_revert_during_catchup.py create mode 100644 plenum/test/node_request/message_request/__init__.py create mode 100644 plenum/test/node_request/message_request/conftest.py create mode 100644 plenum/test/node_request/message_request/helper.py create mode 100644 plenum/test/node_request/message_request/test_node_requests_missing_preprepare.py create mode 100644 plenum/test/node_request/message_request/test_preprepare_request.py create mode 100644 plenum/test/node_request/message_request/test_requested_preprepare_handling.py create mode 100644 plenum/test/node_request/message_request/test_valid_message_request.py create mode 100644 plenum/test/node_request/test_commit/test_commits_recvd_first.py create mode 100644 plenum/test/node_request/test_commit/test_commits_without_prepares.py create mode 100644 
plenum/test/node_request/test_commit/test_num_commit_with_2_of_6_faulty.py create mode 100644 plenum/test/node_request/test_different_ledger_request_interleave.py create mode 100644 plenum/test/node_request/test_discard_3pc_for_ordered.py create mode 100644 plenum/test/node_request/test_prepare/test_num_prepare_with_2_of_6_faulty.py create mode 100644 plenum/test/node_request/test_propagate/test_node_lacks_finalised_requests.py create mode 100644 plenum/test/node_request/test_quorum_disconnected.py create mode 100644 plenum/test/node_request/test_quorum_faulty.py create mode 100644 plenum/test/node_request/test_timestamp/__init__.py create mode 100644 plenum/test/node_request/test_timestamp/conftest.py create mode 100644 plenum/test/node_request/test_timestamp/helper.py create mode 100644 plenum/test/node_request/test_timestamp/test_3pc_timestamp.py create mode 100644 plenum/test/node_request/test_timestamp/test_clock_disruption.py create mode 100644 plenum/test/node_request/test_timestamp/test_timestamp_new_node.py create mode 100644 plenum/test/node_request/test_timestamp/test_timestamp_post_view_change.py create mode 100644 plenum/test/pool_transactions/get_txn_request.py create mode 100644 plenum/test/primary_selection/conftest.py create mode 100644 plenum/test/primary_selection/helper.py create mode 100644 plenum/test/primary_selection/test_catchup_after_view_change.py create mode 100644 plenum/test/primary_selection/test_catchup_multiple_rounds.py create mode 100644 plenum/test/primary_selection/test_catchup_needed_check.py create mode 100644 plenum/test/primary_selection/test_primary_selection_pool_txn.py create mode 100644 plenum/test/primary_selection/test_primary_selection_routes.py create mode 100644 plenum/test/primary_selection/test_primary_selector.py create mode 100644 plenum/test/replica/test_buffers_cleaning.py create mode 100644 plenum/test/test_current_state_propagation.py create mode 100644 plenum/test/test_node_genesis.py create mode 100644 
plenum/test/test_performance.py create mode 100644 plenum/test/test_state_regenerated_from_ledger.py create mode 100644 plenum/test/view_change/slow_nodes/__init__.py create mode 100644 plenum/test/view_change/slow_nodes/conftest.py create mode 100644 plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_new_primary.py create mode 100644 plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_non_primary.py create mode 100644 plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_old_and_new_primary.py create mode 100644 plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_old_primary.py create mode 100644 plenum/test/view_change/slow_nodes/test_view_change_all_nodes.py create mode 100644 plenum/test/view_change/slow_nodes/test_view_change_all_nodes_random_delay.py create mode 100644 plenum/test/view_change/slow_nodes/test_view_change_complex.py create mode 100644 plenum/test/view_change/test_3pc_msgs_during_view_change.py delete mode 100644 plenum/test/view_change/test_elections_after_view_change.py create mode 100644 plenum/test/view_change/test_node_detecting_lag_from_view_change_messages.py create mode 100644 plenum/test/view_change/test_pp_seq_no_starts_from_1.py create mode 100644 plenum/test/view_change/test_view_change_done_delayed.py create mode 100644 plenum/test/view_change/test_view_change_max_catchup_rounds.py create mode 100644 plenum/test/view_change/test_view_change_on_master_degraded.py create mode 100644 plenum/test/view_change/test_view_change_timeout.py create mode 100644 plenum/test/wallet/test_wallet_storage_helper.py delete mode 100644 runner.py create mode 100644 state/__init__.py create mode 100644 state/db/__init__.py create mode 100644 state/db/db.py create mode 100644 state/db/persistent_db.py create mode 100644 state/db/refcount_db.py create mode 100644 state/kv/__init__.py create mode 100644 state/kv/kv_in_memory.py create mode 100644 state/kv/kv_store.py create mode 100644 
state/kv/kv_store_leveldb.py create mode 100644 state/kv/kv_store_rocksdb.py create mode 100644 state/pruning_state.py create mode 100644 state/state.py create mode 100644 state/test/__init__.py create mode 100644 state/test/bench.py create mode 100644 state/test/conftest.py create mode 100644 state/test/kv/__init__.py create mode 100644 state/test/kv/test_kv_leveldb.py create mode 100644 state/test/kv/test_kv_memory.py create mode 100644 state/test/test_state_in_memory.py create mode 100644 state/test/test_state_leveldb.py create mode 100644 state/test/trie/__init__.py create mode 100644 state/test/trie/test_trie_values_at_different_roots.py create mode 100644 state/trie/__init__.py create mode 100644 state/trie/pruning_trie.py create mode 100644 state/util/__init__.py create mode 100644 state/util/fast_rlp.py create mode 100644 state/util/utils.py create mode 100644 stp_core/__init__.py create mode 100644 stp_core/common/__init__.py create mode 100644 stp_core/common/config/__init__.py create mode 100644 stp_core/common/config/util.py create mode 100644 stp_core/common/error.py create mode 100644 stp_core/common/log.py create mode 100644 stp_core/common/logging/TimeAndSizeRotatingFileHandler.py create mode 100644 stp_core/common/logging/__init__.py create mode 100644 stp_core/common/logging/handlers.py create mode 100644 stp_core/common/temp_file_util.py create mode 100644 stp_core/common/util.py create mode 100644 stp_core/config.py create mode 100644 stp_core/crypto/__init__.py create mode 100644 stp_core/crypto/encoding.py create mode 100644 stp_core/crypto/nacl_wrappers.py create mode 100644 stp_core/crypto/signer.py create mode 100644 stp_core/crypto/util.py create mode 100644 stp_core/error_codes.py create mode 100644 stp_core/loop/__init__.py create mode 100644 stp_core/loop/eventually.py create mode 100644 stp_core/loop/exceptions.py create mode 100644 stp_core/loop/looper.py create mode 100644 stp_core/loop/motor.py create mode 100644 
stp_core/loop/startable.py create mode 100644 stp_core/network/__init__.py create mode 100644 stp_core/network/auth_mode.py create mode 100644 stp_core/network/exceptions.py create mode 100644 stp_core/network/keep_in_touch.py create mode 100644 stp_core/network/network_interface.py create mode 100644 stp_core/network/port_dispenser.py create mode 100644 stp_core/network/util.py create mode 100644 stp_core/ratchet.py create mode 100644 stp_core/test/__init__.py create mode 100644 stp_core/test/conftest.py create mode 100644 stp_core/test/helper.py create mode 100644 stp_core/types.py create mode 100644 stp_raet/__init__.py create mode 100644 stp_raet/rstack.py create mode 100644 stp_raet/test/__init__.py create mode 100644 stp_raet/test/conftest.py create mode 100644 stp_raet/test/helper.py create mode 100644 stp_raet/test/stack_message_loss.py create mode 100644 stp_raet/test/test_communication.py create mode 100644 stp_raet/test/test_kitrstack.py create mode 100644 stp_raet/test/test_raet_comm_with_one_key.py create mode 100644 stp_raet/test/test_rstack.py create mode 100644 stp_raet/util.py create mode 100644 stp_zmq/__init__.py create mode 100644 stp_zmq/authenticator.py create mode 100644 stp_zmq/kit_zstack.py create mode 100644 stp_zmq/remote.py create mode 100644 stp_zmq/simple_zstack.py create mode 100644 stp_zmq/test/__init__.py create mode 100644 stp_zmq/test/conftest.py create mode 100644 stp_zmq/test/helper.py create mode 100644 stp_zmq/test/test_heartbeats.py create mode 100644 stp_zmq/test/test_kitzstack.py create mode 100644 stp_zmq/test/test_large_messages.py create mode 100644 stp_zmq/test/test_quotas.py create mode 100644 stp_zmq/test/test_reconnect.py create mode 100644 stp_zmq/test/test_utils.py create mode 100644 stp_zmq/test/test_zstack.py create mode 100644 stp_zmq/test/test_zstack_communication.py create mode 100644 stp_zmq/util.py create mode 100644 stp_zmq/zstack.py create mode 100644 terminology.md diff --git a/.gitignore b/.gitignore 
index 81c1cf8e8a..65e635c13e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,16 +2,21 @@ __pycache__/ */__pycache__ *.py[cod] +*$py.class # C extensions *.so # Distribution / packaging +.Python bin/ build/ develop-eggs/ dist/ +downloads/ eggs/ +.eggs/ +env/ lib/ lib64/ parts/ @@ -22,20 +27,34 @@ var/ *.egg *.eggs +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + # Installer logs pip-log.txt pip-delete-this-directory.txt pip-selfcheck.json # testing / coverage reports +htmlcov/ .tox/ .coverage +.coverage.* .cache nosetests.xml coverage.xml +*,cover +.hypothesis/ # Translations *.mo +*.pot + +# Django +*.log # Sphinx documentation docs/_build/ @@ -71,3 +90,15 @@ docs/source/api_docs/ # log files *.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Ipython Notebook +.ipynb_checkpoints + +# Hidden files +.* diff --git a/Jenkinsfile b/Jenkinsfile index 2a47d9cc9a..849b480cc0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,9 +2,9 @@ @Library('SovrinHelpers') _ -def name = 'plenum' +def name = 'indy-plenum' -def testUbuntu = { +def plenumTestUbuntu = { try { echo 'Ubuntu Test: Checkout csm' checkout scm @@ -17,7 +17,7 @@ def testUbuntu = { testHelpers.install() echo 'Ubuntu Test: Test' - testHelpers.testRunner(resFile: "test-result.${NODE_NAME}.txt") + testHelpers.testRunner([resFile: "test-result-plenum.${NODE_NAME}.txt", testDir: 'plenum']) } } finally { @@ -26,7 +26,74 @@ def testUbuntu = { } } -def testWindows = { +def ledgerTestUbuntu = { + try { + echo 'Ubuntu Test: Checkout csm' + checkout scm + + echo 'Ubuntu Test: Build docker image' + def testEnv = dockerHelpers.build(name) + + testEnv.inside { + echo 'Ubuntu Test: Install dependencies' + testHelpers.install() + + echo 'Ubuntu Test: Test' + testHelpers.testJUnit([testDir: 'ledger', resFile: "test-result-legder.${NODE_NAME}.xml"]) + } + } + finally { + echo 
'Ubuntu Test: Cleanup' + step([$class: 'WsCleanup']) + } +} + +def stateTestUbuntu = { + try { + echo 'Ubuntu Test: Checkout csm' + checkout scm + + echo 'Ubuntu Test: Build docker image' + def testEnv = dockerHelpers.build(name) + + testEnv.inside { + echo 'Ubuntu Test: Install dependencies' + testHelpers.install() + + echo 'Ubuntu Test: Test' + testHelpers.testJUnit([testDir: 'state', resFile: "test-result-state.${NODE_NAME}.xml"]) + } + } + finally { + echo 'Ubuntu Test: Cleanup' + step([$class: 'WsCleanup']) + } +} + +def stpTestUbuntu = { + try { + echo 'Ubuntu Test: Checkout csm' + checkout scm + + echo 'Ubuntu Test: Build docker image' + def testEnv = dockerHelpers.build(name) + + testEnv.inside { + echo 'Ubuntu Test: Install dependencies' + testHelpers.install() + + echo 'Ubuntu Test: Test' + testHelpers.testJUnit([testDir: 'stp_raet', resFile: "test-result-stp-raet.${NODE_NAME}.xml"]) + testHelpers.testJUnit([testDir: 'stp_zmq', resFile: "test-result-stp-zmq.${NODE_NAME}.xml"]) + } + } + finally { + echo 'Ubuntu Test: Cleanup' + step([$class: 'WsCleanup']) + } +} + +def plenumTestWindows = { echo 'TODO: Implement me' /* win2016 for now (03-23-2017) is not supported by Docker for Windows @@ -52,7 +119,37 @@ def testWindows = { //} } -def testWindowsNoDocker = { +def ledgerTestWindows = { + try { + echo 'Windows Test: Checkout csm' + checkout scm + + echo 'Windows Test: Build docker image' + dockerHelpers.buildAndRunWindows(name, testHelpers.installDepsWindowsCommands() + testHelpers.testJunitWindowsCommands()) + junit 'test-result.xml' + } + finally { + echo 'Windows Test: Cleanup' + step([$class: 'WsCleanup']) + } +} + +def stateTestWindows = { + try { + echo 'Windows Test: Checkout csm' + checkout scm + + echo 'Windows Test: Build docker image' + dockerHelpers.buildAndRunWindows(name, testHelpers.installDepsWindowsCommands() + testHelpers.testJunitWindowsCommands()) + junit 'test-result.xml' + } + finally { + echo 'Windows Test: Cleanup' + step([$class: 
'WsCleanup']) + } +} + +def plenumTestWindowsNoDocker = { try { echo 'Windows No Docker Test: Checkout csm' checkout scm @@ -71,5 +168,43 @@ def testWindowsNoDocker = { } } -testAndPublish(name, [ubuntu: testUbuntu]) +def ledgerTestWindowsNoDocker = { + try { + echo 'Windows No Docker Test: Checkout csm' + checkout scm + + testHelpers.createVirtualEnvAndExecute({ python, pip -> + echo 'Windows No Docker Test: Install dependencies' + testHelpers.installDepsBat(python, pip) + + echo 'Windows No Docker Test: Test' + testHelpers.testJunitBat(python, pip) + }) + } + finally { + echo 'Windows No Docker Test: Cleanup' + step([$class: 'WsCleanup']) + } +} + +def stateTestWindowsNoDocker = { + try { + echo 'Windows No Docker Test: Checkout csm' + checkout scm + + testHelpers.createVirtualEnvAndExecute({ python, pip -> + echo 'Windows No Docker Test: Install dependencies' + testHelpers.installDepsBat(python, pip) + + echo 'Windows No Docker Test: Test' + testHelpers.testJunitBat(python, pip) + }) + } + finally { + echo 'Windows No Docker Test: Cleanup' + step([$class: 'WsCleanup']) + } +} +def options = new TestAndPublishOptions() +testAndPublish(name, [ubuntu: [plenum: plenumTestUbuntu, ledger: ledgerTestUbuntu, state: stateTestUbuntu, stp: stpTestUbuntu]], true, options) diff --git a/README.md b/README.md index 1911995fcf..331c071dfc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,14 @@ # Plenum Byzantine Fault Tolerant Protocol -[![Build Status](https://jenkins.evernym.com/buildStatus/icon?job=Plenum/stable)](https://jenkins.evernym.com/job/Plenum/job/stable/) +[![Build Status](https://jenkins.evernym.com/buildStatus/icon?job=Plenum/master)](https://jenkins.evernym.com/job/Plenum/job/master/) + +Plenum is the heart of the distributed ledger technology inside Hyperledger +Indy. As such, it provides features somewhat similar in scope to those +found in Fabric. However, it is special-purposed for use in an identity +system, whereas Fabric is general purpose. 
+ +You can log bugs against Plenum in [Hyperledger's Jira](https://jira.hyperledger.org); use +project "INDY". Plenum makes extensive use of coroutines and the async/await keywords in Python, and as such, requires Python version 3.5.0 or later. Plenum also @@ -158,3 +166,26 @@ cliNodeReg = OrderedDict([ ('DeltaC', (('127.0.0.1', 9708), '3af81a541097e3e042cacbe8761c0f9e54326049e1ceda38017c95c432312f6f', '8b112025d525c47e9df81a6de2966e1b4ee1ac239766e769f19d831175a04264')) ]) ``` + +# Immutable Ledger used in Plenum. + +This codebase provides a simple, python-based, immutable, ordered log of transactions +backed by a merkle tree. This is an efficient way to generate verifiable proofs of presence +and data consistency. + +The scope of concerns here is fairly narrow; it is not a full-blown +distributed ledger technology like Fabric, but simply the persistence +mechanism that Plenum needs. The repo is intended to be collapsed into the indy-node codebase +over time; hence there is no wiki, no documentation, and no intention to +use github issues to track bugs. + +You can log issues against this codebase in [Hyperledger's Jira](https://jira.hyperledger.org). + +Join us on [Hyperledger's Rocket.Chat](http://chat.hyperledger.org), on the #indy +channel, to discuss. 
+ +# state +Plenum's state storage using python 3 version of Ethereum's Patricia Trie + +# stp +Secure Transport Protocol \ No newline at end of file diff --git a/ci/ubuntu.dockerfile b/ci/ubuntu.dockerfile index 52dfbc2da9..e2eca57f7a 100644 --- a/ci/ubuntu.dockerfile +++ b/ci/ubuntu.dockerfile @@ -23,4 +23,26 @@ USER root RUN ln -sf /home/sovrin/test/bin/python /usr/local/bin/python RUN ln -sf /home/sovrin/test/bin/pip /usr/local/bin/pip USER sovrin +# TODO: Automate dependency collection +RUN pip install jsonpickle \ + ujson \ + prompt_toolkit==0.57 \ + pygments \ + crypto==1.4.1 \ + rlp \ + sha3 \ + leveldb \ + ioflo==1.5.4 \ + semver \ + base58 \ + orderedset \ + sortedcontainers==1.5.7 \ + psutil \ + pip \ + portalocker==0.5.7 \ + pyzmq \ + raet \ + ioflo==1.5.4 \ + psutil \ + intervaltree WORKDIR /home/sovrin diff --git a/ledger/__init__.py b/ledger/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ledger/compact_merkle_tree.py b/ledger/compact_merkle_tree.py new file mode 100644 index 0000000000..8f798d8ee8 --- /dev/null +++ b/ledger/compact_merkle_tree.py @@ -0,0 +1,284 @@ +import functools +from binascii import hexlify +from typing import List, Tuple, Sequence + +import ledger.merkle_tree as merkle_tree +from ledger.stores.hash_store import HashStore +from ledger.stores.memory_hash_store import MemoryHashStore +from ledger.tree_hasher import TreeHasher +from ledger.util import count_bits_set, lowest_bit_set +from ledger.util import ConsistencyVerificationFailed + + +class CompactMerkleTree(merkle_tree.MerkleTree): + """Compact representation of a Merkle Tree that permits only extension. + + Attributes: + tree_size: Number of leaves in this tree. + hashes: That of the full (i.e. size 2^k) subtrees that form this tree, + sorted in descending order of size. 
+ """ + + def __init__(self, hasher=TreeHasher(), tree_size=0, hashes=(), + hashStore=None): + + # These two queues should be written to two simple position-accessible + # arrays (files, database tables, etc.) + self.hashStore = hashStore or MemoryHashStore() # type: HashStore + self.__hasher = hasher + self._update(tree_size, hashes) + + def _update(self, tree_size: int, hashes: Sequence[bytes]): + bits_set = count_bits_set(tree_size) + num_hashes = len(hashes) + if num_hashes != bits_set: + msgfmt = "number of hashes != bits set in tree_size: %s vs %s" + raise ValueError(msgfmt % (num_hashes, bits_set)) + self.__tree_size = tree_size + self.__hashes = tuple(hashes) + # height of the smallest subtree, or 0 if none exists (empty tree) + self.__mintree_height = lowest_bit_set(tree_size) + self.__root_hash = None + + def load(self, other: merkle_tree.MerkleTree): + """Load this tree from a dumb data object for serialisation. + + The object must have attributes tree_size:int and hashes:list. + """ + self._update(other.tree_size, other.hashes) + + def save(self, other: merkle_tree.MerkleTree): + """Save this tree into a dumb data object for serialisation. + + The object must have attributes tree_size:int and hashes:list. + """ + other.__tree_size = self.__tree_size + other.__hashes = self.__hashes + + def __copy__(self): + return self.__class__(self.__hasher, self.__tree_size, self.__hashes) + + def __repr__(self): + return "%s(%r, %r, %r)" % ( + self.__class__.__name__, + self.__hasher, self.__tree_size, self.__hashes) + + def __len__(self): + return self.__tree_size + + @property + def tree_size(self) -> int: + return self.__tree_size + + @property + def hashes(self) -> Tuple[bytes]: + return self.__hashes + + @property + def root_hash(self): + """Returns the root hash of this tree. 
(Only re-computed on change.)""" + if self.__root_hash is None: + self.__root_hash = ( + self.__hasher._hash_fold(self.__hashes) + if self.__hashes else self.__hasher.hash_empty()) + return self.__root_hash + + @property + def root_hash_hex(self): + """Returns the root hash of this tree. (Only re-computed on change.)""" + return hexlify(self.root_hash) + + def _push_subtree(self, leaves: List[bytes]): + """Extend with a full subtree <= the current minimum subtree. + + The leaves must form a full subtree, i.e. of size 2^k for some k. If + there is a minimum subtree (i.e. __mintree_height > 0), then the input + subtree must be smaller or of equal size to the minimum subtree. + + If the subtree is smaller (or no such minimum exists, in an empty tree), + we can simply append its hash to self.hashes, since this maintains the + invariant property of being sorted in descending size order. + + If the subtree is of equal size, we are in a similar situation to an + addition carry. We handle it by combining the two subtrees into a larger + subtree (of size 2^(k+1)), then recursively trying to add this new + subtree back into the tree. + + Any collection of leaves larger than the minimum subtree must undergo + additional partition to conform with the structure of a merkle tree, + which is a more complex operation, performed by extend(). 
+ """ + size = len(leaves) + if count_bits_set(size) != 1: + raise ValueError("invalid subtree with size != 2^k: %s" % size) + # in general we want the highest bit, but here it's also the lowest bit + # so just reuse that code instead of writing a new highest_bit_set() + subtree_h, mintree_h = lowest_bit_set(size), self.__mintree_height + if mintree_h > 0 and subtree_h > mintree_h: + raise ValueError("subtree %s > current smallest subtree %s" % ( + subtree_h, mintree_h)) + root_hash, hashes = self.__hasher._hash_full(leaves, 0, size) + assert hashes == (root_hash,) + + if self.hashStore: + for h in hashes: + self.hashStore.writeLeaf(h) + + new_node_hashes = self.__push_subtree_hash(subtree_h, root_hash) + + nodes = [(self.tree_size, height, h) for h, height in new_node_hashes] + if self.hashStore: + for node in nodes: + self.hashStore.writeNode(node) + + def __push_subtree_hash(self, subtree_h: int, sub_hash: bytes): + size, mintree_h = 1 << (subtree_h - 1), self.__mintree_height + if subtree_h < mintree_h or mintree_h == 0: + self._update(self.tree_size + size, self.hashes + (sub_hash,)) + return [] + else: + assert subtree_h == mintree_h + # addition carry - rewind the tree and re-try with bigger subtree + prev_hash = self.hashes[-1] + self._update(self.tree_size - size, self.hashes[:-1]) + new_mintree_h = self.__mintree_height + assert mintree_h < new_mintree_h or new_mintree_h == 0 + next_hash = self.__hasher.hash_children(prev_hash, sub_hash) + + return [(next_hash, subtree_h)] + self.__push_subtree_hash( + subtree_h + 1, next_hash) + + def append(self, new_leaf: bytes) -> List[bytes]: + """Append a new leaf onto the end of this tree and return the + audit path""" + auditPath = list(reversed(self.__hashes)) + self._push_subtree([new_leaf]) + return auditPath + + def extend(self, new_leaves: List[bytes]): + """Extend this tree with new_leaves on the end. 
+ + The algorithm works by using _push_subtree() as a primitive, calling + it with the maximum number of allowed leaves until we can add the + remaining leaves as a valid entire (non-full) subtree in one go. + """ + size = len(new_leaves) + final_size = self.tree_size + size + idx = 0 + while True: + # keep pushing subtrees until mintree_size > remaining + max_h = self.__mintree_height + max_size = 1 << (max_h - 1) if max_h > 0 else 0 + if max_h > 0 and size - idx >= max_size: + self._push_subtree(new_leaves[idx:idx+max_size]) + idx += max_size + else: + break + # fill in rest of tree in one go, now that we can + if idx < size: + root_hash, hashes = self.__hasher._hash_full(new_leaves, idx, size) + self._update(final_size, self.hashes + hashes) + assert self.tree_size == final_size + + def extended(self, new_leaves: List[bytes]): + """Returns a new tree equal to this tree extended with new_leaves.""" + new_tree = self.__copy__() + new_tree.extend(new_leaves) + return new_tree + + def merkle_tree_hash_hex(self, start: int, end: int): + mth = self.merkle_tree_hash(start, end) + return hexlify(mth) + + @functools.lru_cache(maxsize=256) + def merkle_tree_hash(self, start: int, end: int): + if not end > start: + raise ValueError("end must be greater than start") + if (end - start) == 1: + return self.hashStore.readLeaf(end) + leafs, nodes = self.hashStore.getPath(end, start) + leafHash = self.hashStore.readLeaf(end) + hashes = [leafHash, ] + for h in leafs: + hashes.append(self.hashStore.readLeaf(h)) + for h in nodes: + hashes.append(self.hashStore.readNode(h)) + foldedHash = self.__hasher._hash_fold(hashes[::-1]) + return foldedHash + + def consistency_proof(self, first: int, second: int): + return [self.merkle_tree_hash(a, b) for a, b in + self._subproof(first, 0, second, True)] + + def inclusion_proof(self, start, end): + return [self.merkle_tree_hash(a, b) for a, b in self._path(start, 0, end)] + + def _subproof(self, m, start_n: int, end_n: int, b: int): + n = 
end_n - start_n + if m == n: + if b: + return [] + else: + return [(start_n, end_n)] + else: + k = 1 << (len(bin(n - 1)) - 3) + if m <= k: + return self._subproof(m, start_n, start_n + k, b) + [ + (start_n + k, end_n)] + else: + return self._subproof(m - k, start_n + k, end_n, False) + [ + (start_n, start_n + k)] + + def _path(self, m, start_n: int, end_n: int): + n = end_n - start_n + if n == 1: + return [] + else: + # `k` is the largest power of 2 less than `n` + k = 1 << (len(bin(n - 1)) - 3) + if m < k: + return self._path(m, start_n, start_n + k) + [ + (start_n + k, end_n)] + else: + return self._path(m - k, start_n + k, end_n) + [ + (start_n, start_n + k)] + + def get_tree_head(self, seq: int=None): + if seq is None: + seq = self.tree_size + if seq > self.tree_size: + raise IndexError + return { + 'tree_size': seq, + 'sha256_root_hash': self.merkle_tree_hash(0, seq) if seq else None, + } + + @property + def leafCount(self) -> int: + return self.hashStore.leafCount + + @property + def nodeCount(self) -> int: + return self.hashStore.nodeCount + + @staticmethod + def get_expected_node_count(leaf_count): + """ + The number of nodes is the number of full subtrees present + """ + count = 0 + while leaf_count > 1: + leaf_count //= 2 + count += leaf_count + return count + + def verify_consistency(self, expected_leaf_count) -> bool: + """ + Check that the tree has same leaf count as expected and the + number of nodes are also as expected + """ + if expected_leaf_count != self.leafCount: + raise ConsistencyVerificationFailed() + if self.get_expected_node_count(self.leafCount) != self.nodeCount: + raise ConsistencyVerificationFailed() + return True diff --git a/ledger/error.py b/ledger/error.py new file mode 100644 index 0000000000..f22043ba94 --- /dev/null +++ b/ledger/error.py @@ -0,0 +1,156 @@ +"""Status codes are bad, but True/False is not expressive enough. 
+ +Consider a cryptographic signature verification method verify(data, sig) that +returns 1 for valid signatures, 0 for invalid signatures, and -1 to signal some +specific error. This can easily lead to insecure usage such as +if verify(data, sig): + # do stuff on success + +Or, here's another example, borrowed from real code: +r, s = asn1_decode(sig) # raises ASN1Error +return verify_sig(data, r, s) # returns True/False + +A caller may obviously be interested in distinguishing a decoding error from a +signature computation error - but why is a computation error False while a +decoding error is an exception? What other exceptions might this code raise? +This is a nightmare for the caller to handle. + +Therefore, methods in the crypto package that verify a property return True +when verification succeeds and raise an exception on any error. This minimises +the risk of uncaught errors, allows to provide information for callers that care +about the specific failure reason, and makes failure handling easy for callers +that do not care: + +try: + verify(myargs) +except MyError: + # handle specific error here + return +except VerifyError: + # verify failed, we don't care why + return +# do more stuff on success here + +Returning True is strictly speaking not needed but simplifies testing. +We provide a defensive returns_true_or_raises wrapper for ensuring this +behaviour: callers of methods decorated with @returns_true_or_raises can be +certain that the _only_ value the method returns is True - it never returns +None, or False, or [], or anything else. +""" + +import functools + + +class Error(Exception): + """Exceptions raised by the crypto subpackage.""" + pass + + +class UnsupportedAlgorithmError(Error): + """An algorithm is not implemented or supported.""" + pass + + +class VerifyError(Error): + """Some expected property of the input cannot be verified. + + The property either verifiably does not hold, or cannot be conclusively + verified. 
Domain-specific verification errors inherit from this class. + """ + pass + + +class ConsistencyError(VerifyError): + """There is a (cryptographic) inconsistency in the data.""" + pass + + +class ProofError(VerifyError): + """A cryptographic proof is not valid. + + This error does not necessarily indicate that the sought property does not + hold but rather that the given data is insufficient for verifying the + desired property. + """ + pass + + +# TODO(ekasper): TBD if this hierarchy is appropriate. +class EncodingError(Error): + """Encoding/decoding error. + + Inputs cannot be serialized, or serialized data cannot be parsed. + """ + pass + + +class ASN1Error(EncodingError): + """An ASN1 object cannot be encoded or decoded.""" + pass + + +class ASN1TagError(ASN1Error): + """ASN1 tag mismatch.""" + pass + + +class UnknownASN1TypeError(ASN1Error): + """An OID does not map to a known ASN.1 type.""" + pass + + +class ASN1IllegalCharacter(ASN1Error): + """String contains illegal character.""" + def __init__(self, message, string, index, *args): + self.message = message + self.string = string + self.index = index + super(ASN1Error, self).__init__(message, *args) + + def __str__(self): + return "%s (string: %s, character: %s, index: %d)" % (self.message, + self.string, + self.string[ + self.index], + self.index) + + +class IncompleteChainError(VerifyError): + """A certificate is missing from the chain""" + pass + + +class SignatureError(VerifyError): + """A public-key signature does not verify.""" + pass + + +class UnsupportedVersionError(Error): + """The version of the data structure is unknown.""" + pass + + +class GeneralMissingError(Error): + """Some required attribute is missing""" + pass + + +def returns_true_or_raises(f): + """A safety net. + + Decorator for functions that are only allowed to return True or raise + an exception. + + Args: + f: A function whose only expected return value is True. 
+ + Returns: + A wrapped functions whose guaranteed only return value is True. + """ + @functools.wraps(f) + def wrapped(*args, **kwargs): + ret = f(*args, **kwargs) + if ret is not True: + raise RuntimeError("Unexpected return value %r" % ret) + return True + return wrapped diff --git a/ledger/immutable_store.py b/ledger/immutable_store.py new file mode 100644 index 0000000000..a061e0fac7 --- /dev/null +++ b/ledger/immutable_store.py @@ -0,0 +1,22 @@ +class ImmutableStore: + """ + Interface for immutable stores. + An immutable store is any storage system (database, flatfile, in-memory, + etc.). It stores the transaction data and the relevant info from the + Merkle Tree. + """ + + def start(self, loop): + raise NotImplementedError() + + def stop(self): + raise NotImplementedError() + + def append(self, reply): + raise NotImplementedError() + + def get(self, identifier: str, reqId: int): + raise NotImplementedError() + + def size(self) -> int: + raise NotImplementedError() diff --git a/ledger/ledger.py b/ledger/ledger.py new file mode 100644 index 0000000000..8e5b74852d --- /dev/null +++ b/ledger/ledger.py @@ -0,0 +1,221 @@ +import base64 +import logging +import time + +from ledger.compact_merkle_tree import CompactMerkleTree +from ledger.tree_hasher import TreeHasher +from ledger.merkle_tree import MerkleTree +from ledger.serializers.mapping_serializer import MappingSerializer +from ledger.serializers.json_serializer import JsonSerializer +from ledger.stores.file_store import FileStore +from ledger.stores.text_file_store import TextFileStore +from ledger.immutable_store import ImmutableStore +from ledger.util import F, ConsistencyVerificationFailed + + +class Ledger(ImmutableStore): + + @staticmethod + def _defaultStore(dataDir, + logName, + ensureDurability, + defaultFile) -> FileStore: + + return TextFileStore(dataDir, + logName, + isLineNoKey=True, + storeContentHash=False, + ensureDurability=ensureDurability, + defaultFile=defaultFile) + + def __init__(self, + 
tree: MerkleTree, + dataDir: str, + serializer: MappingSerializer=None, + fileName: str=None, + ensureDurability: bool=True, + transactionLogStore: FileStore=None, + defaultFile=None): + """ + :param tree: an implementation of MerkleTree + :param dataDir: the directory where the transaction log is stored + :param serializer: an object that can serialize the data before hashing + it and storing it in the MerkleTree + :param fileName: the name of the transaction log file + :param defaultFile: file or dir to use for initialization of transaction log store + """ + assert not transactionLogStore or not defaultFile + self.defaultFile = defaultFile + + self.dataDir = dataDir + self.tree = tree + self.leafSerializer = serializer or \ + JsonSerializer() # type: MappingSerializer + self.hasher = TreeHasher() + self._transactionLog = None # type: FileStore + self._transactionLogName = fileName or "transactions" + self.ensureDurability = ensureDurability + self._customTransactionLogStore = transactionLogStore + self.start() + self.seqNo = 0 + self.recoverTree() + + def recoverTree(self): + # TODO: Should probably have 2 classes of hash store, + # persistent and non persistent + + # TODO: this definitely should be done in a more generic way: + if not isinstance(self.tree, CompactMerkleTree): + logging.error("Do not know how to recover {}".format(self.tree)) + raise TypeError("Merkle tree type {} is not supported" + .format(type(self.tree))) + start = time.perf_counter() + if not self.tree.hashStore \ + or not self.tree.hashStore.is_persistent \ + or self.tree.leafCount == 0: + logging.debug("Recovering tree from transaction log") + self.recoverTreeFromTxnLog() + else: + try: + logging.debug("Recovering tree from hash store of size {}". 
+ format(self.tree.leafCount)) + self.recoverTreeFromHashStore() + except ConsistencyVerificationFailed: + logging.error("Consistency verification of merkle tree " + "from hash store failed, " + "falling back to transaction log") + self.recoverTreeFromTxnLog() + + end = time.perf_counter() + t = end - start + logging.debug("Recovered tree in {} seconds".format(t)) + + def recoverTreeFromTxnLog(self): + # TODO: in this and some other lines specific fields of + # CompactMerkleTree are used, but type of self.tree is MerkleTree + # This must be fixed! + self.tree.hashStore.reset() + for key, entry in self._transactionLog.iterator(): + if isinstance(entry, str): + entry = entry.encode() + self._addToTreeSerialized(entry) + + def recoverTreeFromHashStore(self): + treeSize = self.tree.leafCount + self.seqNo = treeSize + hashes = list(reversed(self.tree.inclusion_proof(treeSize, + treeSize + 1))) + self.tree._update(self.tree.leafCount, hashes) + self.tree.verify_consistency(self._transactionLog.numKeys) + + def add(self, leaf): + self._addToStore(leaf) + merkleInfo = self._addToTree(leaf) + return merkleInfo + + def _addToTree(self, leafData): + serializedLeafData = self.serializeLeaf(leafData) + return self._addToTreeSerialized(serializedLeafData) + + def _addToTreeSerialized(self, serializedLeafData): + audit_path = self.tree.append(serializedLeafData) + self.seqNo += 1 + return self._build_merkle_proof(audit_path) + + def _build_merkle_proof(self, audit_path): + return { + F.seqNo.name: self.seqNo, + F.rootHash.name: self.hashToStr(self.tree.root_hash), + F.auditPath.name: [self.hashToStr(h) for h in audit_path] + } + + def _addToStore(self, data): + key = str(self.seqNo + 1) + self._transactionLog.put(key=key, + value=self.leafSerializer.serialize( + data, toBytes=False)) + + def append(self, txn): + return self.add(txn) + + def get(self, **kwargs): + for seqNo, value in self._transactionLog.iterator(): + data = self.leafSerializer.deserialize(value) + # If `kwargs` 
is a subset of `data` + if set(kwargs.values()) == {data.get(k) for k in kwargs.keys()}: + data[F.seqNo.name] = int(seqNo) + return data + + def getBySeqNo(self, seqNo): + key = str(seqNo) + value = self._transactionLog.get(key) + if value: + data = self.leafSerializer.deserialize(value) + data[F.seqNo.name] = int(seqNo) + return data + else: + return value + + def __getitem__(self, seqNo): + return self.getBySeqNo(seqNo) + + def lastCount(self): + key = self._transactionLog.lastKey + return 0 if key is None else int(key) + + def serializeLeaf(self, leafData): + return self.leafSerializer.serialize(leafData) + + @property + def size(self) -> int: + return self.tree.tree_size + + def __len__(self): + return self.size + + @property + def root_hash(self) -> str: + return self.hashToStr(self.tree.root_hash) + + def merkleInfo(self, seqNo): + seqNo = int(seqNo) + assert seqNo > 0 + rootHash = self.tree.merkle_tree_hash(0, seqNo) + auditPath = self.tree.inclusion_proof(seqNo-1, seqNo) + return { + F.rootHash.name: self.hashToStr(rootHash), + F.auditPath.name: [self.hashToStr(h) for h in auditPath] + } + + def start(self, loop=None, ensureDurability=True): + if self._transactionLog and not self._transactionLog.closed: + logging.debug("Ledger already started.") + else: + logging.debug("Starting ledger...") + ensureDurability = ensureDurability or self.ensureDurability + self._transactionLog = \ + self._customTransactionLogStore or \ + self._defaultStore(self.dataDir, + self._transactionLogName, + ensureDurability, + self.defaultFile) + self._transactionLog.appendNewLineIfReq() + + def stop(self): + self._transactionLog.close() + + def reset(self): + # THIS IS A DESTRUCTIVE ACTION + self._transactionLog.reset() + + def getAllTxn(self, frm: int=None, to: int=None): + yield from ((seq_no, self.leafSerializer.deserialize(txn)) + for seq_no, txn in self._transactionLog.get_range(frm, to)) + + @staticmethod + def hashToStr(h): + return base64.b64encode(h).decode() + + 
@staticmethod + def strToHash(s): + return base64.b64decode(s).encode() diff --git a/ledger/merkle_tree.py b/ledger/merkle_tree.py new file mode 100644 index 0000000000..7279191481 --- /dev/null +++ b/ledger/merkle_tree.py @@ -0,0 +1,74 @@ +from abc import abstractmethod +from typing import List, Tuple + + +class MerkleTree: + """ + Interface to be implemented by all Merkle Trees. + """ + + @abstractmethod + def append(self, new_leaf): + """ + """ + + @abstractmethod + def merkle_tree_hash(self, start, end): + """ + """ + + @abstractmethod + def consistency_proof(self, first, second): + """ + """ + + @abstractmethod + def inclusion_proof(self, start, end): + """ + """ + + @abstractmethod + def get_tree_head(self, seq=None): + """ + """ + + @property + @abstractmethod + def hashes(self) -> Tuple[bytes]: + """ + """ + + @property + @abstractmethod + def root_hash(self) -> bytes: + """ + """ + + @property + @abstractmethod + def root_hash_hex(self) -> bytes: + """ + """ + + @property + @abstractmethod + def tree_size(self) -> int: + """ + """ + + @property + @abstractmethod + def leafCount(self) -> int: + """ + """ + + @property + @abstractmethod + def nodeCount(self) -> int: + """ + """ + + @abstractmethod + def verify_consistency(self, expectedLeafCount) -> bool: + """ + """ \ No newline at end of file diff --git a/ledger/merkle_verifier.py b/ledger/merkle_verifier.py new file mode 100644 index 0000000000..48c0d77e4c --- /dev/null +++ b/ledger/merkle_verifier.py @@ -0,0 +1,262 @@ +import logging +from binascii import hexlify +from typing import Sequence, List + +from ledger import error +from ledger.tree_hasher import TreeHasher +from ledger.util import STH + + +class MerkleVerifier(object): + """A utility class for doing Merkle path computations.""" + + def __init__(self, hasher=TreeHasher()): + self.hasher = hasher + + def __repr__(self): + return "%r(hasher: %r)" % (self.__class__.__name__, self.hasher) + + def __str__(self): + return "%s(hasher: %s)" % 
(self.__class__.__name__, self.hasher) + + @error.returns_true_or_raises + def verify_tree_consistency(self, old_tree_size: int, new_tree_size: int, + old_root: bytes, new_root: bytes, + proof: Sequence[bytes]): + """Verify the consistency between two root hashes. + + old_tree_size must be <= new_tree_size. + + Args: + old_tree_size: size of the older tree. + new_tree_size: size of the newer_tree. + old_root: the root hash of the older tree. + new_root: the root hash of the newer tree. + proof: the consistency proof. + + Returns: + True. The return value is enforced by a decorator and need not be + checked by the caller. + + Raises: + ConsistencyError: the proof indicates an inconsistency + (this is usually really serious!). + ProofError: the proof is invalid. + ValueError: supplied tree sizes are invalid. + """ + old_size = old_tree_size + new_size = new_tree_size + + if old_size < 0 or new_size < 0: + raise ValueError("Negative tree size") + + if old_size > new_size: + raise ValueError("Older tree has bigger size (%d vs %d), did " + "you supply inputs in the wrong order?" % + (old_size, new_size)) + + if old_size == new_size: + if old_root == new_root: + if proof: + logging.warning("Trees are identical, ignoring proof") + return True + else: + raise error.ConsistencyError("Inconsistency: different root " + "hashes for the same tree size") + + if old_size == 0: + if proof: + # A consistency proof with an empty tree is an empty proof. + # Anything is consistent with an empty tree, so ignore whatever + # bogus proof was supplied. Note we do not verify here that the + # root hash is a valid hash for an empty tree. + logging.warning("Ignoring non-empty consistency proof for " + "empty tree.") + return True + + # Now 0 < old_size < new_size + # A consistency proof is essentially an audit proof for the node with + # index old_size - 1 in the newer tree. 
The sole difference is that + # the path is already hashed together into a single hash up until the + # first audit node that occurs in the newer tree only. + node = old_size - 1 + last_node = new_size - 1 + + # While we are the right child, everything is in both trees, so move one + # level up. + while node % 2: + node //= 2 + last_node //= 2 + + p = iter(proof) + try: + if node: + # Compute the two root hashes in parallel. + new_hash = old_hash = next(p) + else: + # The old tree was balanced (2**k nodes), so we already have + # the first root hash. + new_hash = old_hash = old_root + + while node: + if node % 2: + # node is a right child: left sibling exists in both trees. + next_node = next(p) + old_hash = self.hasher.hash_children(next_node, old_hash) + new_hash = self.hasher.hash_children(next_node, new_hash) + elif node < last_node: + # node is a left child: right sibling only exists in the + # newer tree. + new_hash = self.hasher.hash_children(new_hash, next(p)) + # else node == last_node: node is a left child with no sibling + # in either tree. + node //= 2 + last_node //= 2 + + # Now old_hash is the hash of the first subtree. If the two trees + # have different height, continue the path until the new root. + while last_node: + n = next(p) + new_hash = self.hasher.hash_children(new_hash, n) + last_node //= 2 + + # If the second hash does not match, the proof is invalid for the + # given pair. If, on the other hand, the newer hash matches but the + # older one doesn't, then the proof (together with the signatures + # on the hashes) is proof of inconsistency. + # Continue to find out. + if new_hash != new_root: + raise error.ProofError("Bad Merkle proof: second root hash " + "does not match. Expected hash: %s " + ", computed hash: %s" % + (hexlify(new_root).strip(), + hexlify(new_hash).strip())) + elif old_hash != old_root: + raise error.ConsistencyError("Inconsistency: first root hash " + "does not match. 
Expected hash: " + "%s, computed hash: %s" % + (hexlify(old_root).strip(), + hexlify(old_hash).strip()) + ) + + except StopIteration: + raise error.ProofError("Merkle proof is too short") + + # We've already verified consistency, so accept the proof even if + # there's garbage left over (but log a warning). + try: + next(p) + except StopIteration: + pass + else: + logging.warning("Proof has extra nodes") + return True + + def _calculate_root_hash_from_audit_path(self, leaf_hash: bytes, + node_index: int, + audit_path: List[bytes], + tree_size: int): + calculated_hash = leaf_hash + last_node = tree_size - 1 + while last_node > 0: + if not audit_path: + raise error.ProofError('Proof too short: left with node index ' + '%d' % node_index) + if node_index % 2: + audit_hash = audit_path.pop(0) + calculated_hash = self.hasher.hash_children( + audit_hash, calculated_hash) + elif node_index < last_node: + audit_hash = audit_path.pop(0) + calculated_hash = self.hasher.hash_children( + calculated_hash, audit_hash) + # node_index == last_node and node_index is even: A sibling does + # not exist. Go further up the tree until node_index is odd so + # calculated_hash will be used as the right-hand operand. + node_index //= 2 + last_node //= 2 + if audit_path: + raise error.ProofError('Proof too long: Left with %d hashes.' % + len(audit_path)) + return calculated_hash + + @classmethod + def audit_path_length(cls, index: int, tree_size: int): + length = 0 + last_node = tree_size - 1 + while last_node > 0: + if index % 2 or index < last_node: + length += 1 + index //= 2 + last_node //= 2 + + return length + + @error.returns_true_or_raises + def verify_leaf_hash_inclusion(self, leaf_hash: bytes, leaf_index: int, + proof: List[bytes], sth: STH): + """Verify a Merkle Audit Path. + + See section 2.1.1 of RFC6962 for the exact path description. + + Args: + leaf_hash: The hash of the leaf for which the proof was provided. + leaf_index: Index of the leaf in the tree. 
+ proof: A list of SHA-256 hashes representing the Merkle audit path. + sth: STH with the same tree size as the one used to fetch the proof. + The sha256_root_hash from this STH will be compared against the + root hash produced from the proof. + + Returns: + True. The return value is enforced by a decorator and need not be + checked by the caller. + + Raises: + ProofError: the proof is invalid. + """ + leaf_index = int(leaf_index) + tree_size = int(sth.tree_size) + #TODO(eranm): Verify signature over STH + if tree_size <= leaf_index: + raise ValueError("Provided STH is for a tree that is smaller " + "than the leaf index. Tree size: %d Leaf " + "index: %d" % (tree_size, leaf_index)) + if tree_size < 0 or leaf_index < 0: + raise ValueError("Negative tree size or leaf index: " + "Tree size: %d Leaf index: %d" % + (tree_size, leaf_index)) + calculated_root_hash = self._calculate_root_hash_from_audit_path( + leaf_hash, leaf_index, proof[:], tree_size) + if calculated_root_hash == sth.sha256_root_hash: + return True + + raise error.ProofError("Constructed root hash differs from provided " + "root hash. Constructed: %s Expected: %s" % + (hexlify(calculated_root_hash).strip(), + hexlify(sth.sha256_root_hash).strip())) + + @error.returns_true_or_raises + def verify_leaf_inclusion(self, leaf: bytes, leaf_index: int, + proof: List[bytes], sth: STH): + """Verify a Merkle Audit Path. + + See section 2.1.1 of RFC6962 for the exact path description. + + Args: + leaf: The leaf for which the proof was provided. + leaf_index: Index of the leaf in the tree. + proof: A list of SHA-256 hashes representing the Merkle audit path. + sth: STH with the same tree size as the one used to fetch the proof. + The sha256_root_hash from this STH will be compared against the + root hash produced from the proof. + + Returns: + True. The return value is enforced by a decorator and need not be + checked by the caller. + + Raises: + ProofError: the proof is invalid. 
+ """ + leaf_hash = self.hasher.hash_leaf(leaf) + return self.verify_leaf_hash_inclusion(leaf_hash, leaf_index, proof, + sth) diff --git a/ledger/serializers/__init__.py b/ledger/serializers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ledger/serializers/compact_serializer.py b/ledger/serializers/compact_serializer.py new file mode 100644 index 0000000000..7f08bad627 --- /dev/null +++ b/ledger/serializers/compact_serializer.py @@ -0,0 +1,77 @@ +from collections import OrderedDict +from typing import Dict + +from ledger.serializers.mapping_serializer import MappingSerializer + + +class CompactSerializer(MappingSerializer): + """ + Serializes a `Mapping` to string. Unlike JSON, does not use field(key) + names. Instead store an ordered dictionary of fields to serialize and + deserialize data. The ordered dictionary specifies in which order the values + of the `Mapping` will appear in the string and also how to convert each type + of value to and from string + """ + def __init__(self, fields: OrderedDict=None): + # TODO: add a special type (class) for fields + + self.fields = fields + self.delimiter = "|" + + def _stringify(self, name, record, fields=None): + fields = fields or self.fields + if record is None or record == {}: + return "" + encoder = fields[name][0] or str + return encoder(record) + + def _destringify(self, name, string, fields=None): + if not string: + return None + fields = fields or self.fields + decoder = fields[name][1] or str + return decoder(string) + + def serialize(self, data: Dict, fields=None, toBytes=True): + fields = fields or self.fields + records = [] + + def _addToRecords(name, record): + records.append(self._stringify(name, record, fields)) + + for name in fields: + if "." 
in name: + nameParts = name.split(".") + record = data.get(nameParts[0], {}) + for part in nameParts[1:]: + record = record.get(part, {}) + else: + record = data.get(name) + _addToRecords(name, record) + + encoded = self.delimiter.join(records) + if toBytes: + encoded = encoded.encode() + return encoded + + def deserialize(self, data, fields=None): + fields = fields or self.fields + if isinstance(data, (bytes, bytearray)): + data = data.decode() + items = data.split(self.delimiter) + result = {} + for name in fields: + if "." in name: + nameParts = name.split(".") + ref = result + for part in nameParts[:-1]: + if part not in ref: + ref[part] = {} + ref = ref[part] + ref[nameParts[-1]] = self._destringify(name, items.pop(0), fields) + elif items: + result[name] = self._destringify(name, items.pop(0), fields) + else: + # if we have more fields than data available, assume that all missed fields are None + result[name] = None + return result diff --git a/ledger/serializers/field.py b/ledger/serializers/field.py new file mode 100644 index 0000000000..0027686726 --- /dev/null +++ b/ledger/serializers/field.py @@ -0,0 +1,10 @@ +class Field: + + def __init__(self, name, encoder, decoder): + assert name and isinstance(name, str) + assert encoder and callable(encoder) + assert decoder and callable(decoder) + + self.name = name + self.encoder = encoder + self.decoder = decoder \ No newline at end of file diff --git a/ledger/serializers/json_serializer.py b/ledger/serializers/json_serializer.py new file mode 100644 index 0000000000..c1b58e2f19 --- /dev/null +++ b/ledger/serializers/json_serializer.py @@ -0,0 +1,75 @@ +# Consider using bson or ubjson for serializing json + + +import base64 +from typing import Dict + +from ledger.serializers.mapping_serializer import MappingSerializer + + +try: + import ujson as json + from ujson import encode as uencode + + # Older versions of ujson's encode do not support `sort_keys`, if that + # is the case default to using json + 
uencode({'xx': '123', 'aa': 90}, sort_keys=True) + + class UJsonEncoder: + @staticmethod + def encode(o): + if isinstance(o, (bytes, bytearray)): + return '"{}"'.format(base64.b64encode(o).decode("utf-8")) + else: + return uencode(o, sort_keys=True) + + + JsonEncoder = UJsonEncoder() + +except (ImportError, TypeError): + import json + + class OrderedJsonEncoder(json.JSONEncoder): + def __init__(self, *args, **kwargs): + kwargs['ensure_ascii'] = False + kwargs['sort_keys'] = True + kwargs['separators'] = (',', ':') + super().__init__(*args, **kwargs) + + def encode(self, o): + if isinstance(o, (bytes, bytearray)): + return '"{}"'.format(base64.b64encode(o).decode("utf-8")) + else: + return json.JSONEncoder.encode(self, o) + + JsonEncoder = OrderedJsonEncoder() + + +class JsonSerializer(MappingSerializer): + """ + Class to convert a mapping to json with keys ordered in lexicographical + order + """ + + @staticmethod + def dumps(data, toBytes=True): + encoded = JsonEncoder.encode(data) + if toBytes: + encoded = encoded.encode() + return encoded + + @staticmethod + def loads(data): + if isinstance(data, (bytes, bytearray)): + data = data.decode() + return json.loads(data) + + # The `fields` argument is kept to conform to the interface, its not + # need in this method + def serialize(self, data: Dict, fields=None, toBytes=True): + return self.dumps(data, toBytes) + + # The `fields` argument is kept to conform to the interface, its not + # need in this method + def deserialize(self, data, fields=None): + return self.loads(data) diff --git a/ledger/serializers/mapping_serializer.py b/ledger/serializers/mapping_serializer.py new file mode 100644 index 0000000000..5b2064c336 --- /dev/null +++ b/ledger/serializers/mapping_serializer.py @@ -0,0 +1,7 @@ +class MappingSerializer: + # TODO: Probably don't need `fields` here + def serialize(self, data, fields=None, toBytes=False): + raise NotImplementedError + + def deserialize(self, data, fields=None): + raise 
NotImplementedError diff --git a/ledger/stores/__init__.py b/ledger/stores/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ledger/stores/binary_file_store.py b/ledger/stores/binary_file_store.py new file mode 100644 index 0000000000..c06925094a --- /dev/null +++ b/ledger/stores/binary_file_store.py @@ -0,0 +1,47 @@ +import os + +from ledger.stores.file_store import FileStore + + +class BinaryFileStore(FileStore): + def __init__(self, dbDir, dbName, isLineNoKey: bool=False, + storeContentHash: bool=True, ensureDurability: bool=True): + # This is the separator between key and value + self.delimiter = b"\t" + # TODO: This line separator might conflict with some data format. + # So prefix the value data in the file with size and only read those + # number of bytes. + self.lineSep = b'\n\x07\n\x01' + super().__init__(dbDir, dbName, isLineNoKey, storeContentHash, + ensureDurability) + self._initDB(dbDir, dbName) + + @staticmethod + def _isBytes(arg): + return isinstance(arg, (bytes, bytearray)) + + def _initDB(self, dbDir, dbName): + super()._initDB(dbDir, dbName) + self.dbPath = os.path.join(dbDir, "{}.bin".format(dbName)) + self.dbFile = open(self.dbPath, mode="a+b", buffering=0) + + def put(self, value, key=None): + if not ((not key or self._isBytes(key)) and self._isBytes(value)): + raise ValueError("key and value need to be bytes-like object") + super().put(key=key, value=value) + + def get(self, key): + if not self._isBytes(key): + raise TypeError("key needs to be a bytes-like object") + return super().get(key) + + def iterator(self, includeKey=True, includeValue=True, prefix=None): + if prefix and not self._isBytes(prefix): + raise TypeError("prefix needs to be a bytes-like object") + + return super().iterator(includeKey, includeValue, prefix) + + def _lines(self): + return (line.strip(self.lineSep) for line in + self.dbFile.read().split(self.lineSep) + if len(line.strip(self.lineSep)) != 0) diff --git 
a/ledger/stores/chunked_file_store.py b/ledger/stores/chunked_file_store.py new file mode 100644 index 0000000000..52ae7c2485 --- /dev/null +++ b/ledger/stores/chunked_file_store.py @@ -0,0 +1,308 @@ +import os +import shutil +from itertools import chain +from typing import List, Generator +from ledger.stores.file_store import FileStore +from ledger.stores.text_file_store import TextFileStore + + +class ChunkedFileStore(FileStore): + """ + Implements a FileStore with chunking behavior. + + Stores chunks of data into separate files. The chunking of data is + determined by the `chunkSize` parameter. Each chunk of data is written to a + different file. + The naming convention of the files is such that the starting number of each + chunk is the file name, i.e. for a chunkSize of 1000, the first file would + be 1, the second 1001 etc. + + Every instance of ChunkedFileStore maintains its own directory for + storing the chunked data files. + """ + + firstChunkIndex = 1 + + @staticmethod + def _fileNameToChunkIndex(fileName): + try: + return int(fileName) + except: + return None + + @staticmethod + def _chunkIndexToFileName(index): + return str(index) + + def __init__(self, + dbDir, + dbName, + isLineNoKey: bool=False, + storeContentHash: bool=True, + chunkSize: int=1000, + ensureDurability: bool=True, + chunkStoreConstructor=TextFileStore, + defaultFile=None): + """ + + :param chunkSize: number of items in one chunk. 
Cannot be lower then number of items in defaultFile + :param chunkStoreConstructor: constructor of store for single chunk + """ + + assert chunkStoreConstructor is not None + + super().__init__(dbDir, + dbName, + isLineNoKey, + storeContentHash, + ensureDurability, + defaultFile=defaultFile) + + self.chunkSize = chunkSize + self.itemNum = 1 # chunk size counter + self.dataDir = os.path.join(dbDir, dbName) # chunk files destination + self.currentChunk = None # type: FileStore + self.currentChunkIndex = None # type: int + + self._chunkCreator = lambda name: \ + chunkStoreConstructor(self.dataDir, + name, + isLineNoKey, + storeContentHash, + ensureDurability) + + self._initDB(dbDir, dbName) + + def _prepareFiles(self, dbDir, dbName, defaultFile): + + def getFileSize(file): + with self._chunkCreator(file) as chunk: + return chunk.numKeys + + path = os.path.join(dbDir, dbName) + os.mkdir(path) + if defaultFile: + if self.chunkSize < getFileSize(defaultFile): + raise ValueError("Default file is larger than chunk size") + firstChunk = os.path.join(path, str(self.firstChunkIndex)) + shutil.copy(defaultFile, firstChunk) + + def _initDB(self, dataDir, dbName) -> None: + super()._initDB(dataDir, dbName) + path = os.path.join(dataDir, dbName) + if not os.path.isdir(path): + raise ValueError("Transactions file {} is not directory" + .format(path)) + self._useLatestChunk() + + def _useLatestChunk(self) -> None: + """ + Moves chunk cursor to the last chunk + """ + self._useChunk(self._findLatestChunk()) + + def _findLatestChunk(self) -> int: + """ + Determine which chunk is the latest + :return: index of a last chunk + """ + chunks = self._listChunks() + if len(chunks) > 0: + return chunks[-1] + return ChunkedFileStore.firstChunkIndex + + def _startNextChunk(self) -> None: + """ + Close current and start next chunk + """ + if self.currentChunk is None: + self._useLatestChunk() + else: + self._useChunk(self.currentChunkIndex + self.chunkSize) + + def _useChunk(self, index) -> 
None: + """ + Switch to specific chunk + + :param index: + """ + + if self.currentChunk is not None: + if self.currentChunkIndex == index and \ + not self.currentChunk.closed: + return + self.currentChunk.close() + + self.currentChunk = self._openChunk(index) + self.currentChunkIndex = index + self.itemNum = self.currentChunk.numKeys + 1 + + def _openChunk(self, index) -> FileStore: + """ + Load chunk from file + + :param index: chunk index + :return: opened chunk + """ + + return self._chunkCreator(ChunkedFileStore._chunkIndexToFileName(index)) + + def _get_key_location(self, key) -> (int, int): + """ + Return chunk no and 1-based offset of key + :param key: + :return: + """ + key = int(key) + if key == 0: + return 1, 0 + remainder = key % self.chunkSize + addend = ChunkedFileStore.firstChunkIndex + chunk_no = key - remainder + addend if remainder \ + else key - self.chunkSize + addend + offset = remainder or self.chunkSize + return chunk_no, offset + + def put(self, value, key=None) -> None: + if self.itemNum > self.chunkSize: + self._startNextChunk() + self.itemNum = 1 + self.itemNum += 1 + self.currentChunk.put(value, key) + + def get(self, key) -> str: + """ + Determines the file to retrieve the data from and retrieves the data. + + :return: value corresponding to specified key + """ + # TODO: get is creating files when a key is given which is more than + # the store size + chunk_no, offset = self._get_key_location(key) + with self._openChunk(chunk_no) as chunk: + return chunk.get(str(offset)) + + def reset(self) -> None: + """ + Clear all data in file storage. 
+ """ + self.close() + for f in os.listdir(self.dataDir): + os.remove(os.path.join(self.dataDir, f)) + self._useLatestChunk() + + def _lines(self): + """ + Lines in a store (all chunks) + + :return: lines + """ + + chunkIndices = self._listChunks() + for chunkIndex in chunkIndices: + with self._openChunk(chunkIndex) as chunk: + yield from chunk._lines() + + def open(self) -> None: + self._useLatestChunk() + + def close(self): + if self.currentChunk is not None: + self.currentChunk.close() + self.currentChunk = None + self.currentChunkIndex = None + self.itemNum = None + + def _listChunks(self): + """ + Lists stored chunks + + :return: sorted list of available chunk indices + """ + chunks = [] + for fileName in os.listdir(self.dataDir): + index = ChunkedFileStore._fileNameToChunkIndex(fileName) + if index is not None: + chunks.append(index) + return sorted(chunks) + + def iterator(self, includeKey=True, includeValue=True, prefix=None): + """ + Store iterator + + :return: Iterator for data in all chunks + """ + + if not (includeKey or includeValue): + raise ValueError("At least one of includeKey or includeValue " + "should be true") + lines = self._lines() + if includeKey and includeValue: + return self._keyValueIterator(lines, prefix=prefix) + if includeValue: + return self._valueIterator(lines, prefix=prefix) + return self._keyIterator(lines, prefix=prefix) + + def get_range(self, start=None, end=None): + self.is_valid_range(start, end) + + if not self.numKeys: + return + + if start and end and start == end: + res = self.get(start) + if res: + yield (start, res) + else: + if start is None: + start = 1 + if end is None: + end = self.numKeys + start_chunk_no, start_offset = self._get_key_location(start) + end_chunk_no, end_offset = self._get_key_location(end) + + if start_chunk_no == end_chunk_no: + # If entries lie in the same range + assert end_offset >= start_offset + with self._openChunk(start_chunk_no) as chunk: + yield from zip(range(start, end+1), + (l for _, 
l in chunk.get_range(start_offset, + end_offset))) + else: + current_chunk_no = start_chunk_no + while current_chunk_no <= end_chunk_no: + with self._openChunk(current_chunk_no) as chunk: + if current_chunk_no == start_chunk_no: + yield from ((current_chunk_no + k - 1, l) for k, l in + chunk.get_range(start=start_offset)) + elif current_chunk_no == end_chunk_no: + yield from ((current_chunk_no + k - 1, l) + for k, l in chunk.get_range(end=end_offset)) + else: + yield from ((current_chunk_no + k - 1, l) + for k, l in chunk.get_range(1, self.chunkSize)) + current_chunk_no += self.chunkSize + + def appendNewLineIfReq(self): + self._useLatestChunk() + self.currentChunk.appendNewLineIfReq() + + @property + def numKeys(self) -> int: + """ + This will iterate only over the last chunk since the name of the last + chunk indicates how many lines in total exist in all other chunks + """ + chunks = self._listChunks() + num_chunks = len(chunks) + if num_chunks == 0: + return 0 + count = (num_chunks-1)*self.chunkSize + last_chunk = self._openChunk(chunks[-1]) + count += sum(1 for _ in last_chunk._lines()) + last_chunk.close() + return count + + @property + def closed(self): + return self.currentChunk is None diff --git a/ledger/stores/directory_store.py b/ledger/stores/directory_store.py new file mode 100644 index 0000000000..f34d96e106 --- /dev/null +++ b/ledger/stores/directory_store.py @@ -0,0 +1,54 @@ +import os +from pathlib import Path + + +class DirectoryStore: + """ + A file based implementation of a key value store. + """ + def __init__(self, baseDir, dbName): + """ + :param dbDir: The directory where the file storing the data would be + present + :param dbName: The name of the file that is used to store the data + :param isLineNoKey: If false then each line has the key followed by a + delimiter followed by the value + :param storeContentHash: Whether to store a hash of the value or not. 
+ Storing hash can make it really fast to compare the value for equality + """ + self.baseDir = baseDir + self.dbName = dbName + if not os.path.exists(self.baseDir): + os.makedirs(self.baseDir) + self.dbPath = os.path.join(self.baseDir, dbName) + if not os.path.exists(self.dbPath): + os.makedirs(self.dbPath) + + def keyFilePath(self, key: str): + return os.path.join(self.dbPath, key) + + def exists(self, key: str): + return os.path.isfile(self.keyFilePath(key)) + + def get(self, key: str): + if self.exists(key): + with open(self.keyFilePath(key)) as f: + return f.read() + + def put(self, key: str, value: str): + with open(self.keyFilePath(key), "w") as f: + f.write(value) + + def appendToValue(self, key: str, value: str): + with open(self.keyFilePath(key), mode="a+") as f: + f.write(value) + f.write(os.linesep) + + def iterator(self): + path = Path(self.dbPath) + return [(file.name, file.open().read()) for file in path.iterdir()] + + @property + def keys(self): + path = Path(self.dbPath) + return [file.name for file in path.iterdir()] \ No newline at end of file diff --git a/ledger/stores/file_hash_store.py b/ledger/stores/file_hash_store.py new file mode 100644 index 0000000000..6a04975006 --- /dev/null +++ b/ledger/stores/file_hash_store.py @@ -0,0 +1,113 @@ +from ledger.stores.binary_file_store import BinaryFileStore +from ledger.stores.hash_store import HashStore + + +class FileHashStore(HashStore): + # Hashes are stored as raw bytes. By default each leaf hash is of 32 bytes + # and each node hash is too of 32 bytes. The extra 5 bytes in for each node + # are used to store the `start` and `height`. 
`start` takes 4 bytes so it + # can support upto 1 billion nodes and height takes 1 byte so it can store + # a tree upto the height of 255 + def __init__(self, dataDir, fileNamePrefix="", leafSize=32, nodeSize=32): + self.dataDir = dataDir + self.fileNamePrefix = fileNamePrefix + nodesFileName = fileNamePrefix + "_merkleNodes" + leavesFileName = fileNamePrefix + "_merkleLeaves" + + self.nodesFile = BinaryFileStore(self.dataDir, nodesFileName, + isLineNoKey=True, + storeContentHash=False) + self.leavesFile = BinaryFileStore(self.dataDir, leavesFileName, + isLineNoKey=True, + storeContentHash=False) + + # Do not need line separators since each entry is of fixed size + self.nodesFile.lineSep = b'' + self.leavesFile.lineSep = b'' + self.nodeSize = nodeSize + self.leafSize = leafSize + + @property + def is_persistent(self) -> bool: + return True + + @staticmethod + def write(data, store, size): + if not isinstance(data, bytes): + data = data.encode() + dataSize = len(data) + if dataSize != size: + raise ValueError("Data size not allowed. 
Size of the data should be " + "{} but instead was {}".format(size, dataSize)) + store.put(value=data) + + @staticmethod + def read(store, entryNo, size): + store.dbFile.seek((entryNo-1) * size) + return store.dbFile.read(size) + + @staticmethod + def dataGen(dataFactory, startpos, endpos): + i = startpos + while True: + data = dataFactory(i) + yield data + i += 1 + if i <= endpos: + break + + def writeNode(self, node): + # TODO: Need to have some exception handling around converting to bytes + # since they can result in `OverflowError` + # start, height, nodeHash = node + # start = start.to_bytes(4, byteorder='little') + # height = height.to_bytes(1, byteorder='little') + # data = start + height + nodeHash + data = node[2] + self.write(data, self.nodesFile, self.nodeSize) + + def writeLeaf(self, leafHash): + self.write(leafHash, self.leavesFile, self.leafSize) + + def readNode(self, pos): + data = self.read(self.nodesFile, pos, self.nodeSize) + if len(data) < self.nodeSize: + raise IndexError("No node at given position") + # start = int.from_bytes(data[:4], byteorder='little') + # height = int.from_bytes(data[4:5], byteorder='little') + # nodeHash = data[5:] + # return start, height, nodeHash + return data + + def readLeaf(self, pos): + data = self.read(self.leavesFile, pos, self.leafSize) + if len(data) < self.leafSize: + raise IndexError("No leaf at given position") + return data + + def readLeafs(self, startpos, endpos): + return self.dataGen(self.readLeaf, startpos, endpos) + + def readNodes(self, startpos, endpos): + return self.dataGen(self.readNode, startpos, endpos) + + @property + def leafCount(self) -> int: + return self.leavesFile.dbFile.seek(0,2) // self.leafSize + + @property + def nodeCount(self) -> int: + return self.nodesFile.dbFile.seek(0, 2) // self.nodeSize + + @property + def closed(self): + return self.nodesFile.dbFile.closed and self.leavesFile.dbFile.closed + + def close(self): + self.nodesFile.close() + self.leavesFile.close() + + def 
reset(self): + self.nodesFile.reset() + self.leavesFile.reset() + return True \ No newline at end of file diff --git a/ledger/stores/file_store.py b/ledger/stores/file_store.py new file mode 100644 index 0000000000..88cadf021c --- /dev/null +++ b/ledger/stores/file_store.py @@ -0,0 +1,224 @@ +import logging +import os +import shutil +from hashlib import sha256 + + +class FileStore: + """ + A file based implementation of a key value store. + """ + def __init__(self, + dbDir, + dbName, + isLineNoKey: bool=False, + storeContentHash: bool=True, + ensureDurability: bool=True, + delimiter="\t", + lineSep="\r\n", + defaultFile=None): + """ + :param dbDir: The directory where the file storing the data would be + present + :param dbName: The name of the file that is used to store the data + :param isLineNoKey: If false then each line has the key followed by a + delimiter followed by the value + :param storeContentHash: Whether to store a hash of the value or not. + Storing hash can make it really fast to compare the value for equality + :param ensureDurability: Should the file be fysnced after every write. + This can ensure durability in most of the cases, but make + writes extremely slow. See testMeasureWriteTime. 
For frequent writes, + it makes sense to disable flush and fsync on every write + :param delimiter: delimiter between key and value + :param lineSep: line separator - defaults to \r\n + :param defaultFile: file or dir to use for initialization + + """ + self.delimiter = delimiter + self.lineSep = lineSep + self.isLineNoKey = isLineNoKey + self.storeContentHash = storeContentHash + self.ensureDurability = ensureDurability + self._defaultFile = defaultFile + + def _prepareFiles(self, dbDir, dbName, defaultFile): + if not defaultFile: + return + if not os.path.exists(defaultFile): + errMessage = "File that should be used for " \ + "initialization does not exist: {}"\ + .format(defaultFile) + logging.warning(errMessage) + raise ValueError(errMessage) + dataLocation = os.path.join(self.dbDir, dbName) + copy = shutil.copy if os.path.isfile(defaultFile) else shutil.copytree + copy(defaultFile, dataLocation) + + def _prepareDBLocation(self, dbDir, dbName): + self.dbDir = dbDir + self.dbName = dbName + if not os.path.exists(self.dbDir): + os.makedirs(self.dbDir) + if not os.path.exists(os.path.join(dbDir, dbName)): + self._prepareFiles(dbDir, dbName, self._defaultFile) + + def _initDB(self, dbDir, dbName): + self._prepareDBLocation(dbDir, dbName) + + # noinspection PyUnresolvedReferences + def put(self, value, key=None): + # If line no is not treated as key then write the key and then the + # delimiter + if not self.isLineNoKey: + if key is None: + raise ValueError("Key must be provided for storing the value") + self.dbFile.write(key) + self.dbFile.write(self.delimiter) + + self.dbFile.write(value) + + if self.storeContentHash: + self.dbFile.write(self.delimiter) + if isinstance(value, str): + value = value.encode() + hexedHash = sha256(value).hexdigest() + self.dbFile.write(hexedHash) + self.dbFile.write(self.lineSep) + + # A little bit smart strategy like flush every 2 seconds + # or every 10 writes or every 1 KB may be a better idea + # Make sure data get written to the 
disk + # Even flush slows down writes significantly + self.dbFile.flush() + + if self.ensureDurability: + # fsync takes too much time on Windows. + # This is the reason of test_merkle_proof tests slowness on Windows. + # Even on Linux using fsync slows down the test by at least 2 + # orders of magnitude. See testMeasureWriteTime + os.fsync(self.dbFile.fileno()) + + def get(self, key): + for k, v in self.iterator(): + if k == key: + return v + + def _keyIterator(self, lines, prefix=None): + return self._baseIterator(lines, prefix, True, False) + + def _valueIterator(self, lines, prefix=None): + return self._baseIterator(lines, prefix, False, True) + + def _keyValueIterator(self, lines, prefix=None): + return self._baseIterator(lines, prefix, True, True) + + def _parse_line(self, line, prefix=None, returnKey: bool=True, + returnValue: bool=True, key=None): + if self.isLineNoKey: + k = key + v = line + else: + k, v = line.split(self.delimiter, 1) + if returnValue: + if self.storeContentHash: + value, _ = v.rsplit(self.delimiter, 1) + else: + value = v + if not prefix or k.startswith(prefix): + if returnKey and returnValue: + return k, value + elif returnKey: + return k + elif returnValue: + return value + + # noinspection PyUnresolvedReferences + def _baseIterator(self, lines, prefix, returnKey: bool, returnValue: bool): + i = 1 + for line in lines: + k = str(i) + yield self._parse_line(line, prefix, returnKey, returnValue, k) + if self.isLineNoKey: + i += 1 + + def _lines(self): + raise NotImplementedError() + + # noinspection PyUnresolvedReferences + def iterator(self, includeKey=True, includeValue=True, prefix=None): + if not (includeKey or includeValue): + raise ValueError("At least one of includeKey or includeValue " + "should be true") + # Move to the beginning of file + self.dbFile.seek(0) + + lines = self._lines() + if includeKey and includeValue: + return self._keyValueIterator(lines, prefix=prefix) + elif includeValue: + return self._valueIterator(lines, 
prefix=prefix) + else: + return self._keyIterator(lines, prefix=prefix) + + def is_valid_range(self, start=None, end=None): + assert self.isLineNoKey + if start and end: + assert start <= end + + def get_range(self, start=None, end=None): + self.is_valid_range(start, end) + for k, value in self.iterator(): + k = int(k) + if (start is None or k >= start) and (end is None or k <= end): + yield k, value + if end is not None and k > end: + break + + @property + def lastKey(self): + # TODO use the efficient way of seeking to the end and moving back till + # 2nd newline(1 st newline would be encountered immediately until its a + # blank file) is encountered and after newline read ahead till the + # delimiter or split the read string till now on delimiter + k = None + for k, v in self.iterator(): + pass + return k + + def appendNewLineIfReq(self): + try: + logging.debug("new line check for file: {}".format(self.dbPath)) + with open(self.dbPath, 'a+b') as f: + size = f.tell() + if size > 0: + f.seek(-len(self.lineSep), 2) # last character in file + if f.read().decode() != self.lineSep: + linesep = self.lineSep if isinstance(self.lineSep, bytes) else self.lineSep.encode() + f.write(linesep) + logging.debug( + "new line added for file: {}".format(self.dbPath)) + except FileNotFoundError: + pass + + @property + def numKeys(self): + return sum(1 for l in self.iterator()) + + # noinspection PyUnresolvedReferences + def close(self): + self.dbFile.close() + + # noinspection PyUnresolvedReferences + @property + def closed(self): + return self.dbFile.closed + + # noinspection PyUnresolvedReferences + def reset(self): + self.dbFile.truncate(0) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() diff --git a/ledger/stores/hash_store.py b/ledger/stores/hash_store.py new file mode 100644 index 0000000000..f74ce2a76e --- /dev/null +++ b/ledger/stores/hash_store.py @@ -0,0 +1,176 @@ +from abc import abstractmethod, abstractproperty + 
+from ledger.util import count_bits_set +from ledger.util import highest_bit_set + + +class HashStore: + """ + Store of nodeHashes and leafHashes mapped against their sequence numbers. + """ + @property + @abstractmethod + def is_persistent(self) -> bool: + pass + + @abstractmethod + def writeLeaf(self, leafHash): + """ + append the leafHash to the leaf hash store + + :param leafHash: hash of the leaf + """ + pass + + @abstractmethod + def writeNode(self, node): + """ + append the node to the node hash store. + + :param node: tuple of start, height and nodeHash + """ + pass + + @abstractmethod + def readLeaf(self, pos): + """ + Read the leaf hash at the given position in the merkle tree + + :param pos: the sequence number of the leaf + :return: the leafHash at the specified position + """ + pass + + @abstractmethod + def readNode(self, pos): + """ + Read the node hash at the given position in the merkle tree + + :param pos: the sequence number of the node (as calculated by + getNodePosition) + :return: the nodeHash at the specified position + """ + pass + + @abstractmethod + def readLeafs(self, startpos, endpos): + """ + Read multiple leaves at the given position. + + :param startpos: read from this sequence number (inclusive) + :param endpos: read up to this sequence number (inclusive) + :return: list of leafHashes + """ + pass + + @abstractmethod + def readNodes(self, startpos, endpos): + """ + Read multiple nodes at the given position. 
Node position can be + calculated using getNodePosition + + :param startpos: read from this sequence number (inclusive) + :param endpos: read up to this sequence number (inclusive) + :return: list of nodeHashes + """ + pass + + @property + @abstractmethod + def leafCount(self) -> int: + pass + + @leafCount.setter + @abstractmethod + def leafCount(self, count: int) -> None: + pass + + @property + @abstractmethod + def nodeCount(self) -> None: + pass + + @classmethod + def getNodePosition(cls, start, height=None) -> int: + """ + Calculates node position based on start and height + + :param start: The sequence number of the first leaf under this tree. + :param height: Height of this node in the merkle tree + :return: the node's position + """ + pwr = highest_bit_set(start) - 1 + height = height or pwr + if count_bits_set(start) == 1: + adj = height - pwr + return start - 1 + adj + else: + c = pow(2, pwr) + return cls.getNodePosition(c, pwr) + \ + cls.getNodePosition(start - c, height) + + @classmethod + def getPath(cls, seqNo, offset=0): + """ + Get the audit path of the leaf at the position specified by serNo. + + :param seqNo: sequence number of the leaf to calculate the path for + :param offset: the sequence number of the node from where the path + should begin. + :return: tuple of leafs and nodes + """ + if offset >= seqNo: + raise ValueError("Offset should be less than serial number") + pwr = highest_bit_set(seqNo - 1 - offset) - 1 + if pwr <= 0: + if seqNo % 2 == 0: + return [seqNo - 1], [] + else: + return [], [] + c = pow(2, pwr) + offset + leafs, nodes = cls.getPath(seqNo, c) + nodes.append(cls.getNodePosition(c, pwr)) + return leafs, nodes + + def readNodeByTree(self, start, height=None): + """ + Fetches nodeHash based on start leaf and height of the node in the tree. 
+ + :return: the nodeHash + """ + pos = self.getNodePosition(start, height) + return self.readNode(pos) + + @property + def is_consistent(self) -> bool: + """ + Returns True if number of nodes are consistent with number of leaves + """ + from ledger.compact_merkle_tree import CompactMerkleTree + return self.nodeCount == CompactMerkleTree.get_expected_node_count(self.leafCount) + + @staticmethod + def _validatePos(start, end=None): + if end: + assert start < end, "start index must be less than end index" + if start < 1: + raise IndexError( + "seqNo starts from 1, index requested: {}".format(start)) + + @abstractmethod + def close(self): + pass + + @property + @abstractmethod + def closed(self): + pass + + @abstractmethod + def reset(self) -> bool: + """ + Removes all data from hash store + + :return: True if completed successfully + """ + pass diff --git a/ledger/stores/memory_hash_store.py b/ledger/stores/memory_hash_store.py new file mode 100644 index 0000000000..7c5c8e9ed7 --- /dev/null +++ b/ledger/stores/memory_hash_store.py @@ -0,0 +1,49 @@ +from ledger.stores.hash_store import HashStore + + +class MemoryHashStore(HashStore): + def __init__(self): + self.reset() + self._closed = False + + @property + def is_persistent(self) -> bool: + return False + + def writeLeaf(self, leafHash): + self._leafs.append(leafHash) + + def writeNode(self, nodeHash): + self._nodes.append(nodeHash) + + def readLeaf(self, pos): + return self._leafs[pos - 1] + + def readNode(self, pos): + return self._nodes[pos - 1] + + def readLeafs(self, startpos, endpos): + return (n for n in self._leafs[startpos-1:endpos-1]) + + def readNodes(self, startpos, endpos): + return (n for n in self._nodes[startpos-1:endpos-1]) + + @property + def leafCount(self) -> int: + return len(self._leafs) + + @property + def nodeCount(self) -> int: + return len(self._nodes) + + def reset(self): + self._nodes = [] + self._leafs = [] + return True + + def close(self): + self._closed = True + + @property + def 
closed(self): + return self._closed diff --git a/ledger/stores/store_utils.py b/ledger/stores/store_utils.py new file mode 100644 index 0000000000..73da55d26e --- /dev/null +++ b/ledger/stores/store_utils.py @@ -0,0 +1,11 @@ +import os + + +def cleanLines(source, lineSep=os.linesep): + """ + :param source: some iterable source (list, file, etc) + :param lineSep: string of separators (chars) that must be removed + :return: list of non empty lines with removed separators + """ + stripped = (line.strip(lineSep) for line in source) + return (line for line in stripped if len(line) != 0) diff --git a/ledger/stores/text_file_store.py b/ledger/stores/text_file_store.py new file mode 100644 index 0000000000..042a19cbc6 --- /dev/null +++ b/ledger/stores/text_file_store.py @@ -0,0 +1,32 @@ +import os + +from ledger.stores import store_utils +from ledger.stores.file_store import FileStore + + +class TextFileStore(FileStore): + + def __init__(self, + dbDir, + dbName, + isLineNoKey: bool=False, + storeContentHash: bool=True, + ensureDurability: bool=True, + defaultFile=None): + super().__init__(dbDir, + dbName, + isLineNoKey, + storeContentHash, + ensureDurability, + defaultFile=defaultFile) + self._initDB(dbDir, dbName) + + def _initDB(self, dbDir, dbName): + super()._initDB(dbDir, dbName) + self.dbPath = os.path.join(os.path.expanduser(dbDir), dbName) + self.dbFile = open(self.dbPath, mode="a+") + + def _lines(self): + self.dbFile.seek(0) + return store_utils.cleanLines(self.dbFile) + diff --git a/ledger/test/__init__.py b/ledger/test/__init__.py new file mode 100644 index 0000000000..3c37ae15d7 --- /dev/null +++ b/ledger/test/__init__.py @@ -0,0 +1,7 @@ + +def run(): + import pytest + pytest.main() + +if __name__ == "__main__": + run() diff --git a/ledger/test/__main__.py b/ledger/test/__main__.py new file mode 100644 index 0000000000..154a8f252a --- /dev/null +++ b/ledger/test/__main__.py @@ -0,0 +1,3 @@ +from ledger import test + +test.run() diff --git 
a/ledger/test/conftest.py b/ledger/test/conftest.py new file mode 100644 index 0000000000..8a83f47a56 --- /dev/null +++ b/ledger/test/conftest.py @@ -0,0 +1,11 @@ +import pytest + + +@pytest.fixture(scope='module') +def tdir(tmpdir_factory): + return tmpdir_factory.mktemp('').strpath + + +@pytest.fixture(scope='function') +def tempdir(tmpdir_factory): + return tmpdir_factory.mktemp('').strpath diff --git a/ledger/test/helper.py b/ledger/test/helper.py new file mode 100644 index 0000000000..9274645c97 --- /dev/null +++ b/ledger/test/helper.py @@ -0,0 +1,45 @@ +import os +import types + +from ledger.util import STH +from ledger.ledger import Ledger + + +def checkLeafInclusion(verifier, leafData, leafIndex, proof, treeHead): + assert verifier.verify_leaf_inclusion( + leaf=leafData, + leaf_index=leafIndex, + proof=proof, + sth=STH(**treeHead)) + + +def checkConsistency(tree, verifier): + vectors = [(1, 2), + (1, 3), + (4, 5), + (2, 3), + (3, 8)] + + for oldsize, newsize in vectors: + proof = tree.consistency_proof(oldsize, newsize) + oldroot = tree.merkle_tree_hash(0, oldsize) + newroot = tree.merkle_tree_hash(0, newsize) + + assert verifier.verify_tree_consistency(old_tree_size=oldsize, + new_tree_size=newsize, + old_root=oldroot, + new_root=newroot, + proof=proof) + + +def check_ledger_generator(ledger): + size = ledger.size + assert isinstance(ledger.getAllTxn(frm=1, to=size), types.GeneratorType) + assert isinstance(ledger.getAllTxn(frm=1), types.GeneratorType) + assert isinstance(ledger.getAllTxn(to=size), types.GeneratorType) + assert isinstance(ledger.getAllTxn(), types.GeneratorType) + + +class NoTransactionRecoveryLedger(Ledger): + def recoverTreeFromTxnLog(self): + pass diff --git a/ledger/test/merkle_test.py b/ledger/test/merkle_test.py new file mode 100644 index 0000000000..0ec318a5e1 --- /dev/null +++ b/ledger/test/merkle_test.py @@ -0,0 +1,505 @@ +#!/usr/bin/env python + +import hashlib +import math +import unittest +from binascii import hexlify, 
unhexlify +from collections import namedtuple + +import ledger.merkle_verifier +import ledger.tree_hasher +from ledger import compact_merkle_tree +from ledger import error + + +class TreeHasherTest(unittest.TestCase): + sha256_empty_hash = \ + b"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + sha256_leaves = [ + (b"", + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d"), + (b"101112131415161718191a1b1c1d1e1f", + b"3bfb960453ebaebf33727da7a1f4db38acc051d381b6da20d6d4e88f0eabfd7a") + ] + sha256_nodes = [ + (b"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + b"202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f", + b"1a378704c17da31e2d05b6d121c2bb2c7d76f6ee6fa8f983e596c2d034963c57")] + + # array of bytestrings of the following literals in hex + test_vector_leaves = ["".join(chr(int(n, 16)) for n in s.split()).encode() + for s in [ + "", + "00", + "10", + "20 21", + "30 31", + "40 41 42 43", + "50 51 52 53 54 55 56 57", + "60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f"]] + + test_vector_hashes = [ + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", + b"fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125", + b"aeb6bcfe274b70a14fb067a5e5578264db0fa9b51af5e0ba159158f329e06e77", + b"d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7", + b"4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4", + b"76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", + b"ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c", + b"5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328", + ] + + def test_empty_hash(self): + hasher = ledger.tree_hasher.TreeHasher() + self.assertEqual(hexlify(hasher.hash_empty()), + TreeHasherTest.sha256_empty_hash) + + def test_hash_leaves(self): + hasher = ledger.tree_hasher.TreeHasher() + for leaf, val in TreeHasherTest.sha256_leaves: + self.assertEqual(hasher.hash_leaf(unhexlify(leaf)), + 
unhexlify(val)) + + def test_hash_children(self): + hasher = ledger.tree_hasher.TreeHasher() + for left, right, val in TreeHasherTest.sha256_nodes: + x = hasher.hash_children(unhexlify(left), + unhexlify(right)) + self.assertEqual(hexlify(x), val) + + def test_hash_full_invalid_index(self): + hasher = ledger.tree_hasher.TreeHasher() + self.assertRaises(IndexError, hasher._hash_full, "abcd", -5, -1) + self.assertRaises(IndexError, hasher._hash_full, "abcd", -1, 1) + self.assertRaises(IndexError, hasher._hash_full, "abcd", 1, 5) + self.assertRaises(IndexError, hasher._hash_full, "abcd", 2, 1) + + def test_hash_full_empty(self): + hasher = ledger.tree_hasher.TreeHasher() + for i in range(0, 5): + self.assertEqual(hexlify(hasher._hash_full("abcd", i, i)[0]), + TreeHasherTest.sha256_empty_hash) + + def test_hash_full_tree(self): + hasher = ledger.tree_hasher.TreeHasher() + self.assertEqual(hasher.hash_full_tree([]), hasher.hash_empty()) + leaves = [c.encode() for c in "abcde"] + a, b, c, d, e = [hasher.hash_leaf(c) for c in leaves] + h = hasher.hash_children + root_hash = h(h(h(a, b), + h(c, d)), + e) + self.assertEqual(hasher.hash_full_tree(leaves), root_hash) + + def test_hash_full_tree_test_vector(self): + hasher = ledger.tree_hasher.TreeHasher() + for i in range(len(TreeHasherTest.test_vector_leaves)): + test_vector = TreeHasherTest.test_vector_leaves[:i+1] + expected_hash = unhexlify(TreeHasherTest.test_vector_hashes[i]) + self.assertEqual(hasher.hash_full_tree(test_vector), expected_hash) + + +class HexTreeHasher(ledger.tree_hasher.TreeHasher): + def __init__(self, hashfunc=hashlib.sha256): + self.hasher = ledger.tree_hasher.TreeHasher(hashfunc) + + def hash_empty(self): + return hexlify(self.hasher.hash_empty()) + + def hash_leaf(self, data): + return hexlify(self.hasher.hash_leaf(unhexlify(data))) + + def hash_children(self, left, right): + return hexlify(self.hasher.hash_children(unhexlify(left), + unhexlify(right))) + + +class 
CompactMerkleTreeTest(unittest.TestCase): + + def setUp(self): + self.tree = compact_merkle_tree.CompactMerkleTree(HexTreeHasher()) + + def test_extend_from_empty(self): + for i in range(len(TreeHasherTest.test_vector_leaves)): + test_vector = TreeHasherTest.test_vector_leaves[:i+1] + expected_hash = TreeHasherTest.test_vector_hashes[i] + self.tree = compact_merkle_tree.CompactMerkleTree() + self.tree.extend(test_vector) + root = hexlify(self.tree.root_hash) + self.assertEqual(root, expected_hash) + + def test_push_subtree_1(self): + for i in range(len(TreeHasherTest.test_vector_leaves)): + test_vector = TreeHasherTest.test_vector_leaves[:i+1] + self.tree = compact_merkle_tree.CompactMerkleTree() + self.tree.extend(test_vector) + self.tree._push_subtree([b"test leaf"]) + self.assertEqual(len(self.tree), len(test_vector) + 1) + + def test_extend_from_partial(self): + z = len(TreeHasherTest.test_vector_leaves) + for i in range(z): + self.tree = compact_merkle_tree.CompactMerkleTree() + # add up to i + test_vector = TreeHasherTest.test_vector_leaves[:i+1] + expected_hash = TreeHasherTest.test_vector_hashes[i] + self.tree.extend(test_vector) + self.assertEqual(self.tree.root_hash_hex, expected_hash) + # add up to z + test_vector = TreeHasherTest.test_vector_leaves[i+1:] + expected_hash = TreeHasherTest.test_vector_hashes[z-1] + self.tree.extend(test_vector) + self.assertEqual(self.tree.root_hash_hex, expected_hash) + + +class MerkleVerifierTest(unittest.TestCase): + # (old_tree_size, new_tree_size, old_root, new_root, proof) + # Test vectors lifted from the C++ branch. 
+ sha256_proofs = [ + (1, 1, + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", + []), + (1, 8, + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", + b"5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328", + [b"96a296d224f285c67bee93c30f8a309157f0daa35dc5b87e410b78630a09cfc7", + b"5f083f0a1a33ca076a95279832580db3e0ef4584bdff1f54c8a360f50de3031e", + b"6b47aaf29ee3c2af9af889bc1fb9254dabd31177f16232dd6aab035ca39bf6e4"]), + (6, 8, + b"76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", + b"5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328", + [b"0ebc5d3437fbe2db158b9f126a1d118e308181031d0a949f8dededebc558ef6a", + b"ca854ea128ed050b41b35ffc1b87b8eb2bde461e9e3b5596ece6b9d5975a0ae0", + b"d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7"]), + (2, 5, + b"fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125", + b"4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4", + [b"5f083f0a1a33ca076a95279832580db3e0ef4584bdff1f54c8a360f50de3031e", + b"bc1a0643b12e4d2d7c77918f44e0f4f79a838b6cf9ec5b5c283e1f4d88599e6b"]) + ] + + # Data for leaf inclusion proof test + sha256_audit_path = [ + b"1a208aeebcd1b39fe2de247ee8db9454e1e93a312d206b87f6ca9cc6ec6f1ddd", + b"0a1b78b383f580856f433c01a5741e160d451c185910027f6cc9f828687a40c4", + b"3d1745789bc63f2da15850de1c12a5bf46ed81e1cc90f086148b1662e79aab3d", + b"9095b61e14d8990acf390905621e62b1714fb8e399fbb71de5510e0aef45affe", + b"0a332b91b8fab564e6afd1dd452449e04619b18accc0ff9aa8393cd4928451f2", + b"2336f0181d264aed6d8f3a6507ca14a8d3b3c3a23791ac263e845d208c1ee330", + b"b4ce56e300590500360c146c6452edbede25d4ed83919278749ee5dbe178e048", + b"933f6ddc848ea562e4f9c5cfb5f176941301dad0c6fdb9d1fbbe34fac1be6966", + b"b95a6222958a86f74c030be27c44f57dbe313e5e7c7f4ffb98bcbd3a03bb52f2", + b"daeeb3ce5923defd0faeb8e0c210b753b85b809445d7d3d3cd537a9aabaa9c45", + 
b"7fadd0a13e9138a2aa6c3fdec4e2275af233b94812784f66bcca9aa8e989f2bc", + b"1864e6ba3e32878610546539734fb5eeae2529991f130c575c73a7e25a2a7c56", + b"12842d1202b1dc6828a17ab253c02e7ce9409b5192430feba44189f39cc02d66", + b"29af64b16fa3053c13d02ac63aa75b23aa468506e44c3a2315edc85d2dc22b11", + b"b527b99934a0bd9edd154e449b0502e2c499bba783f3bc3dfe23364b6b532009", + b"4584db8ae8e351ace08e01f306378a92bfd43611714814f3d834a2842d69faa8", + b"86a9a41573b0d6e4292f01e93243d6cc65b30f06606fc6fa57390e7e90ed580f", + b"a88b98fbe84d4c6aae8db9d1605dfac059d9f03fe0fcb0d5dff1295dacba09e6", + b"06326dc617a6d1f7021dc536026dbfd5fffc6f7c5531d48ef6ccd1ed1569f2a1", + b"f41fe8fdc3a2e4e8345e30216e7ebecffee26ff266eeced208a6c2a3cf08f960", + b"40cf5bde8abb76983f3e98ba97aa36240402975674e120f234b3448911090f8d", + b"b3222dc8658538079883d980d7fdc2bef9285344ea34338968f736b04aeb387a"] + + raw_hex_leaf = ( + b"00000000013de9d2b29b000000055b308205573082043fa00302010202072b777b56df" + b"7bc5300d06092a864886f70d01010505003081ca310b30090603550406130255533110" + b"300e060355040813074172697a6f6e61311330110603550407130a53636f7474736461" + b"6c65311a3018060355040a1311476f44616464792e636f6d2c20496e632e3133303106" + b"0355040b132a687474703a2f2f6365727469666963617465732e676f64616464792e63" + b"6f6d2f7265706f7369746f72793130302e06035504031327476f204461646479205365" + b"637572652043657274696669636174696f6e20417574686f726974793111300f060355" + b"040513083037393639323837301e170d3133303131343038353035305a170d31353031" + b"31343038353035305a305331163014060355040a130d7777772e69646e65742e6e6574" + b"3121301f060355040b1318446f6d61696e20436f6e74726f6c2056616c696461746564" + b"311630140603550403130d7777772e69646e65742e6e657430820122300d06092a8648" + b"86f70d01010105000382010f003082010a0282010100d4e4a4b1bbc981c9b8166f0737" + b"c113000aa5370b21ad86a831a379de929db258f056ba0681c50211552b249a02ec00c5" + b"37e014805a5b5f4d09c84fdcdfc49310f4a9f9004245d119ce5461bc5c42fd99694b88" + 
b"388e035e333ac77a24762d2a97ea15622459cc4adcd37474a11c7cff6239f810120f85" + b"e014d2066a3592be604b310055e84a74c91c6f401cb7f78bdb45636fb0b1516b04c5ee" + b"7b3fa1507865ff885d2ace21cbb28fdaa464efaa1d5faab1c65e4c46d2139175448f54" + b"b5da5aea956719de836ac69cd3a74ca049557cee96f5e09e07ba7e7b4ebf9bf167f4c3" + b"bf8039a4cab4bec068c899e997bca58672bd7686b5c85ea24841e48c46f76830390203" + b"010001a38201b6308201b2300f0603551d130101ff04053003010100301d0603551d25" + b"0416301406082b0601050507030106082b06010505070302300e0603551d0f0101ff04" + b"04030205a030330603551d1f042c302a3028a026a0248622687474703a2f2f63726c2e" + b"676f64616464792e636f6d2f676473312d38332e63726c30530603551d20044c304a30" + b"48060b6086480186fd6d010717013039303706082b06010505070201162b687474703a" + b"2f2f6365727469666963617465732e676f64616464792e636f6d2f7265706f7369746f" + b"72792f30818006082b0601050507010104743072302406082b06010505073001861868" + b"7474703a2f2f6f6373702e676f64616464792e636f6d2f304a06082b06010505073002" + b"863e687474703a2f2f6365727469666963617465732e676f64616464792e636f6d2f72" + b"65706f7369746f72792f67645f696e7465726d6564696174652e637274301f0603551d" + b"23041830168014fdac6132936c45d6e2ee855f9abae7769968cce730230603551d1104" + b"1c301a820d7777772e69646e65742e6e6574820969646e65742e6e6574301d0603551d" + b"0e041604144d3ae8a87ddcf046764021b87e7d8d39ddd18ea0300d06092a864886f70d" + b"01010505000382010100ad651b199f340f043732a71178c0af48e22877b9e5d99a70f5" + b"d78537c31d6516e19669aa6bfdb8b2cc7a145ba7d77b35101f9519e03b58e692732314" + b"1383c3ab45dc219bd5a584a2b6333b6e1bbef5f76e89b3c187ef1d3b853b4910e895a4" + b"57dbe7627e759f56c8484c30b22a74fb00f7b1d7c41533a1fd176cd2a2b06076acd7ca" + b"ddc6ca6d0c2a815f9eb3ef0d03d27e7eebd7824c78fdb51679c03278cfbb2d85ae65a4" + b"7485cb733fc1c7407834f7471ababd68f140983817c6f388b2f2e2bfe9e26608f9924f" + b"16473462d136427d1f2801e4b870b078c20ec4ba21e22ab32a00b76522d523825bcabb" + b"8c7b6142d624be8d2af69ecc36fb5689572a0f59c00000") + + leaf_hash = ( + 
b"7a395c866d5ecdb0cccb623e011dbc392cd348d1d1d72776174e127a24b09c78") + leaf_index = 848049 + tree_size = 3630887 + expected_root_hash = ( + b"78316a05c9bcf14a3a4548f5b854a9adfcd46a4c034401b3ce7eb7ac2f1d0ecb") + + def setUp(self): + self.verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + self.STH = namedtuple("STH", ["sha256_root_hash", "tree_size"]) + self.ones = b"11" * 32 + self.zeros = b"00" * 32 + + def test_verify_tree_consistency(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + for test_vector in MerkleVerifierTest.sha256_proofs: + self.assertTrue(verifier.verify_tree_consistency(*test_vector)) + + def test_verify_tree_consistency_always_accepts_empty_tree(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + # Give some bogus proof too; it should be ignored. + self.assertTrue(verifier.verify_tree_consistency( + 0, 1, + b"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", + [b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d"] + )) + + def test_verify_tree_consistency_for_equal_tree_sizes(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + # Equal tree sizes and hashes, and a bogus proof that should be ignored. + self.assertTrue(verifier.verify_tree_consistency( + 3, 3, + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", + [b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d"] + )) + + # Equal tree sizes but different hashes. 
+ self.assertRaises( + error.ConsistencyError, verifier.verify_tree_consistency, 3, 3, + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01e", + b"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", + []) + + def test_verify_tree_consistency_newer_tree_is_smaller(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + self.assertRaises( + ValueError, verifier.verify_tree_consistency, 5, 2, + b"4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4", + b"fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125", + [b"5f083f0a1a33ca076a95279832580db3e0ef4584bdff1f54c8a360f50de3031e", + b"bc1a0643b12e4d2d7c77918f44e0f4f79a838b6cf9ec5b5c283e1f4d88599e6b"] + ) + + def test_verify_tree_consistency_proof_too_short(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + self.assertRaises( + error.ProofError, verifier.verify_tree_consistency, 6, 8, + b"76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", + b"5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328", + [b"0ebc5d3437fbe2db158b9f126a1d118e308181031d0a949f8dededebc558ef6a", + b"ca854ea128ed050b41b35ffc1b87b8eb2bde461e9e3b5596ece6b9d5975a0ae0"] + ) + + def test_verify_tree_consistency_bad_second_hash(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + # A bit has been flipped in the second hash. 
+ self.assertRaises( + error.ProofError, verifier.verify_tree_consistency, 6, 8, + b"76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", + b"5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604329", + [b"0ebc5d3437fbe2db158b9f126a1d118e308181031d0a949f8dededebc558ef6a", + b"ca854ea128ed050b41b35ffc1b87b8eb2bde461e9e3b5596ece6b9d5975a0ae0", + b"d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7"] + ) + + def test_verify_tree_consistency_both_hashes_bad(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + # A bit has been flipped in both hashes. + self.assertRaises( + error.ProofError, verifier.verify_tree_consistency, 6, 8, + b"76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ee", + b"5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604329", + [b"0ebc5d3437fbe2db158b9f126a1d118e308181031d0a949f8dededebc558ef6a", + b"ca854ea128ed050b41b35ffc1b87b8eb2bde461e9e3b5596ece6b9d5975a0ae0", + b"d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7"] + ) + + def test_verify_tree_consistency_bad_first_hash(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + # A bit has been flipped in the first hash. 
+ self.assertRaises( + error.ConsistencyError, verifier.verify_tree_consistency, 6, 8, + b"76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ee", + b"5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328", + [b"0ebc5d3437fbe2db158b9f126a1d118e308181031d0a949f8dededebc558ef6a", + b"ca854ea128ed050b41b35ffc1b87b8eb2bde461e9e3b5596ece6b9d5975a0ae0", + b"d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7"] + ) + + def test_calculate_root_hash_good_proof(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + root_hash = verifier._calculate_root_hash_from_audit_path( + self.leaf_hash, self.leaf_index, self.sha256_audit_path[:], + self.tree_size) + self.assertEqual(root_hash, self.expected_root_hash) + + def test_calculate_root_too_short_proof(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + leaf_index = self.leaf_index + int( + math.pow(2, len(self.sha256_audit_path) + 1)) + self.assertRaises( + error.ProofError, + verifier._calculate_root_hash_from_audit_path, + self.leaf_hash, leaf_index, self.sha256_audit_path[:], + self.tree_size) + + def test_verify_leaf_inclusion_good_proof(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + sth = self.STH(self.expected_root_hash, self.tree_size) + self.assertTrue( + verifier.verify_leaf_inclusion( + self.raw_hex_leaf, self.leaf_index, self.sha256_audit_path, + sth)) + + def test_verify_leaf_inclusion_bad_proof(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + # Expect this test to fail by providing an incorrect root hash. 
+ sth = self.STH(self.zeros, self.tree_size) + self.assertRaises( + error.ProofError, verifier.verify_leaf_inclusion, + self.raw_hex_leaf, self.leaf_index, self.sha256_audit_path, sth) + + def test_verify_leaf_inclusion_incorrect_length_proof(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + sth = self.STH(self.zeros, 4) + # Too long a proof + self.assertRaises( + error.ProofError, verifier.verify_leaf_inclusion, + self.ones, 0, [self.zeros, self.zeros, self.zeros], sth) + # Too short a proof + self.assertRaises( + error.ProofError, verifier.verify_leaf_inclusion, + self.ones, 0, [self.zeros], sth) + + def test_verify_leaf_inclusion_single_node_in_tree(self): + # If there is only one entry in the tree, the tree root hash should be + # equal to the leaf hash. + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + sth = self.STH(self.leaf_hash, 1) + self.assertTrue( + verifier.verify_leaf_inclusion(self.raw_hex_leaf, 0, [], sth)) + + def test_verify_leaf_inclusion_rightmost_node_in_tree(self): + # Show that verify_leaf_inclusion works when required to check a proof + # for the right-most node: In a tree of 8 nodes, ask for inclusion + # proof check for leaf 7. + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + hh = HexTreeHasher() + h_s1 = hh.hash_leaf(self.ones) + h_c3 = hh.hash_children(self.zeros, h_s1) + h_c2 = hh.hash_children(self.zeros, h_c3) + h_root = hh.hash_children(self.zeros, h_c2) + sth = self.STH(h_root, 8) + self.assertTrue( + verifier.verify_leaf_inclusion( + self.ones, 7, [self.zeros, self.zeros, self.zeros], sth)) + + def test_verify_leaf_inclusion_rightmost_node_in_unbalanced_odd_tree( + self): + # Show that verify_leaf_inclusion works when required to check a proof + # for the right-most, even-indexed node: In a tree of 5 nodes, ask for + # inclusion proof check for leaf 4 (the 5th). 
+ verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + hh = HexTreeHasher() + h_s1 = hh.hash_leaf(self.ones) + h_root = hh.hash_children(self.zeros, h_s1) + sth = self.STH(h_root, 5) + self.assertTrue( + verifier.verify_leaf_inclusion(self.ones, 4, [self.zeros, ], sth)) + + def test_verify_leaf_inclusion_rightmost_node_in_unbalanced_tree_odd_node( + self): + # Show that verify_leaf_inclusion works when required to check a proof + # for the right-most, odd-indexed node: In a tree of 6 nodes, ask for + # inclusion proof check for leaf 5 (the 6th). + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + hh = HexTreeHasher() + h_s1 = hh.hash_leaf(self.ones) + h_l2 = hh.hash_children(self.zeros, h_s1) + h_root = hh.hash_children(self.zeros, h_l2) + sth = self.STH(h_root, 6) + self.assertTrue( + verifier.verify_leaf_inclusion( + self.ones, 5, [self.zeros, self.zeros], sth)) + + def test_verify_leaf_inclusion_rightmost_node_in_unbalanced_even_tree( + self): + # Show that verify_leaf_inclusion works when required to check a proof + # for the right-most, odd-indexed node: In a tree of 6 nodes, ask for + # inclusion proof check for leaf 4 (the 5th). 
+ verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + hh = HexTreeHasher() + h_s1 = hh.hash_leaf(self.ones) + h_l2 = hh.hash_children(h_s1, self.zeros) + h_root = hh.hash_children(self.zeros, h_l2) + sth = self.STH(h_root, 6) + self.assertTrue( + verifier.verify_leaf_inclusion( + self.ones, 4, [self.zeros, self.zeros], sth)) + + def test_verify_leaf_inclusion_throws_on_bad_indices(self): + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + sth = self.STH("", 6) + self.assertRaises(ValueError, + verifier.verify_leaf_inclusion, "", -3, [], sth) + negative_sth = self.STH("", -3) + self.assertRaises(ValueError, + verifier.verify_leaf_inclusion, "", 3, [], + negative_sth) + + def test_verify_leaf_inclusion_all_nodes_all_tree_sizes_up_to_4(self): + leaves = ["aa", "bb", "cc", "dd"] + hh = HexTreeHasher() + leaf_hashes = [hh.hash_leaf(l) for l in leaves] + hc = hh.hash_children + proofs_per_tree_size = { + 1: [[] ], + 2: [[leaf_hashes[1]], [leaf_hashes[0]]], + 3: [[leaf_hashes[1], leaf_hashes[2]], # leaf 0 + [leaf_hashes[0], leaf_hashes[2]], # leaf 1 + [hc(leaf_hashes[0], leaf_hashes[1])]], # leaf 2 + 4: [[leaf_hashes[1], hc(leaf_hashes[2], leaf_hashes[3])], # leaf 0 + [leaf_hashes[0], hc(leaf_hashes[2], leaf_hashes[3])], # leaf 1 + [leaf_hashes[3], hc(leaf_hashes[0], leaf_hashes[1])], # leaf 2 + [leaf_hashes[2], hc(leaf_hashes[0], leaf_hashes[1])], # leaf 3 + ] + } + tree = compact_merkle_tree.CompactMerkleTree(hasher=HexTreeHasher()) + verifier = ledger.merkle_verifier.MerkleVerifier(HexTreeHasher()) + # Increase the tree by one leaf each time + for i in range(4): + tree.append(leaves[i]) + tree_size = i + 1 + # ... 
and check inclusion proof validates for each node + # of the tree + for j in range(tree_size): + proof = proofs_per_tree_size[tree_size][j] + sth = self.STH(tree.root_hash, tree_size) + self.assertTrue( + verifier.verify_leaf_inclusion( + leaves[j], j, proof, sth)) + + +if __name__ == "__main__": + unittest.main() diff --git a/ledger/test/test_chunked_file_store.py b/ledger/test/test_chunked_file_store.py new file mode 100644 index 0000000000..343dff2a73 --- /dev/null +++ b/ledger/test/test_chunked_file_store.py @@ -0,0 +1,153 @@ +import os +import random + +import math +from time import perf_counter + +import itertools +import pytest + +from ledger.stores.chunked_file_store import ChunkedFileStore +from ledger.stores.text_file_store import TextFileStore + + +def countLines(fname) -> int: + with open(fname) as f: + return sum(1 for _ in f) + + +def getValue(key) -> str: + return str(key) + " Some data" + + +chunkSize = 3 +dataSize = 101 +data = [getValue(i) for i in range(1, dataSize+1)] + + +@pytest.fixture(scope="module") +def chunkedTextFileStore() -> ChunkedFileStore: + return ChunkedFileStore("/tmp", "chunked_data", True, True, chunkSize, + chunkStoreConstructor=TextFileStore) + + +@pytest.yield_fixture(scope="module") +def populatedChunkedFileStore(chunkedTextFileStore) -> ChunkedFileStore: + store = chunkedTextFileStore + store.reset() + dirPath = "/tmp/chunked_data" + for d in data: + store.put(d) + assert len(os.listdir(dirPath)) == math.ceil(dataSize / chunkSize) + assert all(countLines(dirPath + os.path.sep + f) <= chunkSize + for f in os.listdir(dirPath)) + yield store + store.close() + + +def testWriteToNewFileOnceChunkSizeIsReached(populatedChunkedFileStore): + pass + + +def testRandomRetrievalFromChunkedFiles(populatedChunkedFileStore): + keys = [2*chunkSize, + 3*chunkSize+1, + 3*chunkSize+chunkSize, + random.randrange(1, dataSize + 1)] + for key in keys: + value = getValue(key) + assert populatedChunkedFileStore.get(key) == value + + +def 
testSizeChunkedFileStore(populatedChunkedFileStore): + """ + Check performance of `numKeys` + """ + s = perf_counter() + c1 = sum(1 for l in populatedChunkedFileStore.iterator()) + e = perf_counter() + t1 = e - s + s = perf_counter() + c2 = populatedChunkedFileStore.numKeys + e = perf_counter() + t2 = e - s + # It should be faster to use ChunkedStore specific implementation + # of `numKeys` + assert t1 > t2 + assert c1 == c2 + assert c2 == dataSize + + +def testIterateOverChunkedFileStore(populatedChunkedFileStore): + store = populatedChunkedFileStore + for k, v in store.iterator(): + assert data[int(k)-1] == v + + +def test_get_range(populatedChunkedFileStore): + # Test for range spanning multiple chunks + + # Range begins and ends at chunk boundaries + num = 0 + for k, v in populatedChunkedFileStore.get_range(chunkSize+1, 2*chunkSize): + assert data[int(k) - 1] == v + num += 1 + assert num == chunkSize + + # Range does not begin or end at chunk boundaries + num = 0 + for k, v in populatedChunkedFileStore.get_range(chunkSize+2, 2*chunkSize+1): + assert data[int(k) - 1] == v + num += 1 + assert num == chunkSize + + # Range spans multiple full chunks + num = 0 + for k, v in populatedChunkedFileStore.get_range(chunkSize + 2, + 5 * chunkSize + 1): + assert data[int(k) - 1] == v + num += 1 + assert num == 4*chunkSize + + with pytest.raises(AssertionError): + list(populatedChunkedFileStore.get_range(5, 1)) + + for frm, to in [(i, j) for i, j in itertools.permutations( + range(1, dataSize+1), 2) if i <= j]: + for k, v in populatedChunkedFileStore.get_range(frm, to): + assert data[int(k) - 1] == v + + +def test_chunk_size_limitation_when_default_file_used(tmpdir): + """ + This test checks that chunk size can not be lower then a number of items + in default file, used for initialization of ChunkedFileStore + """ + + isLineNoKey = True + storeContentHash = False + ensureDurability = True + dbDir = str(tmpdir) + defaultFile = os.path.join(dbDir, "template") + + lines = [ + 
"FirstLine\n", + "OneMoreLine\n", + "AnotherLine\n", + "LastDefaultLine\n" + ] + with open(defaultFile, "w") as f: + f.writelines(lines) + + chunkSize = len(lines) - 1 + + with pytest.raises(ValueError) as err: + ChunkedFileStore(dbDir=dbDir, + dbName="chunked_data", + isLineNoKey=isLineNoKey, + storeContentHash=storeContentHash, + chunkSize=chunkSize, + ensureDurability=ensureDurability, + chunkStoreConstructor=TextFileStore, + defaultFile=defaultFile) + assert "Default file is larger than chunk size" in str(err) diff --git a/ledger/test/test_compact_serializer.py b/ledger/test/test_compact_serializer.py new file mode 100644 index 0000000000..b2cf8adfd5 --- /dev/null +++ b/ledger/test/test_compact_serializer.py @@ -0,0 +1,207 @@ +from collections import OrderedDict + +import pytest +from ledger.serializers.compact_serializer import CompactSerializer + +fields = OrderedDict([ + ("f1", (str, str)), + ("f2", (str, str)), + ("f3", (str, int)), + ("f4", (str, float)), + ("f5", (str, eval)) +]) + +@pytest.fixture(scope='function') +def serializer(): + return CompactSerializer(fields) + +def testInitCompactSerializerWithCorrectFileds(): + CompactSerializer(fields) + +def testInitCompactSerializerNoFileds(): + CompactSerializer() + +def testInitCompactSerializerEmptyFileds(): + fields = OrderedDict([]) + CompactSerializer(fields) + +# TODO: add tests to distinguish None and empty values + +def testSerializeSimpleJson(serializer): + assert b"v1|v2|3|4.0|True" == \ + serializer.serialize( + {"f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True} + ) + + +def testDeserializeSimpleJson(serializer): + assert {"f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True} == \ + serializer.deserialize(b"v1|v2|3|4.0|True") + + +def testSerializeDeserializeSimpleJson(serializer): + json = {"f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True} + serialized = serializer.serialize(json) + deserialized = serializer.deserialize(serialized) + + assert json == deserialized + + +def 
testSerializeToBytes(serializer): + assert b"v1|v2|3|4.0|True" == \ + serializer.serialize( + {"f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True}, + toBytes=True + ) + +def testSerializeToString(serializer): + assert "v1|v2|3|4.0|True" == \ + serializer.serialize( + {"f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True}, + toBytes=False + ) + +def testDeserializeFromBytes(serializer): + assert {"f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True} == \ + serializer.deserialize(b"v1|v2|3|4.0|True") + + +def testSerializeToFields(serializer): + newFields = OrderedDict([ + ("ff1", (str, str)), + ("ff2", (str, str)), + ("ff3", (str, int)) + ]) + assert b"v1|v2|3" == \ + serializer.serialize( + {"ff1": "v1", "ff2": "v2", "ff3": 3}, + fields=newFields + ) + +def testDeserializeFromString(serializer): + assert {"f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True} == \ + serializer.deserialize("v1|v2|3|4.0|True") + +def testDeserializeForFields(serializer): + newFields = OrderedDict([ + ("ff1", (str, str)), + ("ff2", (str, str)), + ("ff3", (str, int)) + ]) + assert {"ff1": "v1", "ff2": "v2", "ff3": 3} == \ + serializer.deserialize("v1|v2|3", fields=newFields) + + +def testSerializeLessFields(serializer): + assert b"|v1|2||" == serializer.serialize({"f2": "v1", "f3": 2}) + assert b"v1||||" == serializer.serialize({"f1": "v1"}) + assert b"||||" == serializer.serialize({}) + assert b"||||3" == serializer.serialize({"f5": 3}) + assert b"v1||||3" == serializer.serialize({"f1": "v1", "f5": 3}) + + +def testDeserializeLessFields(serializer): + assert {"f1": None, "f2": "v1", "f3": 2, "f4": None, "f5": None} == serializer.deserialize(b"|v1|2||") + assert {"f1": "v1", "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"v1||||") + assert {"f1": None, "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"|||||") + assert {"f1": None, "f2": None, "f3": None, "f4": None, "f5": True} == serializer.deserialize(b"||||True" ) + 
assert {"f1": "v1", "f2": None, "f3": None, "f4": None, "f5": False} == serializer.deserialize(b"v1||||False") + + +def testSerializeLessFieldsWithNone(serializer): + assert b"|v1|2||" == serializer.serialize({"f1": None, "f2": "v1", "f3": 2}) + assert b"v1||||" == serializer.serialize({"f1": "v1", "f2": None, "f3": None, "f4": None, "f5": None}) + assert b"||||" == serializer.serialize({"f1": None, "f2": None, "f3": None, "f4": None, "f5": None}) + assert b"||||3" == serializer.serialize({"f1": None, "f5": 3}) + assert b"||||3" == serializer.serialize({"f1": None, "f2": None, "f3": None, "f4": None, "f5": 3}) + assert b"v1||||3" == serializer.serialize({"f1": "v1", "f2": None, "f3": None, "f4": None, "f5": 3}) + assert b"v1||||3" == serializer.serialize({"f1": "v1", "f3": None, "f5": 3}) + +def testSerializeInAnyOrder(serializer): + assert b"|v1|2||" == serializer.serialize(OrderedDict([("f3", 2), ("f2", "v1")])) + assert b"v1||||3" == serializer.serialize(OrderedDict([("f5", 3), ("f1", "v1")])) + assert b"v1|v2|3|4.0|True" == \ + serializer.serialize( + OrderedDict([("f2", "v2"), ("f5", True), ("f3", 3), ("f1", "v1"), ("f4", 4.0)]) + ) + +def testSerializeSubfields(): + fields = OrderedDict([ + ("f1.a", (str, str)), + ("f1.b", (str, int)), + ("f1.c", (str, float)), + ("f2.d", (str, str)), + ("f2.e", (str, int)), + ("f2.f", (str, float)), + + ]) + serializer = CompactSerializer(fields) + + json = { + "f1":{"a": "v1", "b": 2, "c": 3.0}, + "f2": {"d": "v1", "e": 3, "f": 4.0}, + } + assert b"v1|2|3.0|v1|3|4.0" == serializer.serialize(json) + + +def testDeserializeSubfields(): + fields = OrderedDict([ + ("f1.a", (str, str)), + ("f1.b", (str, int)), + ("f1.c", (str, float)), + ("f2.d", (str, str)), + ("f2.e", (str, int)), + ("f2.f", (str, float)), + + ]) + serializer = CompactSerializer(fields) + + json = { + "f1":{"a": "v1", "b": 2, "c": 3.0}, + "f2": {"d": "v1", "e": 3, "f": 4.0}, + } + assert json == serializer.deserialize(b"v1|2|3.0|v1|3|4.0") + + +def 
testSerializeWrongFields(serializer): + assert b"||||" == serializer.serialize({"wrongField": "v1"}) + assert b"||||" == serializer.serialize({"wrongField1": "v1", "wrongField2": "v2"}) + assert b"v1|v2|3|4.0|True" == \ + serializer.serialize( + {"f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True, "wrongField": "vvv"} + ) + assert b"v1|v2|3|4.0|True" == \ + serializer.serialize( + {"wrongField": "vvv", "f1": "v1", "f2": "v2", "f3": 3, "f4": 4.0, "f5": True} + ) + assert b"v1||||" == \ + serializer.serialize( + {"wrongField": "vvv", "f1": "v1"} + ) + + +def testDeserializeNewFieldsAdded(serializer): + assert {"f1": "v1", "f2": "v2", "f3": 2, "f4": None, "f5": None} == serializer.deserialize(b"v1|v2|2") + assert {"f1": "v1", "f2": "v2", "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"v1|v2|") + assert {"f1": "v1", "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"v1||") + assert {"f1": None, "f2": None, "f3": 3, "f4": None, "f5": None} == serializer.deserialize(b"||3") + assert {"f1": None, "f2": "v2", "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"|v2|") + + assert {"f1": None, "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"||") + assert {"f1": None, "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"") + assert {"f1": None, "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"|") + assert {"f1": None, "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"||||") + + +def testDeserializeFieldsRemoved(serializer): + assert {"f1": "v1", "f2": "v2", "f3": 2, "f4": 4.0, "f5": True} == serializer.deserialize(b"v1|v2|2|4.0|True|fff") + assert {"f1": "v1", "f2": "v2", "f3": 2, "f4": 4.0, "f5": True} == serializer.deserialize(b"v1|v2|2|4.0|True|fff|dddd|eeee") + assert {"f1": "v1", "f2": "v2", "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"v1|v2||||gggg") + assert {"f1": "v1", "f2": None, "f3": 
None, "f4": None, "f5": None} == serializer.deserialize(b"v1|||||||||||||") + assert {"f1": None, "f2": None, "f3": 3, "f4": None, "f5": None} == serializer.deserialize(b"||3||||||sdsd|||eee") + assert {"f1": None, "f2": "v2", "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"|v2|||||||dfdds||sdsd|") + + assert {"f1": None, "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"||||||") + assert {"f1": None, "f2": None, "f3": None, "f4": None, "f5": None} == serializer.deserialize(b"|||||||||||||||") + + diff --git a/ledger/test/test_fields.py b/ledger/test/test_fields.py new file mode 100644 index 0000000000..5a303091e7 --- /dev/null +++ b/ledger/test/test_fields.py @@ -0,0 +1,34 @@ +import pytest +from ledger.serializers.field import Field + + +def testCorrectField(): + Field("field1", str, str) + Field("field1", str, int) + Field("field1", int, str) + Field("field1", str, float) + Field("field1", str, bool) + +def testNoName(): + with pytest.raises(AssertionError): + Field(None, str, str) + +def testInvalidName(): + with pytest.raises(AssertionError): + Field(1111, str, str) + +def testNoEncoder(): + with pytest.raises(AssertionError): + Field("field1", None, str) + +def testInvalidEncoder(): + with pytest.raises(AssertionError): + Field("field1", "encoder", str) + +def testNoDecoder(): + with pytest.raises(AssertionError): + Field("field1", str, None) + +def testInvalidDecoder(): + with pytest.raises(AssertionError): + Field("field1", str, "decoder") \ No newline at end of file diff --git a/ledger/test/test_file_hash_store.py b/ledger/test/test_file_hash_store.py new file mode 100644 index 0000000000..c3f41774a9 --- /dev/null +++ b/ledger/test/test_file_hash_store.py @@ -0,0 +1,100 @@ +import string +from hashlib import sha256 +from random import choice, randint + +import pytest + +from ledger.stores.file_hash_store import FileHashStore + + +@pytest.fixture(scope="module") +def nodesLeaves(): + return [(randint(0, 1000000), 
randint(0, 255), h) + for h in generateHashes(10)], generateHashes(10) + + +def writtenFhs(tempdir, nodes, leaves): + fhs = FileHashStore(tempdir) + assert fhs.is_persistent + for leaf in leaves: + fhs.writeLeaf(leaf) + for node in nodes: + fhs.writeNode(node) + return fhs + + +def generateHashes(count=10): + return [sha256( + (choice(string.ascii_letters) * (randint(i, 1000) if i < 1000 + else randint(1000, i))).encode() + ).digest() for i in range(count)] + + +def testSimpleReadWrite(nodesLeaves, tempdir): + nodes, leaves = nodesLeaves + fhs = FileHashStore(tempdir) + + for leaf in leaves: + fhs.writeLeaf(leaf) + for i, leaf in enumerate(leaves): + assert leaf == fhs.readLeaf(i + 1) + + for node in nodes: + fhs.writeNode(node) + for i, node in enumerate(nodes): + assert node[2] == fhs.readNode(i + 1) + + lvs = fhs.readLeafs(1, len(leaves)) + for i, l in enumerate(lvs): + assert leaves[i] == l + + nds = fhs.readNodes(1, len(nodes)) + for i, n in enumerate(nds): + assert nodes[i][2] == n + + # Check that hash store can be closed and re-opened and the contents remain same + leaf_count = fhs.leafCount + node_count = fhs.nodeCount + fhs.close() + reopened_hash_store = FileHashStore(tempdir) + assert reopened_hash_store.leafCount == leaf_count + assert reopened_hash_store.nodeCount == node_count + + +def testIncorrectWrites(tempdir): + fhs = FileHashStore(tempdir, leafSize=50, nodeSize=50) + + with pytest.raises(ValueError): + fhs.writeLeaf(b"less than 50") + with pytest.raises(ValueError): + fhs.writeNode((8, 1, b"also less than 50")) + + with pytest.raises(ValueError): + fhs.writeLeaf(b"more than 50" + b'1'*50) + with pytest.raises(ValueError): + fhs.writeNode((4, 1, b"also more than 50" + b'1'*50)) + + +def testRandomAndRepeatedReads(nodesLeaves, tempdir): + nodes, leaves = nodesLeaves + fhs = writtenFhs(tempdir=tempdir, nodes=nodes, leaves=leaves) + + for i in range(10): + idx = choice(range(len(leaves))) + assert leaves[idx] == fhs.readLeaf(idx + 1) + + for i in 
range(10): + idx = choice(range(len(nodes))) + assert nodes[idx][2] == fhs.readNode(idx + 1) + + idx = len(leaves) // 2 + # Even if same leaf is read more than once it should return the + # same value. It checks for proper uses of `seek` method + assert leaves[idx] == fhs.readLeaf(idx + 1) + assert leaves[idx] == fhs.readLeaf(idx + 1) + + # Even after writing some data, the data at a previous index should not + # change + fhs.writeLeaf(leaves[-1]) + fhs.writeLeaf(leaves[0]) + assert leaves[idx] == fhs.readLeaf(idx + 1) diff --git a/ledger/test/test_file_store_perf.py b/ledger/test/test_file_store_perf.py new file mode 100644 index 0000000000..1867f2c981 --- /dev/null +++ b/ledger/test/test_file_store_perf.py @@ -0,0 +1,32 @@ +import time +from binascii import hexlify + +from ledger.stores.text_file_store import TextFileStore +from ledger.test.test_file_hash_store import generateHashes + + +def testMeasureWriteTime(tempdir): + store = TextFileStore(tempdir, 'benchWithSync', isLineNoKey=True, + storeContentHash=False) + hashes = [hexlify(h).decode() for h in generateHashes(1000)] + start = time.time() + for h in hashes: + store.put(value=h) + timeTakenWithSync = time.time() - start + store = TextFileStore(tempdir, 'benchWithoutSync', isLineNoKey=True, + storeContentHash=False, ensureDurability=False) + start = time.time() + for h in hashes: + store.put(value=h) + timeTakenWithoutSync = time.time() - start + print("Time taken to write {} entries to file with fsync is {} " + "seconds".format(len(hashes), timeTakenWithSync)) + print("Time taken to write {} entries to file without fsync is {} " + "seconds".format(len(hashes), timeTakenWithoutSync)) + print("So the difference is {} seconds". + format(timeTakenWithSync-timeTakenWithoutSync)) + # On most platforms the ratio between write time with fsync and + # write time without fsync typically must be greater than 100. + # But on Windows Server 2012 this ratio may be less - down to 30. 
+ assert timeTakenWithoutSync*10 < timeTakenWithSync, "ratio is {}".\ + format(timeTakenWithSync/timeTakenWithoutSync) diff --git a/ledger/test/test_file_stores_equailty.py b/ledger/test/test_file_stores_equailty.py new file mode 100644 index 0000000000..0a29f773dd --- /dev/null +++ b/ledger/test/test_file_stores_equailty.py @@ -0,0 +1,56 @@ +import os +import pytest +from ledger.stores.chunked_file_store import ChunkedFileStore +from ledger.stores.text_file_store import TextFileStore + + +def test_equality_to_text_file_store(tmpdir): + """ + This test verifies that TextFileStore and ChunkedFileStore behave equally + """ + isLineNoKey = True + storeContentHash = False + ensureDurability = True + dbDir = str(tmpdir) + + + defaultFile = os.path.join(dbDir, "template") + + lines = [ + "FirstLine\n", + "OneMoreLine\n", + "AnotherLine\n", + "LastDefaultLine\n" + ] + + with open(defaultFile, "w") as f: + f.writelines(lines) + + chunkSize = len(lines) + + chunkedStore = ChunkedFileStore(dbDir=dbDir, + dbName="chunked_data", + isLineNoKey=isLineNoKey, + storeContentHash=storeContentHash, + chunkSize=chunkSize, + ensureDurability=ensureDurability, + chunkStoreConstructor=TextFileStore, + defaultFile=defaultFile) + + textStore = TextFileStore(dbDir=dbDir, + dbName="text_data", + isLineNoKey=isLineNoKey, + storeContentHash=storeContentHash, + ensureDurability=ensureDurability, + defaultFile=defaultFile) + + for i in range(1, 5 * chunkSize): + value = str(i) + chunkedStore.put(value) + textStore.put(value) + assert textStore.get(value) == chunkedStore.get(value) + + assert list(chunkedStore.iterator()) == \ + list(textStore.iterator()) + + diff --git a/ledger/test/test_json_serializer.py b/ledger/test/test_json_serializer.py new file mode 100644 index 0000000000..9a55de63a6 --- /dev/null +++ b/ledger/test/test_json_serializer.py @@ -0,0 +1,43 @@ +from ledger.serializers.json_serializer import JsonSerializer + + +def testJsonSerializer(): + sz = JsonSerializer() + m1 = 
{'integer': 36, 'name': 'Foo', 'surname': 'Bar', 'float': 14.8639, + 'index': 1, 'index_start_at': 56, 'email': 'foo@bar.com', + 'fullname': 'Foo Bar', 'bool': False} + m1s = '{"bool":false,"email":"foo@bar.com","float":14.8639,"fullname":"Foo Bar","index":1,"index_start_at":56,"integer":36,"name":"Foo","surname":"Bar"}' + + m2 = {'latitude': 31.351883, 'longitude': -97.466179, + 'tags': ['foo', 'bar', 'baz', 'alice', 'bob', + 'carol', 'dave']} + m2s = '{"latitude":31.351883,"longitude":-97.466179,"tags":["foo","bar","baz","alice","bob","carol","dave"]}' + + m3 = {'name': 'Alice Bob', 'website': 'example.com', 'friends': [ + { + 'id': 0, + 'name': 'Dave' + }, + { + 'id': 1, + 'name': 'Carol' + }, + { + 'id': 2, + 'name': 'Dave' + }]} + m3s = '{"friends":[{"id":0,"name":"Dave"},{"id":1,"name":"Carol"},{"id":2,"name":"Dave"}],"name":"Alice Bob","website":"example.com"}' + + assert sz.serialize(m1) == m1s.encode() + assert sz.serialize(m1, toBytes=False) == m1s + assert sz.serialize(m2) == m2s.encode() + assert sz.serialize(m2, toBytes=False) == m2s + assert sz.serialize(m3) == m3s.encode() + assert sz.serialize(m3, toBytes=False) == m3s + + assert sz.deserialize(m1s) == m1 + assert sz.deserialize(m1s.encode()) == m1 + assert sz.deserialize(m2s) == m2 + assert sz.deserialize(m2s.encode()) == m2 + assert sz.deserialize(m3s) == m3 + assert sz.deserialize(m3s.encode()) == m3 diff --git a/ledger/test/test_ledger.py b/ledger/test/test_ledger.py new file mode 100644 index 0000000000..d41d26ba8a --- /dev/null +++ b/ledger/test/test_ledger.py @@ -0,0 +1,222 @@ +import base64 +import os +import random +import string +from collections import OrderedDict + +import pytest +from ledger.ledger import Ledger +from ledger.serializers.json_serializer import JsonSerializer +from ledger.serializers.compact_serializer import CompactSerializer +from ledger.compact_merkle_tree import CompactMerkleTree +from ledger.stores.file_hash_store import FileHashStore +from ledger.test.helper import 
NoTransactionRecoveryLedger, \ + check_ledger_generator +from ledger.util import ConsistencyVerificationFailed, F + + +def b64e(s): + return base64.b64encode(s).decode("utf-8") + + +def b64d(s): + return base64.b64decode(s) + + +def lst2str(l): + return ",".join(l) + + +orderedFields = OrderedDict([ + ("identifier", (str, str)), + ("reqId", (str, int)), + ("op", (str, str)) +]) + +ledgerSerializer = CompactSerializer(orderedFields) + + +@pytest.fixture(scope="function") +def ledger(tempdir): + ledger = Ledger(CompactMerkleTree(hashStore=FileHashStore(dataDir=tempdir)), + dataDir=tempdir, serializer=ledgerSerializer) + ledger.reset() + return ledger + + +def testAddTxn(tempdir, ledger): + txn1 = { + 'identifier': 'cli1', + 'reqId': 1, + 'op': 'do something' + } + ledger.add(txn1) + + txn2 = { + 'identifier': 'cli1', + 'reqId': 2, + 'op': 'do something else' + } + ledger.add(txn2) + + # Check that the transaction is added to the Merkle Tree + assert ledger.size == 2 + + # Check that the data is appended to the immutable store + txn1[F.seqNo.name] = 1 + txn2[F.seqNo.name] = 2 + assert txn1 == ledger[1] + assert txn2 == ledger[2] + check_ledger_generator(ledger) + + +def testQueryMerkleInfo(tempdir, ledger): + merkleInfo = {} + for i in range(100): + txn = { + 'identifier': 'cli' + str(i), + 'reqId': i+1, + 'op': ''.join([random.choice(string.printable) for i in range( + random.randint(i+1, 100))]) + } + mi = ledger.add(txn) + seqNo = mi.pop(F.seqNo.name) + assert i+1 == seqNo + merkleInfo[seqNo] = mi + + for i in range(100): + assert merkleInfo[i+1] == ledger.merkleInfo(i+1) + + +""" +If the server holding the ledger restarts, the ledger should be fully rebuilt +from persisted data. Any incoming commands should be stashed. (Does this affect +creation of Signed Tree Heads? I think I don't really understand what STHs are.) 
+""" + + +def testRecoverMerkleTreeFromLedger(tempdir): + ledger2 = Ledger(CompactMerkleTree(), dataDir=tempdir, + serializer=ledgerSerializer) + assert ledger2.tree.root_hash is not None + ledger2.reset() + ledger2.stop() + + +def testRecoverLedgerFromHashStore(tempdir): + fhs = FileHashStore(tempdir) + tree = CompactMerkleTree(hashStore=fhs) + ledger = Ledger(tree=tree, dataDir=tempdir) + for d in range(100): + ledger.add(str(d).encode()) + updatedTree = ledger.tree + ledger.stop() + + tree = CompactMerkleTree(hashStore=fhs) + restartedLedger = Ledger(tree=tree, dataDir=tempdir) + assert restartedLedger.size == ledger.size + assert restartedLedger.root_hash == ledger.root_hash + assert restartedLedger.tree.hashes == updatedTree.hashes + assert restartedLedger.tree.root_hash == updatedTree.root_hash + + +def testRecoverLedgerNewFieldsToTxnsAdded(tempdir): + fhs = FileHashStore(tempdir) + tree = CompactMerkleTree(hashStore=fhs) + ledger = Ledger(tree=tree, dataDir=tempdir, serializer=ledgerSerializer) + for d in range(10): + ledger.add({"identifier": "i{}".format(d), "reqId": d, "op": "operation"}) + updatedTree = ledger.tree + ledger.stop() + + newOrderedFields = OrderedDict([ + ("identifier", (str, str)), + ("reqId", (str, int)), + ("op", (str, str)), + ("newField", (str, str)) + ]) + newLedgerSerializer = CompactSerializer(newOrderedFields) + + tree = CompactMerkleTree(hashStore=fhs) + restartedLedger = Ledger(tree=tree, dataDir=tempdir, serializer=newLedgerSerializer) + assert restartedLedger.size == ledger.size + assert restartedLedger.root_hash == ledger.root_hash + assert restartedLedger.tree.hashes == updatedTree.hashes + assert restartedLedger.tree.root_hash == updatedTree.root_hash + + +def testConsistencyVerificationOnStartupCase1(tempdir): + """ + One more node was added to nodes file + """ + fhs = FileHashStore(tempdir) + tree = CompactMerkleTree(hashStore=fhs) + ledger = Ledger(tree=tree, dataDir=tempdir) + tranzNum = 10 + for d in range(tranzNum): + 
ledger.add(str(d).encode()) + ledger.stop() + + # Writing one more node without adding of it to leaf and transaction logs + badNode = (None, None, ('X' * 32)) + fhs.writeNode(badNode) + + with pytest.raises(ConsistencyVerificationFailed): + tree = CompactMerkleTree(hashStore=fhs) + ledger = NoTransactionRecoveryLedger(tree=tree, dataDir=tempdir) + ledger.recoverTreeFromHashStore() + ledger.stop() + + +def testConsistencyVerificationOnStartupCase2(tempdir): + """ + One more transaction added to transactions file + """ + fhs = FileHashStore(tempdir) + tree = CompactMerkleTree(hashStore=fhs) + ledger = Ledger(tree=tree, dataDir=tempdir) + tranzNum = 10 + for d in range(tranzNum): + ledger.add(str(d).encode()) + + # Adding one more entry to transaction log without adding it to merkle tree + badData = 'X' * 32 + value = ledger.leafSerializer.serialize(badData, toBytes=False) + key = str(tranzNum + 1) + ledger._transactionLog.put(key=key, value=value) + + ledger.stop() + + with pytest.raises(ConsistencyVerificationFailed): + tree = CompactMerkleTree(hashStore=fhs) + ledger = NoTransactionRecoveryLedger(tree=tree, dataDir=tempdir) + ledger.recoverTreeFromHashStore() + ledger.stop() + + +def testStartLedgerWithoutNewLineAppendedToLastRecord(ledger): + txnStr = '{"data":{"alias":"Node1","client_ip":"127.0.0.1","client_port":9702,"node_ip":"127.0.0.1",' \ + '"node_port":9701,"services":["VALIDATOR"]},"dest":"Gw6pDLhcBcoQesN72qfotTgFa7cbuqZpkX3Xo6pLhPhv",' \ + '"identifier":"FYmoFw55GeQH7SRFa37dkx1d2dZ3zUF8ckg7wmL7ofN4",' \ + '"txnId":"fea82e10e894419fe2bea7d96296a6d46f50f93f9eeda954ec461b2ed2950b62","type":"0"}' + lineSep = ledger._transactionLog.lineSep + lineSep = lineSep if isinstance(lineSep, bytes) else lineSep.encode() + ledger.start() + ledger._transactionLog.put(txnStr) + ledger._transactionLog.put(txnStr) + ledger._transactionLog.dbFile.write(txnStr) # here, we just added data without adding new line at the end + size1 = ledger._transactionLog.numKeys + assert size1 
== 3 + ledger.stop() + newLineCounts = open(ledger._transactionLog.dbPath, 'rb').read().count(lineSep) + 1 + assert newLineCounts == 3 + + # now start ledger, and it should add the missing new line char at the end of the file, so + # if next record gets written, it will be still in proper format and won't break anything. + ledger.start() + size2 = ledger._transactionLog.numKeys + assert size2 == size1 + newLineCountsAferLedgerStart = open(ledger._transactionLog.dbPath, 'rb').read().count(lineSep) + 1 + assert newLineCountsAferLedgerStart == 4 + ledger._transactionLog.put(txnStr) + assert ledger._transactionLog.numKeys == 4 diff --git a/ledger/test/test_ledger_chunked_store.py b/ledger/test/test_ledger_chunked_store.py new file mode 100644 index 0000000000..db0ea2979a --- /dev/null +++ b/ledger/test/test_ledger_chunked_store.py @@ -0,0 +1,77 @@ +from binascii import hexlify + +import itertools +import pytest + +from ledger.compact_merkle_tree import CompactMerkleTree +from ledger.ledger import Ledger +from ledger.serializers.json_serializer import JsonSerializer +from ledger.stores.chunked_file_store import ChunkedFileStore +from ledger.stores.file_hash_store import FileHashStore +from ledger.test.helper import check_ledger_generator +from ledger.test.test_file_hash_store import generateHashes + +chunk_size = 5 + + +@pytest.fixture(scope="function") +def ledger(tempdir): + store = ChunkedFileStore(tempdir, + 'transactions', + isLineNoKey=True, + chunkSize=chunk_size, + storeContentHash=False, + ensureDurability=False) + ledger = Ledger(CompactMerkleTree(hashStore=FileHashStore(dataDir=tempdir)), + dataDir=tempdir, serializer=JsonSerializer(), + transactionLogStore=store) + ledger.reset() + return ledger + + +def test_add_get_txns(tempdir, ledger): + txns = [] + hashes = [hexlify(h).decode() for h in generateHashes(60)] + for i in range(20): + txns.append({ + 'a': hashes.pop(), + 'b': hashes.pop(), + 'c': hashes.pop() + }) + + for txn in txns: + ledger.add(txn) + + 
check_ledger_generator(ledger) + + for s, t in ledger.getAllTxn(frm=1, to=20): + assert txns[s-1] == t + + for s, t in ledger.getAllTxn(frm=3, to=8): + assert txns[s-1] == t + + for s, t in ledger.getAllTxn(frm=5, to=17): + assert txns[s-1] == t + + for s, t in ledger.getAllTxn(frm=6, to=10): + assert txns[s-1] == t + + for s, t in ledger.getAllTxn(frm=3, to=3): + assert txns[s-1] == t + + for s, t in ledger.getAllTxn(frm=3): + assert txns[s-1] == t + + for s, t in ledger.getAllTxn(to=10): + assert txns[s-1] == t + + for s, t in ledger.getAllTxn(): + assert txns[s-1] == t + + with pytest.raises(AssertionError): + list(ledger.getAllTxn(frm=3, to=1)) + + for frm, to in [(i, j) for i, j in itertools.permutations(range(1, 21), + 2) if i <= j]: + for s, t in ledger.getAllTxn(frm=frm, to=to): + assert txns[s-1] == t diff --git a/ledger/test/test_merkle_proof.py b/ledger/test/test_merkle_proof.py new file mode 100644 index 0000000000..56c53aee71 --- /dev/null +++ b/ledger/test/test_merkle_proof.py @@ -0,0 +1,349 @@ +import time +from binascii import hexlify, unhexlify +from copy import copy +from tempfile import TemporaryDirectory + +import pytest + +from ledger.compact_merkle_tree import CompactMerkleTree +from ledger.merkle_verifier import MerkleVerifier +from ledger.stores.hash_store import HashStore +from ledger.tree_hasher import TreeHasher +from ledger.stores.memory_hash_store import MemoryHashStore +from ledger.stores.file_hash_store import FileHashStore +from ledger.test.helper import checkConsistency +from ledger.util import STH + +""" +1: 221 + + [221] + / +221 + +2: e8b + + [e8b] + / \ +221 fa6 + +3: e8b, 906 + + fe6 + / \ + [e8b] [906] + / \ +221 fa6 + + +4: 4c4 + + [4c4] + / \ + e8b 9c7 + / \ / \ +221 fa6 906 11e + + +5: 4c4, 533 + + e10 + / \ + [4c4] [533] + / \ + e8b 9c7 + / \ / \ +221 fa6 906 11e + + +6: 4c4, 2b1 + + ecc + / \ + [4c4] [2b1] + / \ / \ + e8b 9c7 533 3bf + / \ / \ +221 fa6 906 11e + + +7: 4c4, 2b1, 797 + + 74f + / \ + [4c4] 429 + / \ / \ + 
e8b 9c7 [2b1] [797] + / \ / \ / \ +221 fa6 906 11e 533 3bf + + +8: 50f + [50f] + / \ + 4c4 fed + / \ / \ + e8b 9c7 2b1 800 + / \ / \ / \ / \ +221 fa6 906 11e 533 3bf 797 754 + + +""" + +""" +hexlify(c( + c( + c( + l(d[0]), l(d[1]) + ), + c( + l(d[2]), l(d[3]) + ) + ), + c( + c( + l(d[4]),l(d[5]) + ), + l(d[6]) + ) +)) +""" + + +TXN_COUNT = 1000 + + +@pytest.yield_fixture(scope="module", params=['File', 'Memory']) +def hashStore(request, tdir): + if request.param == 'File': + fhs = FileHashStore(tdir) + assert fhs.is_persistent + yield fhs + elif request.param == 'Memory': + mhs = MemoryHashStore() + assert not mhs.is_persistent + yield mhs + + +@pytest.fixture() +def hasher(): + return TreeHasher() + + +@pytest.fixture() +def verifier(hasher): + return MerkleVerifier(hasher=hasher) + + +@pytest.fixture() +def hasherAndTree(hasher): + tdir = TemporaryDirectory().name + store = FileHashStore(tdir) + m = CompactMerkleTree(hasher=hasher, hashStore=store) + return hasher, m + + +@pytest.fixture() +def addTxns(hasherAndTree): + h, m = hasherAndTree + + auditPaths = [] + for d in range(TXN_COUNT): + serNo = d+1 + data = str(serNo).encode() + auditPaths.append([hexlify(h) for h in m.append(data)]) + return TXN_COUNT, auditPaths + + +@pytest.fixture() +def storeHashes(hasherAndTree, addTxns, hashStore): + h, m = hasherAndTree + mhs = m.hashStore + return mhs + + +''' +14 +pwr = 3 +c = 8 + +14,8 +pwr = 2 +c = 4 + 8 = 12 + +12,2 + +14, 12 +pwr = 1 +c = 2 + 12 = 14 + +14,1 +''' + + +def show(h, m, data): + print("-" * 60) + print("appended : {}".format(data)) + print("hash : {}".format(hexlify(h.hash_leaf(data))[:3])) + print("tree size : {}".format(m.tree_size)) + print("root hash : {}".format(m.root_hash_hex[:3])) + for i, hash in enumerate(m.hashes): + lead = "Hashes" if i == 0 else " " + print("{} : {}".format(lead, hexlify(hash)[:3])) + + +def testCompactMerkleTree2(hasherAndTree, verifier): + h, m = hasherAndTree + v = verifier + for serNo in range(1, 4): + data = 
hexlify(str(serNo).encode()) + m.append(data) + + +def testCompactMerkleTree(hasherAndTree, verifier): + h, m = hasherAndTree + printEvery = 1000 + count = TXN_COUNT + for d in range(count): + data = str(d + 1).encode() + data_hex = hexlify(data) + audit_path = m.append(data) + audit_path_hex = [hexlify(h) for h in audit_path] + incl_proof = m.inclusion_proof(d, d+1) + assert audit_path == incl_proof + assert m.nodeCount == m.get_expected_node_count(m.leafCount) + assert m.hashStore.is_consistent + if d % printEvery == 0: + show(h, m, data_hex) + print("audit path is {}".format(audit_path_hex)) + print("audit path length is {}".format(verifier.audit_path_length( + d, d+1))) + print("audit path calculated length is {}".format( + len(audit_path))) + calculated_root_hash = verifier._calculate_root_hash_from_audit_path( + h.hash_leaf(data), d, audit_path[:], d+1) + if d % printEvery == 0: + print("calculated root hash is {}".format(calculated_root_hash)) + sth = STH(d+1, m.root_hash) + verifier.verify_leaf_inclusion(data, d, audit_path, sth) + + checkConsistency(m, verifier=verifier) + + for d in range(1, count): + verifier.verify_tree_consistency(d, d + 1, + m.merkle_tree_hash(0, d), + m.merkle_tree_hash(0, d + 1), + m.consistency_proof(d, d + 1)) + + newTree = CompactMerkleTree(hasher=h) + m.save(newTree) + assert m.root_hash == newTree.root_hash + assert m.hashes == newTree.hashes + + newTree = CompactMerkleTree(hasher=h) + newTree.load(m) + assert m.root_hash == newTree.root_hash + assert m.hashes == newTree.hashes + + newTree = copy(m) + assert m.root_hash == newTree.root_hash + assert m.hashes == newTree.hashes + + +def testEfficientHashStore(hasherAndTree, addTxns, storeHashes): + h, m = hasherAndTree + + mhs = storeHashes # type: HashStore + txnCount, auditPaths = addTxns + + for leaf_ptr in range(1, txnCount + 1): + print("leaf hash: {}".format(hexlify(mhs.readLeaf(leaf_ptr)))) + + # make sure that there are not more leafs than we expect + try: + 
mhs.readLeaf(txnCount + 1) + assert False + except Exception as ex: + assert isinstance(ex, IndexError) + + node_ptr = 0 + while True: + node_ptr += 1 + try: + # start, height, node_hash = mhs.readNode(node_ptr) + node_hash = mhs.readNode(node_ptr) + except IndexError: + break + print("node hash: {}".format(hexlify(node_hash))) + # TODO: The api has changed for FileHashStore and OrientDBStore, + # HashStore should implement methods for calculating start and + # height of a node + # end = start - pow(2, height) + 1 + # print("node hash start-end: {}-{}".format(start, end)) + # print("node hash height: {}".format(height)) + # print("node hash end: {}".format(end)s) + # _, _, nhByTree = mhs.readNodeByTree(start, height) + # assert nhByTree == node_hash + + +def testLocate(hasherAndTree, addTxns, storeHashes): + h, m = hasherAndTree + + mhs = storeHashes + txnCount, auditPaths = addTxns + + verifier = MerkleVerifier() + startingTime = time.perf_counter() + for d in range(50): + print() + pos = d+1 + print("Audit Path for Serial No: {}".format(pos)) + leafs, nodes = mhs.getPath(pos) + calculatedAuditPath = [] + for i, leaf_pos in enumerate(leafs): + hexLeafData = hexlify(mhs.readLeaf(leaf_pos)) + print("leaf: {}".format(hexLeafData)) + calculatedAuditPath.append(hexLeafData) + for node_pos in nodes: + node = mhs.readNode(node_pos) + hexNodeData = hexlify(node) + print("node: {}".format(hexNodeData)) + calculatedAuditPath.append(hexNodeData) + print("{} -> leafs: {}, nodes: {}".format(pos, leafs, nodes)) + print("Audit path built using formula {}".format(calculatedAuditPath)) + print("Audit path received while appending leaf {}".format(auditPaths[d])) + + # Testing equality of audit path calculated using formula and audit path + # received while inserting leaf into the tree + assert calculatedAuditPath == auditPaths[d] + auditPathLength = verifier.audit_path_length(d, d+1) + assert auditPathLength == len(calculatedAuditPath) + + # Testing root hash generation + leafHash 
= storeHashes.readLeaf(d + 1) + rootHashFrmCalc = hexlify(verifier._calculate_root_hash_from_audit_path( + leafHash, d, [unhexlify(h) for h in calculatedAuditPath], d+1)) + rootHash = hexlify(verifier._calculate_root_hash_from_audit_path( + leafHash, d, [unhexlify(h) for h in auditPaths[d]], d + 1)) + assert rootHash == rootHashFrmCalc + + print("Root hash from audit path built using formula {}". + format(calculatedAuditPath)) + print("Root hash from audit path received while appending leaf {}". + format(auditPaths[d])) + + print("Leaf hash length is {}".format(len(leafHash))) + print("Root hash length is {}".format(len(rootHash))) + + # Testing verification, do not need `assert` since + # `verify_leaf_hash_inclusion` will throw an exception + sthFrmCalc = STH(d + 1, unhexlify(rootHashFrmCalc)) + verifier.verify_leaf_hash_inclusion( + leafHash, d, + [unhexlify(h) for h in calculatedAuditPath], + sthFrmCalc) + sth = STH(d + 1, unhexlify(rootHash)) + verifier.verify_leaf_hash_inclusion( + leafHash, d, + [unhexlify(h) for h in auditPaths[d]], sth) + print(time.perf_counter()-startingTime) diff --git a/ledger/test/test_txn_persistence.py b/ledger/test/test_txn_persistence.py new file mode 100644 index 0000000000..ead1d31f8d --- /dev/null +++ b/ledger/test/test_txn_persistence.py @@ -0,0 +1,47 @@ +import asyncio +import time +from collections import OrderedDict, namedtuple +from ledger.util import F + +from ledger.ledger import Ledger +from ledger.serializers.compact_serializer import CompactSerializer +from ledger.compact_merkle_tree import CompactMerkleTree + +Reply = namedtuple("REPLY", ['result']) + + +def testTxnPersistence(tempdir): + tdir = tempdir + loop = asyncio.get_event_loop() + fields = OrderedDict([ + ("identifier", (str, str)), + ("reqId", (str, int)), + ("txnId", (str, str)), + ("txnTime", (str, float)), + ("txnType", (str, str)), + ]) + ldb = Ledger(CompactMerkleTree(), tdir, + serializer=CompactSerializer(fields=fields)) + + def go(): + identifier = 
"testClientId" + txnId = "txnId" + reply = Reply(result={ + "identifier": identifier, + "reqId": 1, + "txnId": txnId, + "txnTime": time.time(), + "txnType": "buy" + }) + sizeBeforeInsert = ldb.size + ldb.append(reply.result) + txn_in_db = ldb.get(identifier=identifier, + reqId=reply.result['reqId']) + txn_in_db.pop(F.seqNo.name) + assert txn_in_db == reply.result + assert ldb.size == sizeBeforeInsert + 1 + ldb.reset() + ldb.stop() + + go() + loop.close() diff --git a/ledger/tree_hasher.py b/ledger/tree_hasher.py new file mode 100644 index 0000000000..3c1baaeb2f --- /dev/null +++ b/ledger/tree_hasher.py @@ -0,0 +1,76 @@ +import hashlib + +from ledger.util import count_bits_set + + +class TreeHasher(object): + """Merkle hasher with domain separation for leaves and nodes.""" + + def __init__(self, hashfunc=hashlib.sha256): + self.hashfunc = hashfunc + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.hashfunc) + + def __str__(self): + return repr(self) + + def hash_empty(self): + hasher = self.hashfunc() + return hasher.digest() + + def hash_leaf(self, data): + hasher = self.hashfunc() + hasher.update(b"\x00" + data) + return hasher.digest() + + def hash_children(self, left, right): + hasher = self.hashfunc() + hasher.update(b"\x01" + left + right) + return hasher.digest() + + def _hash_full(self, leaves, l_idx, r_idx): + """Hash the leaves between (l_idx, r_idx) as a valid entire tree. + + Note that this is only valid for certain combinations of indexes, + depending on where the leaves are meant to be located in a parent tree. + + Returns: + (root_hash, hashes): where root_hash is that of the entire tree, + and hashes are that of the full (i.e. size 2^k) subtrees that form + the entire tree, sorted in descending order of size. 
+ """ + width = r_idx - l_idx + if width < 0 or l_idx < 0 or r_idx > len(leaves): + raise IndexError("%s,%s not a valid range over [0,%s]" % ( + l_idx, r_idx, len(leaves))) + elif width == 0: + return self.hash_empty(), () + elif width == 1: + leaf_hash = self.hash_leaf(leaves[l_idx]) + return leaf_hash, (leaf_hash,) + else: + # next smallest power of 2 + split_width = 2**((width - 1).bit_length() - 1) + assert split_width < width <= 2*split_width + l_root, l_hashes = self._hash_full(leaves, l_idx, l_idx+split_width) + assert len(l_hashes) == 1 # left tree always full + r_root, r_hashes = self._hash_full(leaves, l_idx+split_width, r_idx) + root_hash = self.hash_children(l_root, r_root) + return (root_hash, (root_hash,) if split_width*2 == width else + l_hashes + r_hashes) + + def hash_full_tree(self, leaves): + """Hash a set of leaves representing a valid full tree.""" + root_hash, hashes = self._hash_full(leaves, 0, len(leaves)) + assert len(hashes) == count_bits_set(len(leaves)) + assert (self._hash_fold(hashes) == root_hash if hashes else + root_hash == self.hash_empty()) + return root_hash + + def _hash_fold(self, hashes): + rev_hashes = iter(hashes[::-1]) + accum = next(rev_hashes) + for cur in rev_hashes: + accum = self.hash_children(cur, accum) + return accum diff --git a/ledger/util.py b/ledger/util.py new file mode 100644 index 0000000000..b3c2f459a3 --- /dev/null +++ b/ledger/util.py @@ -0,0 +1,56 @@ +from collections import namedtuple +from enum import Enum + + +def count_bits_set(i): + # from https://wiki.python.org/moin/BitManipulation + count = 0 + while i: + i &= i - 1 + count += 1 + return count + + +def isPowerOf2(i): + return count_bits_set(i) == 1 + + +def lowest_bit_set(i): + # from https://wiki.python.org/moin/BitManipulation + # but with 1-based indexing like in ffs(3) POSIX + return highest_bit_set(i & -i) + + +def highest_bit_set(i): + # from https://wiki.python.org/moin/BitManipulation + # but with 1-based indexing like in ffs(3) POSIX + hi 
= i + hiBit = 0 + while hi: + hi >>= 1 + hiBit += 1 + return hiBit + + +def highestPowerOf2LessThan(n): + return n.bit_length() - 1 + + +class F(Enum): + clientId = 1 + requestId = 2 + rootHash = 3 + created = 4 + addedToTree = 5 + auditPath = 6 + seqNo = 7 + treeSize = 8 + leafHash = 9 + nodeHash = 10 + height = 11 + + +STH = namedtuple("STH", ["tree_size", "sha256_root_hash"]) + +class ConsistencyVerificationFailed(Exception): + pass diff --git a/plenum/__metadata__.py b/plenum/__metadata__.py index a397681c3b..c66281f480 100644 --- a/plenum/__metadata__.py +++ b/plenum/__metadata__.py @@ -1,7 +1,7 @@ """ plenum package metadata """ -__version_info__ = (0, 3) +__version_info__ = (0, 4) __version__ = '.'.join(map(str, __version_info__)) __author__ = "Evernym, Inc." __license__ = "Apache 2.0" diff --git a/plenum/cli/cli.py b/plenum/cli/cli.py index d2b4fe93f5..a85e6a1dfe 100644 --- a/plenum/cli/cli.py +++ b/plenum/cli/cli.py @@ -26,10 +26,11 @@ from plenum.cli.helper import getUtilGrams, getNodeGrams, getClientGrams, \ getAllGrams from plenum.cli.phrase_word_completer import PhraseWordCompleter -from plenum.client.wallet import Wallet +from plenum.client.wallet import Wallet, WalletStorageHelper from plenum.common.exceptions import NameAlreadyExists, KeysNotFoundException from plenum.common.keygen_utils import learnKeysFromOthers, tellKeysToOthers, areKeysSetup from plenum.common.plugin_helper import loadPlugins +from plenum.common.signer_did import DidSigner from stp_core.crypto.util import cleanSeed, seedFromHex from stp_raet.util import getLocalEstateData from plenum.common.signer_simple import SimpleSigner @@ -74,8 +75,8 @@ from plenum.client.client import Client from plenum.common.util import getMaxFailures, \ firstValue, randomString, bootstrapClientKeys, \ - getFriendlyIdentifier, saveGivenWallet, \ - normalizedWalletFileName, getWalletFilePath, getWalletByPath, \ + getFriendlyIdentifier, \ + normalizedWalletFileName, getWalletFilePath, \ 
getLastSavedWalletFileName from stp_core.common.log import \ getlogger, Logger, getRAETLogFilePath, getRAETLogLevelFromConfig @@ -178,6 +179,9 @@ def __init__(self, looper, basedirpath, nodeReg=None, cliNodeReg=None, self._wallets = {} # type: Dict[str, Wallet] self._activeWallet = None # type: Wallet self.keyPairs = {} + + self._walletSaver = None + ''' examples: status @@ -248,17 +252,19 @@ def __init__(self, looper, basedirpath, nodeReg=None, cliNodeReg=None, eventloop=eventloop, output=out) - RAETVerbosity = getRAETLogLevelFromConfig("RAETLogLevelCli", - Console.Wordage.mute, - self.config) - RAETLogFile = getRAETLogFilePath("RAETLogFilePathCli", self.config) + # RAETVerbosity = getRAETLogLevelFromConfig("RAETLogLevelCli", + # Console.Wordage.mute, + # self.config) + # RAETLogFile = getRAETLogFilePath("RAETLogFilePathCli", self.config) # Patch stdout in something that will always print *above* the prompt # when something is written to stdout. sys.stdout = self.cli.stdout_proxy() if logFileName: Logger().enableFileLogging(logFileName) - Logger().setupRaet(RAETVerbosity, RAETLogFile) + + # TODO: If we want RAET logging in CLI we need fix this. See INDY-315. 
+ #Logger().setupRaet(RAETVerbosity, RAETLogFile) self.logger = getlogger("cli") self.print("\n{}-CLI (c) 2017 Evernym, Inc.".format(self.properName)) @@ -341,6 +347,16 @@ def config(self): self._config = getConfig() return self._config + + @property + def walletSaver(self): + if self._walletSaver is None: + self._walletSaver = WalletStorageHelper( + self.getKeyringsBaseDir(), + dmode=self.config.KEYRING_DIR_MODE, + fmode=self.config.KEYRING_FILE_MODE) + return self._walletSaver + @property def allGrams(self): if not self._allGrams: @@ -826,7 +842,7 @@ def getStatus(self): self.print("Clients: " + clients) f = getMaxFailures(len(self.nodes)) self.print("f-value (number of possible faulty nodes): {}".format(f)) - if f != 0 and len(self.nodes) >= 2 * f + 1: + if f != 0: node = list(self.nodes.values())[0] mPrimary = node.replicas[node.instances.masterId].primaryName bPrimary = node.replicas[node.instances.backupIds[0]].primaryName @@ -1297,9 +1313,10 @@ def _newSigner(self, cseed = cleanSeed(seed) - signer = SimpleSigner(identifier=identifier, seed=cseed, alias=alias) + signer = DidSigner(identifier=identifier, seed=cseed, alias=alias) self._addSignerToGivenWallet(signer, wallet, showMsg=True) self.print("Identifier for key is {}".format(signer.identifier)) + self.print("Verification key is {}".format(signer.verkey)) if alias: self.print("Alias for identifier is {}".format(signer.alias)) self._setActiveIdentifier(signer.identifier) @@ -1371,6 +1388,7 @@ def _newWallet(self, walletName=None): def _listKeyringsAction(self, matchedVars): if matchedVars.get('list_krs') == 'list keyrings': + # TODO move file system related routine to WalletStorageHelper keyringBaseDir = self.getKeyringsBaseDir() contextDirPath = self.getContextBasedKeyringsBaseDir() dirs_to_scan = self.getAllSubDirNamesForKeyrings() @@ -1731,7 +1749,7 @@ def performValidationCheck(self, wallet, walletFilePath, override=False): def restoreWalletByPath(self, walletFilePath, copyAs=None, override=False): 
try: - wallet = getWalletByPath(walletFilePath) + wallet = self.walletSaver.loadWallet(walletFilePath) if copyAs: wallet.name=copyAs @@ -1834,6 +1852,7 @@ def isAnyWalletFileExistsForGivenEnv(self, env): return self.isAnyWalletFileExistsForGivenContext(pattern) def isAnyWalletFileExistsForGivenContext(self, pattern): + # TODO move that to WalletStorageHelper files = glob.glob(pattern) if files: return True @@ -1867,8 +1886,9 @@ def performCompatibilityCheckBeforeSave(self): def _saveActiveWalletInDir(self, contextDir, printMsgs=True): try: - walletFilePath = saveGivenWallet(self._activeWallet, - self.walletFileName, contextDir) + walletFilePath = self.walletSaver.saveWallet( + self._activeWallet, + getWalletFilePath(contextDir, self.walletFileName)) if printMsgs: self.print('Active keyring "{}" saved'.format( self._activeWallet.name), newline=False) diff --git a/plenum/client/client.py b/plenum/client/client.py index ac5a7a3c58..b84d4b8b5c 100644 --- a/plenum/client/client.py +++ b/plenum/client/client.py @@ -13,7 +13,9 @@ Iterable from plenum.common.ledger import Ledger +from plenum.common.messages.node_message_factory import node_message_factory from plenum.common.stacks import nodeStackClass +from plenum.server.quorums import Quorums from stp_core.crypto.nacl_wrappers import Signer from stp_core.network.auth_mode import AuthMode from stp_core.network.network_interface import NetworkInterface @@ -37,7 +39,8 @@ LEDGER_STATUS, CONSISTENCY_PROOF, CATCHUP_REP, REQACK, REQNACK, REJECT, OP_FIELD_NAME, \ POOL_LEDGER_ID, TXN_TIME, LedgerState from plenum.common.txn_util import getTxnOrderedFields -from plenum.common.types import Reply, f, LedgerStatus, TaggedTuples +from plenum.common.types import f +from plenum.common.messages.node_messages import Reply, LedgerStatus from plenum.common.util import getMaxFailures, checkIfMoreThanFSameItems, rawToFriendly from plenum.common.message_processor import MessageProcessor from plenum.persistence.client_req_rep_store_file import 
ClientReqRepStoreFile @@ -188,8 +191,7 @@ def postPoolLedgerCaughtUp(self): self.mode = Mode.discovered # For the scenario where client has already connected to nodes reading # the genesis pool transactions and that is enough - if self.hasSufficientConnections: - self.flushMsgsPendingConnection() + self.flushMsgsPendingConnection() def postTxnFromCatchupAddedToLedger(self, ledgerType: int, txn: Any): if ledgerType != 0: @@ -204,6 +206,7 @@ def setF(self): self.f = getMaxFailures(nodeCount) self.minNodesToConnect = self.f + 1 self.totalNodes = nodeCount + self.quorums = Quorums(nodeCount) @staticmethod def exists(name, basedirpath): @@ -249,7 +252,8 @@ async def prod(self, limit) -> int: def submitReqs(self, *reqs: Request) -> List[Request]: requests = [] for request in reqs: - if self.mode == Mode.discovered and self.hasSufficientConnections: + if (self.mode == Mode.discovered and self.hasSufficientConnections) or \ + (request.isForced() and self.hasAnyConnections): logger.debug('Client {} sending request {}'.format(self, request)) self.send(request) self.expectingFor(request) @@ -280,14 +284,7 @@ def handleOneNodeMsg(self, wrappedMsg, excludeFromCli=None) -> None: extra={"cli": printOnCli}) if OP_FIELD_NAME in msg: if msg[OP_FIELD_NAME] in ledgerTxnTypes and self._ledger: - op = msg.get(OP_FIELD_NAME, None) - if not op: - raise MissingNodeOp - # TODO: Refactor this copying - cls = TaggedTuples.get(op, None) - t = copy.deepcopy(msg) - t.pop(OP_FIELD_NAME, None) - cMsg = cls(**t) + cMsg = node_message_factory.get_instance(**msg) if msg[OP_FIELD_NAME] == POOL_LEDGER_TXNS: self.poolTxnReceived(cMsg, frm) if msg[OP_FIELD_NAME] == LEDGER_STATUS: @@ -383,9 +380,7 @@ def hasConsensus(self, identifier: str, reqId: int) -> Optional[str]: if not replies: raise KeyError('{}{}'.format(identifier, reqId)) # NOT_FOUND # Check if at least f+1 replies are received or not. 
- if self.f + 1 > len(replies): - return False # UNCONFIRMED - else: + if self.quorums.reply.is_reached(len(replies)): onlyResults = {frm: reply["result"] for frm, reply in replies.items()} resultsList = list(onlyResults.values()) @@ -397,6 +392,8 @@ def hasConsensus(self, identifier: str, reqId: int) -> Optional[str]: logger.error( "Received a different result from at least one of the nodes..") return checkIfMoreThanFSameItems(resultsList, self.f) + else: + return False # UNCONFIRMED def showReplyDetails(self, identifier: str, reqId: int): """ @@ -423,8 +420,7 @@ def onConnsChanged(self, joined: Set[str], left: Set[str]): self.status = Status.started elif len(self.nodestack.conns) >= self.minNodesToConnect: self.status = Status.started_hungry - if self.hasSufficientConnections and self.mode == Mode.discovered: - self.flushMsgsPendingConnection() + self.flushMsgsPendingConnection() if self._ledger: for n in joined: self.sendLedgerStatus(n) @@ -439,6 +435,10 @@ def replyIfConsensus(self, identifier, reqId: int): def hasSufficientConnections(self): return len(self.nodestack.conns) >= self.minNodesToConnect + @property + def hasAnyConnections(self): + return len(self.nodestack.conns) > 0 + def hasMadeRequest(self, identifier, reqId: int): return self.reqRepStore.hasRequest(identifier, reqId) @@ -469,9 +469,15 @@ def flushMsgsPendingConnection(self): if queueSize > 0: logger.debug("Flushing pending message queue of size {}" .format(queueSize)) + tmp = deque() while self.reqsPendingConnection: req, signer = self.reqsPendingConnection.popleft() - self.send(req, signer=signer) + if (self.hasSufficientConnections and self.mode == Mode.discovered) or \ + (req.isForced() and self.hasAnyConnections): + self.send(req, signer=signer) + else: + tmp.append((req, signer)) + self.reqsPendingConnection.extend(tmp) def expectingFor(self, request: Request, nodes: Optional[Set[str]]=None): nodes = nodes or {r.name for r in self.nodestack.remotes.values() @@ -601,7 +607,7 @@ def 
resendRequests(self, keys): queue[key] = (nodes, now, retries + 1) def sendLedgerStatus(self, nodeName: str): - ledgerStatus = LedgerStatus(POOL_LEDGER_ID, self.ledger.size, + ledgerStatus = LedgerStatus(POOL_LEDGER_ID, self.ledger.size, None, None, self.ledger.root_hash) rid = self.nodestack.getRemote(nodeName).uid self.send(ledgerStatus, rid) @@ -628,7 +634,7 @@ def verifyMerkleProof(*replies: Tuple[Reply]) -> bool: verifier = MerkleVerifier() fields = getTxnOrderedFields() serializer = CompactSerializer(fields=fields) - ignored = {F.auditPath.name, F.seqNo.name, F.rootHash.name, TXN_TIME} + ignored = {F.auditPath.name, F.seqNo.name, F.rootHash.name} for r in replies: seqNo = r[f.RESULT.nm][F.seqNo.name] rootHash = Ledger.strToHash( diff --git a/plenum/client/pool_manager.py b/plenum/client/pool_manager.py index a1c986a1e0..4591b11819 100644 --- a/plenum/client/pool_manager.py +++ b/plenum/client/pool_manager.py @@ -8,7 +8,8 @@ from plenum.common.stack_manager import TxnStackManager from plenum.common.constants import TXN_TYPE, NODE, ALIAS, DATA, TARGET_NYM, NODE_IP,\ NODE_PORT, CLIENT_IP, CLIENT_PORT, VERKEY, SERVICES, VALIDATOR, CLIENT_STACK_SUFFIX -from plenum.common.types import PoolLedgerTxns, f, HA +from plenum.common.types import f, HA +from plenum.common.messages.node_messages import PoolLedgerTxns from plenum.common.util import getMaxFailures from stp_core.common.log import getlogger diff --git a/plenum/client/wallet.py b/plenum/client/wallet.py index fdf36dc9d1..8133e7c894 100644 --- a/plenum/client/wallet.py +++ b/plenum/client/wallet.py @@ -1,4 +1,8 @@ from typing import Optional, Dict, NamedTuple +import os +import sys +import stat +from pathlib import Path import jsonpickle from libnacl import crypto_secretbox_open, randombytes, \ @@ -265,3 +269,131 @@ def _getIdData(self, signer = self.idsToSigners.get(idr) idData = self.ids.get(idr) return IdData(signer, idData.lastReqId if idData else None) + + +class WalletStorageHelper: + """Manages wallets + + 
:param ``keyringsBaseDir``: keyrings base directory + :param dmode: (optional) permissions for directories inside + including the base one, default is 0700 + :param fmode: (optional) permissions for files inside, + default is 0600 + """ + def __init__(self, keyringsBaseDir, dmode=0o700, fmode=0o600): + self.dmode = dmode + self.fmode = fmode + self.keyringsBaseDir = keyringsBaseDir + + @property + def keyringsBaseDir(self): + return str(self._baseDir) + + @keyringsBaseDir.setter + def keyringsBaseDir(self, path): + self._baseDir = self._resolve(Path(path)) + + self._createDirIfNotExists(self._baseDir) + self._ensurePermissions(self._baseDir, self.dmode) + + def _ensurePermissions(self, path, mode): + if stat.S_IMODE(path.stat().st_mode) != mode: + path.chmod(mode) + + def _createDirIfNotExists(self, dpath): + if dpath.exists(): + if not dpath.is_dir(): + raise NotADirectoryError("{}".format(dpath)) + else: + dpath.mkdir(parents=True, exist_ok=True) + + def _resolve(self, path): + # ``strict`` argument appeared only version 3.6 of python + if sys.version_info < (3, 6, 0): + return Path(os.path.realpath(str(path))) + else: + return path.resolve(strict=False) + + def _normalize(self, fpath): + return self._resolve(self._baseDir / fpath) + + def encode(self, data): + return jsonpickle.encode(data, keys=True) + + def decode(self, data): + return jsonpickle.decode(data, keys=True) + + def saveWallet(self, wallet, fpath): + """Save wallet into specified localtion. + + Returns the canonical path for the ``fpath`` where ``wallet`` + has been stored. 
+ + Error cases: + - ``fpath`` is not inside the keyrings base dir - ValueError raised + - directory part of ``fpath`` exists and it's not a directory - + NotADirectoryError raised + - ``fpath`` exists and it's a directory - IsADirectoryError raised + + :param wallet: wallet to save + :param fpath: wallet file path, absolute or relative to + keyrings base dir + """ + if not fpath: + raise ValueError("empty path") + + _fpath = self._normalize(fpath) + _dpath = _fpath.parent + + try: + _dpath.relative_to(self._baseDir) + except ValueError: + raise ValueError( + "path {} is not is not relative to the keyrings {}".format( + fpath, self._baseDir)) + + self._createDirIfNotExists(_dpath) + + # ensure permissions from the bottom of the directory hierarchy + while _dpath != self._baseDir: + self._ensurePermissions(_dpath, self.dmode) + _dpath = _dpath.parent + + with _fpath.open("w") as wf: + self._ensurePermissions(_fpath, self.fmode) + encodedWallet = self.encode(wallet) + wf.write(encodedWallet) + logger.debug("stored wallet '{}' in {}".format( + wallet.name, _fpath)) + + return str(_fpath) + + def loadWallet(self, fpath): + """Load wallet from specified localtion. + + Returns loaded wallet. 
+ + Error cases: + - ``fpath`` is not inside the keyrings base dir - ValueError raised + - ``fpath`` exists and it's a directory - IsADirectoryError raised + + :param fpath: wallet file path, absolute or relative to + keyrings base dir + """ + if not fpath: + raise ValueError("empty path") + + _fpath = self._normalize(fpath) + _dpath = _fpath.parent + + try: + _dpath.relative_to(self._baseDir) + except ValueError: + raise ValueError( + "path {} is not is not relative to the keyrings {}".format( + fpath, self._baseDir)) + + with _fpath.open() as wf: + wallet = self.decode(wf.read()) + + return wallet diff --git a/plenum/common/batched.py b/plenum/common/batched.py index 2f821e6740..0773975358 100644 --- a/plenum/common/batched.py +++ b/plenum/common/batched.py @@ -2,10 +2,11 @@ from typing import Any, Iterable from typing import Dict -from plenum.common.constants import BATCH +from plenum.common.constants import BATCH, OP_FIELD_NAME from stp_core.crypto.signer import Signer from stp_core.common.log import getlogger -from plenum.common.types import Batch, OP_FIELD_NAME, f +from plenum.common.types import f +from plenum.common.messages.node_messages import Batch from plenum.common.message_processor import MessageProcessor logger = getlogger() diff --git a/plenum/common/config_util.py b/plenum/common/config_util.py index f68396401b..cf9afda5c3 100644 --- a/plenum/common/config_util.py +++ b/plenum/common/config_util.py @@ -1,6 +1,7 @@ import os from importlib import import_module from importlib.util import module_from_spec, spec_from_file_location +from stp_core.common.config.util import getConfig as STPConfig CONFIG = None @@ -35,7 +36,10 @@ def getConfig(homeDir=None): """ global CONFIG if not CONFIG: - refConfig = import_module("plenum.config") + stp_config = STPConfig(homeDir) + plenum_config = import_module("plenum.config") + refConfig = stp_config + refConfig.__dict__.update(plenum_config.__dict__) try: homeDir = os.path.expanduser(homeDir or "~") diff --git 
a/plenum/common/constants.py b/plenum/common/constants.py index 344a95c2e8..3a23052b58 100644 --- a/plenum/common/constants.py +++ b/plenum/common/constants.py @@ -33,12 +33,15 @@ REQKEY = "REQKEY" INSTANCE_CHANGE = "INSTANCE_CHANGE" +VIEW_CHANGE_DONE = "VIEW_CHANGE_DONE" +CURRENT_STATE = "CURRENT_STATE" LEDGER_STATUS = "LEDGER_STATUS" CONSISTENCY_PROOF = "CONSISTENCY_PROOF" CATCHUP_REQ = "CATCHUP_REQ" CATCHUP_REP = "CATCHUP_REP" -CONS_PROOF_REQUEST = "CONS_PROOF_REQUEST" +MESSAGE_REQUEST = 'MESSAGE_REQUEST' +MESSAGE_RESPONSE = 'MESSAGE_RESPONSE' BLACKLIST = "BLACKLIST" @@ -75,11 +78,13 @@ NONCE = 'nonce' ATTRIBUTES = 'attributes' VERIFIABLE_ATTRIBUTES = 'verifiableAttributes' +PREDICATES = 'predicates' TXN_TIME = 'txnTime' TXN_DATA = "txnData" LAST_TXN = "lastTxn" TXNS = "Txns" BY = "by" +FORCE = 'force' # ROLES STEWARD = Roles.STEWARD.value @@ -88,6 +93,9 @@ # TXNs NODE = PlenumTransactions.NODE.value NYM = PlenumTransactions.NYM.value +GET_TXN = PlenumTransactions.GET_TXN.value + +openTxns = (GET_TXN) POOL_TXN_TYPES = {NODE, } diff --git a/plenum/common/ledger.py b/plenum/common/ledger.py index 7bc4d345e0..4cdef17175 100644 --- a/plenum/common/ledger.py +++ b/plenum/common/ledger.py @@ -7,6 +7,10 @@ from ledger.stores.file_store import FileStore from ledger.ledger import Ledger as _Ledger +from stp_core.common.log import getlogger + + +logger = getlogger() class Ledger(_Ledger): @@ -81,6 +85,7 @@ def discardTxns(self, count: int): :param count: :return: """ + old_hash = self.uncommittedRootHash self.uncommittedTxns = self.uncommittedTxns[:-count] if not self.uncommittedTxns: self.uncommittedTree = None @@ -88,6 +93,8 @@ def discardTxns(self, count: int): else: self.uncommittedTree = self.treeWithAppliedTxns(self.uncommittedTxns) self.uncommittedRootHash = self.uncommittedTree.root_hash + logger.debug('Discarding {} txns and root hash {} and new root hash ' + 'is {}'.format(count, old_hash, self.uncommittedRootHash)) def treeWithAppliedTxns(self, txns: List, 
currentTree=None): """ @@ -104,6 +111,11 @@ def treeWithAppliedTxns(self, txns: List, currentTree=None): tempTree.append(self.serializeLeaf(txn)) return tempTree + def reset_uncommitted(self): + self.uncommittedTxns = [] + self.uncommittedRootHash = None + self.uncommittedTree = None + @staticmethod def hashToStr(h): return base58.b58encode(h) diff --git a/plenum/common/ledger_info.py b/plenum/common/ledger_info.py index 0fc7c60e84..5fe62eb95b 100644 --- a/plenum/common/ledger_info.py +++ b/plenum/common/ledger_info.py @@ -4,11 +4,12 @@ from plenum.common.ledger import Ledger +# TODO: Choose a better name, its not just information about a ledger, its more +# of a handle class LedgerInfo: def __init__(self, + id: int, ledger: Ledger, - state: LedgerState, - canSync, preCatchupStartClbk, postCatchupStartClbk, preCatchupCompleteClbk, @@ -16,10 +17,9 @@ def __init__(self, postTxnAddedToLedgerClbk, verifier): + self.id = id self.ledger = ledger - self.state = state - self.canSync = canSync self.preCatchupStartClbk = preCatchupStartClbk self.postCatchupStartClbk = postCatchupStartClbk self.preCatchupCompleteClbk = preCatchupCompleteClbk @@ -31,6 +31,15 @@ def __init__(self, # (`canSync` was set to False) self.stashedLedgerStatuses = deque() + self.set_defaults() + + # noinspection PyAttributeOutsideInit + def set_defaults(self): + self.state = LedgerState.not_synced + # Setting `canSync` to False since each ledger is synced in an + # established order so `canSync` will be set to True accordingly. 
+ self.canSync = False + # Tracks which nodes claim that this node's ledger status is ok # If a quorum of nodes (2f+1) say its up to date then mark the catchup # process as completed @@ -41,6 +50,7 @@ def __init__(self, # Key is the node name and value is a consistency proof self.recvdConsistencyProofs = {} + # Tracks the consistency proof till which the node has to catchup self.catchUpTill = None # Catchup replies that need to be applied to the ledger @@ -60,3 +70,23 @@ def __init__(self, # the catchup process even after the timer expires then it requests # missing transactions. self.catchupReplyTimer = None + + # Number of transactions caught up + self.num_txns_caught_up = 0 + + # noinspection PyAttributeOutsideInit + def done_syncing(self): + self.canSync = False + self.state = LedgerState.synced + self.ledgerStatusOk = set() + self.recvdConsistencyProofs = {} + self.postCatchupCompleteClbk() + self.catchupReplyTimer = None + if self.catchUpTill: + cp = self.catchUpTill + self.num_txns_caught_up = cp.seqNoEnd - cp.seqNoStart + self.catchUpTill = None + + @property + def ledger_summary(self): + return self.id, len(self.ledger), self.ledger.root_hash diff --git a/plenum/common/ledger_manager.py b/plenum/common/ledger_manager.py index 926c371f78..18c1ac4cec 100644 --- a/plenum/common/ledger_manager.py +++ b/plenum/common/ledger_manager.py @@ -12,14 +12,18 @@ from ledger.merkle_verifier import MerkleVerifier from ledger.util import F -from plenum.common.types import LedgerStatus, CatchupRep, \ - ConsistencyProof, f, CatchupReq, ConsProofRequest -from plenum.common.constants import POOL_LEDGER_ID, LedgerState, DOMAIN_LEDGER_ID -from plenum.common.util import getMaxFailures +from plenum.common.messages.node_messages import LedgerStatus, CatchupRep, \ + ConsistencyProof, f, CatchupReq +from plenum.common.constants import POOL_LEDGER_ID, LedgerState, DOMAIN_LEDGER_ID, \ + CONSISTENCY_PROOF +from plenum.common.util import compare_3PC_keys from plenum.common.config_util 
import getConfig +from plenum.server.quorums import Quorums from stp_core.common.log import getlogger from plenum.server.has_action_queue import HasActionQueue from plenum.common.ledger_info import LedgerInfo +from plenum.common.txn_util import reqToTxn + logger = getlogger() @@ -45,11 +49,11 @@ def __init__(self, # their info like callbacks, state, etc self.ledgerRegistry = {} # type: Dict[int, LedgerInfo] - # Largest Pre-Prepare sequence number received during catchup. + # Largest 3 phase key received during catchup. # This field is needed to discard any stashed 3PC messages or # ordered messages since the transactions part of those messages # will be applied when they are received through the catchup process - self.lastCaughtUpPpSeqNo = -1 + self.last_caught_up_3PC = (0, 0) def __repr__(self): return self.owner.name @@ -70,9 +74,8 @@ def addLedger(self, iD: int, ledger: Ledger, return self.ledgerRegistry[iD] = LedgerInfo( + iD, ledger=ledger, - state=LedgerState.not_synced, - canSync=False, preCatchupStartClbk=preCatchupStartClbk, postCatchupStartClbk=postCatchupStartClbk, preCatchupCompleteClbk=preCatchupCompleteClbk, @@ -81,36 +84,35 @@ def addLedger(self, iD: int, ledger: Ledger, verifier=MerkleVerifier(ledger.hasher) ) - def checkIfCPsNeeded(self, ledgerId): - # TODO: this one not just checks it also initiates - # consistency proof exchange process - # It should be renamed or splat on two different methods - + def request_CPs_if_needed(self, ledgerId): ledgerInfo = self.getLedgerInfoByType(ledgerId) - if ledgerInfo.consistencyProofsTimer is None: return logger.debug("{} requesting consistency " "proofs after timeout".format(self)) - adjustedF = getMaxFailures(self.owner.totalNodes - 1) + quorum = Quorums(self.owner.totalNodes - 1) proofs = ledgerInfo.recvdConsistencyProofs - groupedProofs, nullProofs = self._groupConsistencyProofs(proofs) - if nullProofs > adjustedF: + groupedProofs, null_proofs_count = self._groupConsistencyProofs(proofs) + if 
quorum.same_consistency_proof.is_reached(null_proofs_count): return result = self._latestReliableProof(groupedProofs, ledgerInfo.ledger) if not result: - cpReq = self.getConsistencyProofRequest(ledgerId, groupedProofs) + ledger_id, start, end = self.get_consistency_proof_request_params(ledgerId, groupedProofs) logger.debug("{} sending consistency proof request: {}". - format(self, cpReq)) - self.send(cpReq) + format(self, ledger_id, start, end)) + self.owner.request_msg(CONSISTENCY_PROOF, + {f.LEDGER_ID.nm: ledger_id, + f.SEQ_NO_START.nm: start, + f.SEQ_NO_END.nm: end}, + self.nodes_to_request_txns_from) + ledgerInfo.recvdConsistencyProofs = {} ledgerInfo.consistencyProofsTimer = None ledgerInfo.recvdCatchupRepliesFrm = {} - def checkIfTxnsNeeded(self, ledgerId): - + def request_txns_if_needed(self, ledgerId): ledgerInfo = self.ledgerRegistry.get(ledgerId) ledger = ledgerInfo.ledger if ledgerInfo.catchupReplyTimer is None: @@ -128,8 +130,9 @@ def checkIfTxnsNeeded(self, ledgerId): logger.debug("{} requesting {} missing transactions " "after timeout".format(self, totalMissing)) - eligibleNodes = list(self.nodestack.conns - - self.blacklistedNodes) + # eligibleNodes = list(self.nodestack.conns - + # self.blacklistedNodes) + eligibleNodes = self.nodes_to_request_txns_from if not eligibleNodes: # TODO: What if all nodes are blacklisted so `eligibleNodes` @@ -200,11 +203,12 @@ def addReqsForMissing(frm, to): numElgNodes = len(eligibleNodes) for i, req in enumerate(cReqs): nodeName = eligibleNodes[i%numElgNodes] - self.send(req, self.nodestack.getRemote(nodeName).uid) + # self.send(req, self.nodestack.getRemote(nodeName).uid) + self.sendTo(req, nodeName) ledgerInfo.catchupReplyTimer = time.perf_counter() timeout = int(self._getCatchupTimeout(len(cReqs), batchSize)) - self._schedule(partial(self.checkIfTxnsNeeded, ledgerId), timeout) + self._schedule(partial(self.request_txns_if_needed, ledgerId), timeout) def setLedgerState(self, ledgerType: int, state: LedgerState): if 
ledgerType not in self.ledgerRegistry: @@ -220,6 +224,10 @@ def setLedgerCanSync(self, ledgerType: int, canSync: bool): return self.getLedgerInfoByType(ledgerType).canSync = canSync + def prepare_ledgers_for_sync(self): + for ledger_info in self.ledgerRegistry.values(): + ledger_info.set_defaults() + def processLedgerStatus(self, status: LedgerStatus, frm: str): logger.debug("{} received ledger status: {} from {}". format(self, status, frm)) @@ -229,7 +237,7 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): if ledgerStatus.txnSeqNo < 0: self.discard(status, reason="Received negative sequence number " "from {}".format(frm), - logMethod=logger.warn) + logMethod=logger.warning) if not status: logger.debug("{} found ledger status to be null from {}". format(self, frm)) @@ -239,7 +247,7 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): # If this is a node's ledger manager and sender of this ledger status # is a client and its pool ledger is same as this node's pool ledger # then send the pool ledger status since client wont be receiving the - # consistency proof: + # consistency proof in this case: statusFromClient = self.getStack(frm) == self.clientstack if self.ownedByNode and statusFromClient: if ledgerId != POOL_LEDGER_ID: @@ -248,12 +256,8 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): .format(self, status, frm)) return if self.isLedgerSame(ledgerStatus): - ledgerInfo = self.getLedgerInfoByType(POOL_LEDGER_ID) - poolLedger = ledgerInfo.ledger - ledgerStatus = LedgerStatus(POOL_LEDGER_ID, - poolLedger.size, - poolLedger.root_hash) - self.sendTo(ledgerStatus, frm) + ledger_status = self.owner.build_ledger_status(POOL_LEDGER_ID) + self.sendTo(ledger_status, frm) # If a ledger is yet to sync and cannot sync right now, # then stash the ledger status to be processed later @@ -262,7 +266,7 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): self.stashLedgerStatus(ledgerId, status, frm) return - # If this 
manager is owned by a node and the node's ledger is ahead of + # If this manager is owned by a node and this node's ledger is ahead of # the received ledger status if self.ownedByNode and self.isLedgerNew(ledgerStatus): consistencyProof = self.getConsistencyProof(ledgerStatus) @@ -271,13 +275,11 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): self.sendTo(consistencyProof, frm) if self.isLedgerOld(ledgerStatus): - if ledgerInfo.state == LedgerState.synced: + # if ledgerInfo.state == LedgerState.synced: + if ledgerInfo.state != LedgerState.syncing: self.setLedgerCanSync(ledgerId, True) - ledger = self.getLedgerForMsg(ledgerStatus) - ledgerStatus = LedgerStatus(ledgerId, - ledger.size, - ledger.root_hash) - self.sendTo(ledgerStatus, frm) + ledger_status = self.owner.build_ledger_status(ledgerId) + self.sendTo(ledger_status, frm) return if statusFromClient: @@ -288,12 +290,24 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): # post sending this ledger status ledgerInfo.recvdConsistencyProofs[frm] = None ledgerInfo.ledgerStatusOk.add(frm) - if len(ledgerInfo.ledgerStatusOk) == 2 * self.owner.f: + quorum = Quorums(self.owner.totalNodes).ledger_status.value + if len(ledgerInfo.ledgerStatusOk) == quorum: logger.debug("{} found out from {} that its " "ledger of type {} is latest". 
format(self, ledgerInfo.ledgerStatusOk, ledgerId)) if ledgerInfo.state != LedgerState.synced: - self.catchupCompleted(ledgerId) + logger.debug('{} found from ledger status {} that it does ' + 'not need catchup'.format(self, ledgerStatus)) + # If this node's ledger is same as the ledger status (which is + # also the majority of the pool), then set the last ordered + # 3PC key + key = (ledgerStatus.viewNo, ledgerStatus.ppSeqNo) + if self.isLedgerSame(ledgerStatus) and key != (None, None): + # Any state cleaup that is part of pre-catchup should be done + self.do_pre_catchup(ledgerId) + self.catchupCompleted(ledgerId, key) + else: + self.catchupCompleted(ledgerId) def processConsistencyProof(self, proof: ConsistencyProof, frm: str): logger.debug("{} received consistency proof: {} from {}". @@ -334,7 +348,6 @@ def canProcessConsistencyProof(self, proof: ConsistencyProof) -> bool: self.owner.totalNodes, ledgerInfo.state, LedgerState.not_synced)) self.setLedgerState(ledgerId, LedgerState.not_synced) - self.preCatchupClbk(ledgerId) if ledgerId == DOMAIN_LEDGER_ID and ledgerInfo.preCatchupStartClbk: ledgerInfo.preCatchupStartClbk() return self.canProcessConsistencyProof(proof) @@ -347,50 +360,59 @@ def canProcessConsistencyProof(self, proof: ConsistencyProof) -> bool: self.discard(proof, reason="Start {} is greater than " "ledger size {}". 
format(start, ledgerSize), - logMethod=logger.warn) + logMethod=logger.warning) return False if end <= start: self.discard(proof, reason="End {} is not greater than " "start {}".format(end, start), - logMethod=logger.warn) + logMethod=logger.warning) return False return True def checkLedgerIsOutOfSync(self, ledgerInfo) -> bool: recvdConsProof = ledgerInfo.recvdConsistencyProofs - # Consider an f value when this node was not connected - currTotalNodes = self.owner.totalNodes - 1 - adjustedF = getMaxFailures(currTotalNodes) - filtered = self._getNotEmptyProofs(recvdConsProof) - return len(filtered) >= (currTotalNodes - adjustedF) + # Consider an f value when this node had not been added + adjustedQuorum = Quorums(self.owner.totalNodes - 1) + equal_state_proofs = self.__get_equal_state_proofs_count(recvdConsProof) + return not adjustedQuorum.same_consistency_proof.is_reached(equal_state_proofs) def processCatchupReq(self, req: CatchupReq, frm: str): logger.debug("{} received catchup request: {} from {}". 
format(self, req, frm)) if not self.ownedByNode: self.discard(req, reason="Only node can serve catchup requests", - logMethod=logger.warn) + logMethod=logger.warning) return start = getattr(req, f.SEQ_NO_START.nm) end = getattr(req, f.SEQ_NO_END.nm) ledger = self.getLedgerForMsg(req) if end < start: - self.discard(req, reason="Invalid range", logMethod=logger.warn) + self.discard(req, reason="Invalid range", logMethod=logger.warning) + return + + ledger_size = ledger.size + + if start > ledger_size: + self.discard(req, reason="{} not able to service since " + "ledger size is {} and start is {}" + .format(self, ledger_size, start), + logMethod=logger.debug) return - if start > ledger.size: + + if req.catchupTill > ledger_size: self.discard(req, reason="{} not able to service since " - "ledger size is {}" - .format(self, ledger.size), + "ledger size is {} and catchupTill is {}" + .format(self, ledger_size, req.catchupTill), logMethod=logger.debug) return # Adjusting for end greater than ledger size - if end > ledger.size: + if end > ledger_size: logger.debug("{} does not have transactions till {} " "so sending only till {}" - .format(self, end, ledger.size)) - end = ledger.size + .format(self, end, ledger_size)) + end = ledger_size logger.debug("node {} requested catchup for {} from {} to {}" .format(frm, end - start+1, start, end)) @@ -399,10 +421,10 @@ def processCatchupReq(self, req: CatchupReq, frm: str): consProof = [Ledger.hashToStr(p) for p in ledger.tree.consistency_proof(end, req.catchupTill)] - # TODO: This is very inefficient for long ledgers if the ledger does not use `ChunkedFileStore` - txns = ledger.getAllTxn(start, end) - for seq_no in txns: - txns[seq_no] = self.owner.update_txn_with_extra_data(txns[seq_no]) + + txns = {} + for seq_no, txn in ledger.getAllTxn(start, end): + txns[seq_no] = self.owner.update_txn_with_extra_data(txn) self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_ID.nm), txns, consProof), to=frm) @@ -418,39 +440,40 @@ def 
processCatchupRep(self, rep: CatchupRep, frm: str): return ledgerId = getattr(rep, f.LEDGER_ID.nm) - ledger = self.getLedgerInfoByType(ledgerId) - - reallyLedger = self.getLedgerForMsg(rep) - - if frm not in ledger.recvdCatchupRepliesFrm: - ledger.recvdCatchupRepliesFrm[frm] = [] - - ledger.recvdCatchupRepliesFrm[frm].append(rep) - - catchUpReplies = ledger.receivedCatchUpReplies - # Creating a list of txns sorted on the basis of sequence - # numbers - logger.debug("{} merging all received catchups".format(self)) - catchUpReplies = list(heapq.merge(catchUpReplies, txns, - key=operator.itemgetter(0))) - logger.debug( - "{} merged catchups, there are {} of them now, from {} to {}" - .format(self, len(catchUpReplies), catchUpReplies[0][0], - catchUpReplies[-1][0])) - - numProcessed = self._processCatchupReplies(ledgerId, reallyLedger, - catchUpReplies) - logger.debug( - "{} processed {} catchup replies with sequence numbers {}" - .format(self, numProcessed, [seqNo for seqNo, _ in - catchUpReplies[ - :numProcessed]])) - - ledger.receivedCatchUpReplies = catchUpReplies[numProcessed:] - if getattr(ledger.catchUpTill, f.SEQ_NO_END.nm) == reallyLedger.size: - cp = ledger.catchUpTill - ledger.catchUpTill = None - self.catchupCompleted(ledgerId, cp.ppSeqNo) + ledger_info = self.getLedgerInfoByType(ledgerId) + ledger = ledger_info.ledger + + if txns: + if frm not in ledger_info.recvdCatchupRepliesFrm: + ledger_info.recvdCatchupRepliesFrm[frm] = [] + + ledger_info.recvdCatchupRepliesFrm[frm].append(rep) + + catchUpReplies = ledger_info.receivedCatchUpReplies + # Creating a list of txns sorted on the basis of sequence + # numbers + logger.debug("{} merging all received catchups".format(self)) + catchUpReplies = list(heapq.merge(catchUpReplies, txns, + key=operator.itemgetter(0))) + logger.debug( + "{} merged catchups, there are {} of them now, from {} to {}" + .format(self, len(catchUpReplies), catchUpReplies[0][0], + catchUpReplies[-1][0])) + + numProcessed = 
self._processCatchupReplies(ledgerId, ledger, + catchUpReplies) + logger.debug( + "{} processed {} catchup replies with sequence numbers {}" + .format(self, numProcessed, [seqNo for seqNo, _ in + catchUpReplies[ + :numProcessed]])) + + ledger_info.receivedCatchUpReplies = catchUpReplies[numProcessed:] + + # This check needs to happen anyway since it might be the case that + # just before sending requests for catchup, it might have processed + # some ordered requests which might have removed the need for catchup + self.mark_catchup_completed_if_possible(ledger_info) def _processCatchupReplies(self, ledgerId, ledger: Ledger, catchUpReplies: List): @@ -471,7 +494,7 @@ def _processCatchupReplies(self, ledgerId, ledger: Ledger, if result: ledgerInfo = self.getLedgerInfoByType(ledgerId) for _, txn in catchUpReplies[:toBeProcessed]: - self._add_txn(ledgerId, ledger, ledgerInfo, txn) + self._add_txn(ledgerId, ledger, ledgerInfo, reqToTxn(txn)) self._removePrcdCatchupReply(ledgerId, nodeName, seqNo) return numProcessed + toBeProcessed + \ self._processCatchupReplies(ledgerId, ledger, @@ -505,10 +528,10 @@ def _removePrcdCatchupReply(self, ledgerId, node, seqNo): def _transform(self, txn): # Certain transactions other than pool ledger might need to be # transformed to certain format before applying to the ledger - if not self.ownedByNode: - return txn - else: - return self.owner.transform_txn_for_ledger(txn) + txn = reqToTxn(txn) + z = txn if not self.ownedByNode else \ + self.owner.transform_txn_for_ledger(txn) + return z def hasValidCatchupReplies(self, ledgerId, ledger, seqNo, catchUpReplies): # Here seqNo has to be the seqNo of first transaction of @@ -524,8 +547,9 @@ def hasValidCatchupReplies(self, ledgerId, ledger, seqNo, catchUpReplies): # Add only those transaction in the temporary tree from the above # batch - # Transfers of odcits in RAET converts integer keys to string - txns = [self._transform(txn) for s, txn in catchUpReplies[:len(txns)] + # Integer keys being 
converted to strings when marshaled to JSON + txns = [self._transform(txn) + for s, txn in catchUpReplies[:len(txns)] if str(s) in txns] # Creating a temporary tree which will be used to verify consistency @@ -568,25 +592,21 @@ def _getCatchupReplyForSeqNo(self, ledgerId, seqNo): if str(seqNo) in txns: return k, rep - def processConsistencyProofReq(self, req: ConsProofRequest, frm: str): - logger.debug("{} received consistency proof request: {} from {}". - format(self, req, frm)) - if not self.ownedByNode: - self.discard(req, - reason='Only nodes can service this request', - logMethod=logger.warning) - return - - ledgerId = getattr(req, f.LEDGER_ID.nm) - seqNoStart = getattr(req, f.SEQ_NO_START.nm) - seqNoEnd = getattr(req, f.SEQ_NO_END.nm) - consistencyProof = self._buildConsistencyProof(ledgerId, - seqNoStart, - seqNoEnd) - # TODO: Build a test for this scenario where a node cannot service a - # consistency proof request - if consistencyProof: - self.sendTo(consistencyProof, frm) + def mark_catchup_completed_if_possible(self, ledger_info: LedgerInfo): + """ + Checks if the ledger is caught up to the the sequence number + specified in the ConsistencyProof, if yes then mark the catchup as + done for this ledger. 
+ :param ledger_info: + :return: True if catchup is done, false otherwise + """ + if ledger_info.state != LedgerState.synced: + cp = ledger_info.catchUpTill + assert cp + if getattr(cp, f.SEQ_NO_END.nm) <= ledger_info.ledger.size: + self.catchupCompleted(ledger_info.id, (cp.viewNo, cp.ppSeqNo)) + return True + return False def canProcessCatchupReply(self, catchupReply: CatchupRep) -> List[Tuple]: ledgerId = getattr(catchupReply, f.LEDGER_ID.nm) @@ -626,25 +646,25 @@ def canStartCatchUpProcess(self, ledgerId: int): ledgerInfo = self.getLedgerInfoByType(ledgerId) recvdConsProof = ledgerInfo.recvdConsistencyProofs # Consider an f value when this node was not connected - adjustedF = getMaxFailures(self.owner.totalNodes - 1) - if len(recvdConsProof) == (adjustedF+1): + adjustedQuorum = Quorums(self.owner.totalNodes - 1) + if len(recvdConsProof) == adjustedQuorum.f + 1: # At least once correct node believes that this node is behind. # Start timer that will expire in some time and if till that time # enough CPs are not received, then explicitly request CPs - # from other nodes, see `checkIfCPsNeeded` + # from other nodes, see `request_CPs_if_needed` ledgerInfo.consistencyProofsTimer = time.perf_counter() - self._schedule(partial(self.checkIfCPsNeeded, ledgerId), + self._schedule(partial(self.request_CPs_if_needed, ledgerId), self.config.ConsistencyProofsTimeout * ( self.owner.totalNodes - 1)) - if len(recvdConsProof) > 2 * adjustedF: + if adjustedQuorum.consistency_proof.is_reached(len(recvdConsProof)): logger.debug("{} deciding on the basis of CPs {} and f {}". 
- format(self, recvdConsProof, adjustedF)) - grpdPrf, nullProofs = self._groupConsistencyProofs(recvdConsProof) + format(self, recvdConsProof, adjustedQuorum.f)) + grpdPrf, null_proofs_count = self._groupConsistencyProofs(recvdConsProof) # If more than f nodes were found to be at the same state then this # node's state is good too - if nullProofs > adjustedF: + if adjustedQuorum.same_consistency_proof.is_reached(null_proofs_count): return True, None result = self._latestReliableProof(grpdPrf, ledgerInfo.ledger) @@ -653,7 +673,7 @@ def canStartCatchUpProcess(self, ledgerId: int): logger.debug("{} cannot start catchup since received only {} " "consistency proofs but need at least {}". - format(self, len(recvdConsProof), 2*adjustedF + 1)) + format(self, len(recvdConsProof), adjustedQuorum.consistency_proof.value)) return False, None def _groupConsistencyProofs(self, proofs): @@ -667,10 +687,13 @@ def _groupConsistencyProofs(self, proofs): getattr(proof, f.SEQ_NO_END.nm) if (start, end) not in recvdPrf: recvdPrf[(start, end)] = {} - key = (getattr(proof, f.PP_SEQ_NO.nm), + key = ( + getattr(proof, f.VIEW_NO.nm), + getattr(proof, f.PP_SEQ_NO.nm), getattr(proof, f.OLD_MERKLE_ROOT.nm), getattr(proof, f.NEW_MERKLE_ROOT.nm), - tuple(getattr(proof, f.HASHES.nm))) + tuple(getattr(proof, f.HASHES.nm)) + ) recvdPrf[(start, end)][key] = recvdPrf[(start, end)]. 
\ get(key, 0) + 1 else: @@ -680,13 +703,13 @@ def _groupConsistencyProofs(self, proofs): return recvdPrf, nullProofs def _reliableProofs(self, groupedProofs): - adjustedF = getMaxFailures(self.owner.totalNodes - 1) + adjustedQuorum = Quorums(self.owner.totalNodes - 1) result = {} for (start, end), val in groupedProofs.items(): - for (lastPpSeqNo, oldRoot, newRoot, hashes), count in val.items(): - if count > adjustedF: - result[(start, end)] = (lastPpSeqNo, oldRoot, newRoot, - hashes) + for (view_no, lastPpSeqNo, oldRoot, newRoot, hashes), count in val.items(): + if adjustedQuorum.same_consistency_proof.is_reached(count): + result[(start, end)] = (view_no, lastPpSeqNo, oldRoot, + newRoot, hashes) # There would be only one correct proof for a range of # sequence numbers break @@ -695,7 +718,7 @@ def _reliableProofs(self, groupedProofs): def _latestReliableProof(self, groupedProofs, ledger): reliableProofs = self._reliableProofs(groupedProofs) latest = None - for (start, end), (lastPpSeqNo, oldRoot, newRoot, hashes) in \ + for (start, end), (view_no, last_pp_seq_no, oldRoot, newRoot, hashes) in \ reliableProofs.items(): # TODO: Can we do something where consistency proof's start is older # than the current ledger's size and proof's end is larger @@ -703,13 +726,11 @@ def _latestReliableProof(self, groupedProofs, ledger): # Ignore if proof's start is not the same as the ledger's end if start != ledger.size: continue - if latest is None: - latest = (start, end) + (lastPpSeqNo, oldRoot, newRoot, hashes) - elif latest[1] < end: - latest = (start, end) + (lastPpSeqNo, oldRoot, newRoot, hashes) + if latest is None or latest[1] < end: + latest = (start, end) + (view_no, last_pp_seq_no, oldRoot, newRoot, hashes) return latest - def getConsistencyProofRequest(self, ledgerId, groupedProofs): + def get_consistency_proof_request_params(self, ledgerId, groupedProofs): # Choose the consistency proof which occurs median number of times in # grouped proofs. 
Not choosing the highest since some malicious nodes # might be sending non-existent sequence numbers and not choosing the @@ -717,67 +738,75 @@ def getConsistencyProofRequest(self, ledgerId, groupedProofs): # behind a lot or some malicious nodes might send low sequence numbers. proofs = sorted(groupedProofs.items(), key=lambda t: max(t[1].values())) ledger = self.getLedgerInfoByType(ledgerId).ledger - return ConsProofRequest(ledgerId, - ledger.size, - proofs[len(proofs) // 2][0][1]) + return ledgerId, ledger.size, proofs[len(proofs) // 2][0][1] + + def do_pre_catchup(self, ledger_id): + if self.preCatchupClbk: + self.preCatchupClbk(ledger_id) def startCatchUpProcess(self, ledgerId: int, proof: ConsistencyProof): - logger.debug("{} started catching up with consistency proof {}". - format(self, proof)) if ledgerId not in self.ledgerRegistry: self.discard(proof, reason="Unknown ledger type {}". format(ledgerId)) return + self.do_pre_catchup(ledgerId) + logger.debug("{} started catching up with consistency proof {}". + format(self, proof)) + if proof is None: self.catchupCompleted(ledgerId) return - + ledgerInfo = self.getLedgerInfoByType(ledgerId) ledgerInfo.state = LedgerState.syncing ledgerInfo.consistencyProofsTimer = None ledgerInfo.recvdConsistencyProofs = {} p = ConsistencyProof(*proof) - rids = [self.nodestack.getRemote(nm).uid for nm in - self.nodestack.conns] - reqs = self.getCatchupReqs(p) - for req in zip(reqs, rids): - self.send(*req) ledgerInfo.catchUpTill = p - if reqs: - ledgerInfo.catchupReplyTimer = time.perf_counter() - batchSize = getattr(reqs[0], f.SEQ_NO_END.nm) - \ - getattr(reqs[0], f.SEQ_NO_START.nm) + 1 - timeout = self._getCatchupTimeout(len(reqs), batchSize) - self._schedule(partial(self.checkIfTxnsNeeded, ledgerId), - timeout) + + if self.mark_catchup_completed_if_possible(ledgerInfo): + logger.debug('{} found that ledger {} does not need catchup'. 
+ format(self, ledgerId)) + else: + eligible_nodes = self.nodes_to_request_txns_from + if eligible_nodes: + reqs = self.getCatchupReqs(p) + for (req, to) in zip(reqs, eligible_nodes): + self.sendTo(req, to) + if reqs: + ledgerInfo.catchupReplyTimer = time.perf_counter() + batchSize = getattr(reqs[0], f.SEQ_NO_END.nm) - \ + getattr(reqs[0], f.SEQ_NO_START.nm) + 1 + timeout = self._getCatchupTimeout(len(reqs), batchSize) + self._schedule(partial(self.request_txns_if_needed, ledgerId), + timeout) + else: + logger.info('{} needs to catchup ledger {} but it has not found' + ' any connected nodes'.format(self, ledgerId)) def _getCatchupTimeout(self, numRequest, batchSize): return numRequest * (self.config.CatchupTransactionsTimeout + 0.1 * batchSize) - def catchupCompleted(self, ledgerId: int, lastPpSeqNo: int=-1): + def catchupCompleted(self, ledgerId: int, last_3PC: Tuple=(0,0)): # Since multiple ledger will be caught up and catchups might happen # multiple times for a single ledger, the largest seen # ppSeqNo needs to be known. - if self.lastCaughtUpPpSeqNo < lastPpSeqNo: - self.lastCaughtUpPpSeqNo = lastPpSeqNo + if compare_3PC_keys(self.last_caught_up_3PC, last_3PC) > 0: + self.last_caught_up_3PC = last_3PC - ledgerInfo = self.getLedgerInfoByType(ledgerId) - ledgerInfo.catchupReplyTimer = None - logger.debug("{} completed catching up ledger {}" - .format(self, ledgerId)) if ledgerId not in self.ledgerRegistry: logger.error("{} called catchup completed for ledger {}". format(self, ledgerId)) return - ledgerInfo.canSync = False - ledgerInfo.state = LedgerState.synced - ledgerInfo.ledgerStatusOk = set() - ledgerInfo.recvdConsistencyProofs = {} - ledgerInfo.postCatchupCompleteClbk() + ledgerInfo = self.getLedgerInfoByType(ledgerId) + ledgerInfo.done_syncing() + logger.info("{} completed catching up ledger {}, caught up {} in total". 
+ format(self, ledgerId, ledgerInfo.num_txns_caught_up), + extra={'cli': True}) if self.postAllLedgersCaughtUp: if all(l.state == LedgerState.synced @@ -785,7 +814,25 @@ def catchupCompleted(self, ledgerId: int, lastPpSeqNo: int=-1): self.postAllLedgersCaughtUp() def getCatchupReqs(self, consProof: ConsistencyProof): - nodeCount = len(self.nodestack.conns) + # TODO: This needs to be optimised, there needs to be a minimum size + # of catchup requests so if a node is trying to catchup only 50 txns + # from 10 nodes, each of thise 10 nodes will servce 5 txns and prepare + # a consistency proof for other txns. This is bad for the node catching + # up as it involves more network traffic and more computation to verify + # so many consistency proofs and for the node serving catchup reqs. But + # if the node sent only 2 catchup requests the network traffic greatly + # reduces and 25 txns can be read of a single chunk probably + # (if txns dont span across multiple chunks). A practical value of this + # "minimum size" is some multiple of chunk size of the ledger + # nodeCount = len(self.nodestack.conns) + nodeCount = len(self.nodes_to_request_txns_from) + if nodeCount == 0: + logger.debug('{} did not find any connected to nodes to send ' + 'CatchupReq'.format(self)) + return + # TODO: Consider setting start to `max(ledger.size, consProof.start)` + # since ordered requests might have been executed after receiving + # sufficient ConsProof in `preCatchupClbk` start = getattr(consProof, f.SEQ_NO_START.nm) end = getattr(consProof, f.SEQ_NO_END.nm) batchLength = math.ceil((end-start)/nodeCount) @@ -808,8 +855,8 @@ def getConsistencyProof(self, status: LedgerStatus): seqNoEnd = ledger.size return self._buildConsistencyProof(ledgerId, seqNoStart, seqNoEnd) - def _getNotEmptyProofs(self, proofs): - return [proof for frm, proof in proofs.items() if proof] + def __get_equal_state_proofs_count(self, proofs): + return sum(1 for frm, proof in proofs.items() if not proof) def 
_buildConsistencyProof(self, ledgerId, seqNoStart, seqNoEnd): @@ -842,14 +889,20 @@ def _buildConsistencyProof(self, ledgerId, seqNoStart, seqNoEnd): oldRoot = ledger.tree.merkle_tree_hash(0, seqNoStart) newRoot = ledger.tree.merkle_tree_hash(0, seqNoEnd) - ppSeqNo = self.owner.ppSeqNoForTxnSeqNo(ledgerId, seqNoEnd) - logger.debug('{} found ppSeqNo {} for ledger {} seqNo {}'. - format(self, ppSeqNo, ledgerId, seqNoEnd)) + key = self.owner.three_phase_key_for_txn_seq_no(ledgerId, seqNoEnd) + logger.debug('{} found 3 phase key {} for ledger {} seqNo {}'. + format(self, key, ledgerId, seqNoEnd)) + if key is None: + # The node receiving consistency proof should check if it has + # received this sentinel 3 phase key (0, 0) in spite of seeing a + # non-zero txn seq no + key = (0, 0) + return ConsistencyProof( ledgerId, seqNoStart, seqNoEnd, - ppSeqNo, + *key, Ledger.hashToStr(oldRoot), Ledger.hashToStr(newRoot), [Ledger.hashToStr(p) for p in proof] @@ -865,18 +918,21 @@ def _compareLedger(self, status: LedgerStatus): return ledger.seqNo - seqNo def isLedgerOld(self, status: LedgerStatus): + # Is self ledger older than the `LedgerStatus` return self._compareLedger(status) < 0 def isLedgerNew(self, status: LedgerStatus): + # Is self ledger newer than the `LedgerStatus` return self._compareLedger(status) > 0 def isLedgerSame(self, status: LedgerStatus): + # Is self ledger same as the `LedgerStatus` return self._compareLedger(status) == 0 def getLedgerForMsg(self, msg: Any) -> Ledger: - ledgerType = getattr(msg, f.LEDGER_ID.nm) - if ledgerType in self.ledgerRegistry: - return self.getLedgerInfoByType(ledgerType).ledger + ledger_id = getattr(msg, f.LEDGER_ID.nm) + if ledger_id in self.ledgerRegistry: + return self.getLedgerInfoByType(ledger_id).ledger self.discard(msg, reason="Invalid ledger msg type") def getLedgerInfoByType(self, ledgerType) -> LedgerInfo: @@ -897,11 +953,15 @@ def stashLedgerStatus(self, ledgerId: int, status, frm: str): def 
processStashedLedgerStatuses(self, ledgerId: int): ledgerInfo = self.getLedgerInfoByType(ledgerId) i = 0 - while ledgerInfo.stashedLedgerStatuses: - msg, frm = ledgerInfo.stashedLedgerStatuses.pop() + max_iter = len(ledgerInfo.stashedLedgerStatuses) + logger.debug('{} going to process {} stashed ledger statuses for ledger' + ' {}'.format(self, max_iter, ledgerId)) + # Since `processLedgerStatus` can stash some ledger statuses, make sure + # each item in `ledgerInfo.stashedLedgerStatuses` is processed only once + while max_iter != i: + msg, frm = ledgerInfo.stashedLedgerStatuses.popleft() i += 1 self.processLedgerStatus(msg, frm) - logger.debug("{} processed {} stashed ledger statuses".format(self, i)) return i def getStack(self, remoteName: str): @@ -919,15 +979,12 @@ def sendTo(self, msg: Any, to: str): # If the message is being sent by a node if self.ownedByNode: if stack == self.nodestack: - rid = self.nodestack.getRemote(to).uid - self.send(msg, rid) + self.sendToNodes(msg, [to,]) if stack == self.clientstack: self.owner.transmitToClient(msg, to) # If the message is being sent by a client else: - rid = self.nodestack.getRemote(to).uid - signer = self.owner.fetchSigner(self.owner.defaultIdentifier) - self.nodestack.send(msg, rid, signer=signer) + self.sendToNodes(msg, [to,]) @property def nodestack(self): @@ -941,6 +998,14 @@ def clientstack(self): def send(self): return self.owner.send + @property + def send(self): + return self.owner.send + + @property + def sendToNodes(self): + return self.owner.sendToNodes + @property def discard(self): return self.owner.discard @@ -950,3 +1015,8 @@ def blacklistedNodes(self): if self.ownedByNode: return self.owner.blacklistedNodes return set() + + @property + def nodes_to_request_txns_from(self): + return [nm for nm in self.nodestack.registry + if nm not in self.blacklistedNodes and nm != self.nodestack.name] diff --git a/plenum/common/message_processor.py b/plenum/common/message_processor.py index 7ccea9bc55..12a5a9b080 
100644 --- a/plenum/common/message_processor.py +++ b/plenum/common/message_processor.py @@ -2,7 +2,6 @@ from typing import Dict from plenum.common.request import Request -from plenum.common.types import TaggedTupleBase from stp_core.crypto.signer import Signer @@ -37,9 +36,7 @@ def toDict(self, msg: Dict) -> Dict: for transmission """ - if isinstance(msg, TaggedTupleBase): - tmsg = msg.melted() - elif isinstance(msg, Request): + if isinstance(msg, Request): tmsg = msg.as_dict elif hasattr(msg, "_asdict"): tmsg = dict(msg._asdict()) diff --git a/plenum/common/messages/client_request.py b/plenum/common/messages/client_request.py index 7d42d2b48e..5bfa685eb9 100644 --- a/plenum/common/messages/client_request.py +++ b/plenum/common/messages/client_request.py @@ -1,6 +1,7 @@ from plenum.common.constants import * from plenum.common.messages.fields import * from plenum.common.messages.message_base import MessageValidator +from plenum.common.types import OPERATION, f class ClientNodeOperationData(MessageValidator): @@ -40,14 +41,26 @@ class ClientNYMOperation(MessageValidator): # TODO: validate role using ChooseField, # do roles list expandable form outer context ) + schema_is_strict = False + + +class ClientGetTxnOperation(MessageValidator): + schema = ( + (TXN_TYPE, ConstantField(GET_TXN)), + (DATA, TxnSeqNoField()), + ) class ClientOperationField(MessageValidator): - operations = { - NODE: ClientNodeOperation(), - NYM: ClientNYMOperation(), - } + def __init__(self, *args, **kwargs): + strict = kwargs.get("schema_is_strict", True) + self.operations = { + NODE: ClientNodeOperation(schema_is_strict=strict), + NYM: ClientNYMOperation(schema_is_strict=strict), + GET_TXN: ClientGetTxnOperation(schema_is_strict=strict), + } + super().__init__(*args, **kwargs) def validate(self, dct): """ @@ -66,3 +79,28 @@ def validate(self, dct): # check only if the schema is defined op = self.operations[schema_type] op.validate(dct) + + +class ClientMessageValidator(MessageValidator): + + 
def __init__(self, operation_schema_is_strict, *args, **kwargs): + super().__init__(*args, **kwargs) + # Following code is for support of non-strict schema + # TODO: refactor this + # TODO: this (and all related functionality) can be removed when + # when fixed problem with transaction serialization (INDY-338) + strict = operation_schema_is_strict + if not strict: + operation_field_index = 2 + op = ClientOperationField(schema_is_strict=False) + schema = list(self.schema) + schema[operation_field_index] = (OPERATION, op) + self.schema = tuple(schema) + + schema = ( + (f.IDENTIFIER.nm, IdentifierField()), + (f.REQ_ID.nm, NonNegativeNumberField()), + (OPERATION, ClientOperationField()), + (f.SIG.nm, SignatureField(optional=True)), + (f.DIGEST.nm, NonEmptyStringField(optional=True)), + ) \ No newline at end of file diff --git a/plenum/common/messages/fields.py b/plenum/common/messages/fields.py index bc82d75e0f..b14d1a27a0 100644 --- a/plenum/common/messages/fields.py +++ b/plenum/common/messages/fields.py @@ -1,24 +1,50 @@ import ipaddress import json import base58 +import re from plenum.common.constants import DOMAIN_LEDGER_ID, POOL_LEDGER_ID +from abc import ABCMeta, abstractmethod -class FieldValidator: +class FieldValidator(metaclass=ABCMeta): + """" + Interface for field validators + """ + optional = False + + @abstractmethod def validate(self, val): - raise NotImplementedError + """ + Validates field value + + :param val: field value to validate + :return: error message or None + """ + +class FieldBase(FieldValidator, metaclass=ABCMeta): + """ + Base class for field validators + """ -class FieldBase(FieldValidator): _base_types = () def __init__(self, optional=False, nullable=False): self.optional = optional self.nullable = nullable + # TODO: `validate` should be renamed to `validation_error` def validate(self, val): + """ + Performs basic validation of field value and then passes it for + specific validation. 
+ + :param val: field value to validate + :return: error message or None + """ + if self.nullable and val is None: return type_er = self.__type_check(val) @@ -29,8 +55,15 @@ def validate(self, val): if spec_err: return spec_err + @abstractmethod def _specific_validation(self, val): - raise NotImplementedError + """ + Performs specific validation of field. Should be implemented in + subclasses. Use it instead of overriding 'validate'. + + :param val: field value to validate + :return: error message or None + """ def __type_check(self, val): if self._base_types is None: @@ -46,6 +79,23 @@ def _wrong_type_msg(self, val): "".format(types_str, type(val).__name__) +# TODO: The fields below should be singleton. + + +class AnyField(FieldBase): + _base_types = (object,) + + def _specific_validation(self, _): + return + + +class BooleanField(FieldBase): + _base_types = (bool,) + + def _specific_validation(self, val): + return + + class NonEmptyStringField(FieldBase): _base_types = (str,) @@ -54,6 +104,22 @@ def _specific_validation(self, val): return 'empty string' +class LimitedLengthStringField(FieldBase): + _base_types = (str,) + + def __init__(self, max_length: int, **kwargs): + assert max_length > 0, 'should be greater than 0' + super().__init__(**kwargs) + self._max_length = max_length + + def _specific_validation(self, val): + if not val: + return 'empty string' + if len(val) > self._max_length: + val = val[:100] + ('...' 
if len(val) > 100 else '') + return '{} is longer than {} symbols'.format(val, self._max_length) + + class SignatureField(FieldBase): _base_types = (str, type(None)) # TODO do nothing because EmptySignature should be raised somehow @@ -112,22 +178,31 @@ def _specific_validation(self, val): class MapField(FieldBase): _base_types = (dict, ) - def __init__(self, key_field: FieldBase, value_field: FieldBase, + def __init__(self, key_field: FieldValidator, + value_field: FieldValidator, **kwargs): super().__init__(**kwargs) - self._key_field = key_field - self._value_field = value_field + self.key_field = key_field + self.value_field = value_field def _specific_validation(self, val): for k, v in val.items(): - key_error = self._key_field.validate(k) + key_error = self.key_field.validate(k) if key_error: return key_error - val_error = self._value_field.validate(v) + val_error = self.value_field.validate(v) if val_error: return val_error +class AnyMapField(FieldBase): + # A map where key and value can be of arbitrary types + _base_types = (dict,) + + def _specific_validation(self, _): + return + + class NetworkPortField(FieldBase): _base_types = (int,) @@ -163,6 +238,23 @@ def _specific_validation(self, val): .format(', '.join(map(str, self._possible_values)), val) +class MessageField(FieldBase): + _base_types = None + + def __init__(self, message_type, **kwargs): + self._message_type = message_type + super().__init__(**kwargs) + + def _specific_validation(self, val): + if isinstance(val, self._message_type): + return + try: + self._message_type(**val) + except TypeError as ex: + return "value {} cannot be represented as {} due to: {}"\ + .format(val, self._message_type.typename, ex) + + class LedgerIdField(ChooseField): _base_types = (int,) ledger_ids = (POOL_LEDGER_ID, DOMAIN_LEDGER_ID) @@ -174,30 +266,25 @@ def __init__(self, **kwargs): class Base58Field(FieldBase): _base_types = (str,) - #long id is 32 bye long; short is 16 bytes long; - #upper limit is calculated 
according to formula - #for the max length of encoded data - #ceil(n * 138 / 100 + 1) - #lower formula is based on data from field - def __init__(self, short=False, long=False, *args, **kwargs): + def __init__(self, byte_lengths=None, *args, **kwargs): super().__init__(*args, **kwargs) self._alphabet = set(base58.alphabet) - self._lengthLimits = [] - if short: - self._lengthLimits.append(range(15, 26)) - if long: - self._lengthLimits.append(range(43, 46)) + self.byte_lengths = byte_lengths def _specific_validation(self, val): - if self._lengthLimits: - inlen = len(val) - goodlen = any(inlen in r for r in self._lengthLimits) - if not goodlen: - return 'value length {} is not in ranges {}'\ - .format(inlen, self._lengthLimits) - if set(val) - self._alphabet: - return 'should not contains chars other than {}' \ - .format(self._alphabet) + invalid_chars = set(val) - self._alphabet + if invalid_chars: + # only 10 chars to shorten the output + to_print = sorted(invalid_chars)[:10] + return 'should not contain the following chars {}{}' \ + .format(to_print, + ' (truncated)' if len(to_print) < len(invalid_chars) else '') + if self.byte_lengths is not None: + # TODO could impact performace, need to check + b58len = len(base58.b58decode(val)) + if b58len not in self.byte_lengths: + return 'b58 decoded value length {} should be one of {}' \ + .format(b58len, list(self.byte_lengths)) class IdentifierField(Base58Field): @@ -207,7 +294,7 @@ def __init__(self, *args, **kwargs): # TODO the tests in client are failing because the field # can be short and long both. It is can be an error. # We have to double check the type of the field. - super().__init__(short=True, long=True, *args, **kwargs) + super().__init__(byte_lengths=(16, 32), *args, **kwargs) class DestNodeField(Base58Field): @@ -217,7 +304,7 @@ def __init__(self, *args, **kwargs): # TODO the tests in client are failing because the field # can be short and long both. It is can be an error. 
# We have to double check the type of the field. - super().__init__(short=True, long=True, *args, **kwargs) + super().__init__(byte_lengths=(16, 32), *args, **kwargs) class DestNymField(Base58Field): @@ -227,7 +314,7 @@ def __init__(self, *args, **kwargs): # TODO the tests in client are failing because the field # can be short and long both. It is can be an error. # We have to double check the type of the field. - super().__init__(short=True, long=True, *args, **kwargs) + super().__init__(byte_lengths=(16, 32), *args, **kwargs) class RequestIdentifierField(FieldBase): @@ -263,18 +350,15 @@ def _specific_validation(self, val): # TODO: think about making it a subclass of Base58Field class VerkeyField(FieldBase): _base_types = (str, ) - _b58short = Base58Field(short=True) - _b58long = Base58Field(long=True) + _b58abbreviated = Base58Field(byte_lengths=(16,)) + _b58full = Base58Field(byte_lengths=(32,)) def _specific_validation(self, val): - vk_error = NonEmptyStringField().validate(val) - if vk_error: - return vk_error if val.startswith('~'): - #short base58 - return self._b58short.validate(val[1:]) - #long base58 - return self._b58long.validate(val) + #abbreviated base58 + return self._b58abbreviated.validate(val[1:]) + #full base58 + return self._b58full.validate(val) class HexField(FieldBase): @@ -297,28 +381,114 @@ class MerkleRootField(Base58Field): _base_types = (str, ) def __init__(self, *args, **kwargs): - super().__init__(long=True, *args, **kwargs) + super().__init__(byte_lengths=(32,), *args, **kwargs) class TimestampField(FieldBase): - _base_types = (float, int) + _base_types = (int,) + _oldest_time = 1499906902 def _specific_validation(self, val): - normal_val = val - if isinstance(val, int): - # This is needed because timestamp is usually multiplied - # by 1000 to "make it compatible to JavaScript Date()" - normal_val /= 1000 - if normal_val <= 0: - return 'should be a positive number but was {}'.format(val) + if val < self._oldest_time: + return 'should 
be greater than {} but was {}'.\ + format(self._oldest_time, val) class JsonField(FieldBase): _base_types = (str,) def _specific_validation(self, val): - # TODO: Need a mechanism to ensure a non-empty JSON if needed. + # TODO: Need a mechanism to ensure a non-empty JSON if needed try: json.loads(val) except json.decoder.JSONDecodeError: return 'should be a valid JSON string' + + +class SerializedValueField(FieldBase): + _base_types = (bytes, str) + + def _specific_validation(self, val): + if not val: + return 'empty serialized value' + + +class VersionField(FieldBase): + _base_types = (str,) + + def __init__(self, components_number=(3,), **kwargs): + super().__init__(**kwargs) + self._comp_num = components_number + + def _specific_validation(self, val): + parts = val.split(".") + if len(parts) not in self._comp_num: + return "version consists of {} components, but it should contain {}".format(len(parts), self._comp_num) + for p in parts: + if not p.isdigit(): + return "version component should contain only digits" + return None + + +class TxnSeqNoField(FieldBase): + + _base_types = (int,) + + def _specific_validation(self, val): + if val < 1: + return 'cannot be smaller than 1' + + +class Sha256HexField(FieldBase): + """ + Validates a sha-256 hash specified in hex + """ + _base_types = (str,) + regex = re.compile('^[A-Fa-f0-9]{64}$') + + def _specific_validation(self, val): + if self.regex.match(val) is None: + return 'not a valid hash (needs to be in hex too)' + + +class AnyValueField(FieldBase): + """ + Stub field validator + """ + _base_types = None + + def _specific_validation(self, val): + pass + + +class StringifiedNonNegativeNumberField(NonNegativeNumberField): + """ + This validator is needed because of json limitations: in some cases + numbers being converted to strings. 
+ """ + # TODO: Probably this should be solved another way + + _base_types = (str, int) + _num_validator = NonNegativeNumberField() + + def _specific_validation(self, val): + try: + return self._num_validator.validate(int(val)) + except ValueError: + return "stringified int expected, but was '{}'"\ + .format(val) + + +class LedgerInfoField(FieldBase): + _base_types = (list, tuple) + _ledger_id_class = LedgerIdField + + def _specific_validation(self, val): + assert len(val) == 3 + ledgerId, ledgerLength, merkleRoot = val + for validator, value in ((self._ledger_id_class().validate, ledgerId), + (NonNegativeNumberField().validate, ledgerLength), + (MerkleRootField().validate, merkleRoot)): + err = validator(value) + if err: + return err diff --git a/plenum/common/messages/message_base.py b/plenum/common/messages/message_base.py index bc67fac0a5..4ea4932f2a 100644 --- a/plenum/common/messages/message_base.py +++ b/plenum/common/messages/message_base.py @@ -6,13 +6,18 @@ from plenum.common.constants import OP_FIELD_NAME from plenum.common.messages.fields import FieldValidator + class MessageValidator(FieldValidator): # the schema has to be an ordered iterable because the message class # can be create with positional arguments __init__(*args) + schema = () optional = False + def __init__(self, schema_is_strict=True): + self.schema_is_strict = schema_is_strict + def validate(self, dct): self._validate_fields_with_schema(dct, self.schema) self._validate_message(dct) @@ -28,33 +33,41 @@ def _validate_fields_with_schema(self, dct, schema): self._raise_missed_fields(*missed_required_fields) for k, v in dct.items(): if k not in schema_dct: - self._raise_unknown_fields(k, v) - validation_error = schema_dct[k].validate(v) - if validation_error: - self._raise_invalid_fields(k, v, validation_error) - - - def _raise_invalid_type(self, dct): - raise TypeError("validation error: invalid type {}, dict expected" - .format(type(dct))) + if self.schema_is_strict: + 
self._raise_unknown_fields(k, v) + else: + validation_error = schema_dct[k].validate(v) + if validation_error: + self._raise_invalid_fields(k, v, validation_error) def _validate_message(self, dct): return None + def _raise_invalid_type(self, dct): + raise TypeError("{} invalid type {}, dict expected" + .format(self.__error_msg_prefix, type(dct))) + def _raise_missed_fields(self, *fields): - raise TypeError("validation error: missed fields " - "{}".format(', '.join(map(str, fields)))) + raise TypeError("{} missed fields - {}" + .format(self.__error_msg_prefix, + ', '.join(map(str, fields)))) def _raise_unknown_fields(self, field, value): - raise TypeError("validation error: unknown field " - "({}={})".format(field, value)) + raise TypeError("{} unknown field - " + "{}={}".format(self.__error_msg_prefix, + field, value)) def _raise_invalid_fields(self, field, value, reason): - raise TypeError("validation error: {} " - "({}={})".format(reason, field, value)) + raise TypeError("{} {} " + "({}={})".format(self.__error_msg_prefix, reason, + field, value)) def _raise_invalid_message(self, reason): - raise TypeError("validation error: {}".format(reason)) + raise TypeError("{} {}".format(self.__error_msg_prefix, reason)) + + @property + def __error_msg_prefix(self): + return 'validation error [{}]:'.format(self.__class__.__name__) class MessageBase(Mapping, MessageValidator): @@ -95,9 +108,6 @@ def __getitem__(self, key): raise TypeError("Invalid argument type.") def _asdict(self): - """ - Legacy form TaggedTuple - """ return self.__dict__ @property @@ -132,7 +142,19 @@ def values(self): def __str__(self): return "{}{}".format(self.typename, dict(self.items())) + def __repr__(self): + return self.__str__() + def __eq__(self, other): if not issubclass(other.__class__, self.__class__): return False return self._asdict() == other._asdict() + + def __hash__(self): + h = 1 + for index, value in enumerate(list(self.__iter__())): + h = h * (index + 1) * (hash(value) + 1) + return 
h + + def __dir__(self): + return self.keys() diff --git a/plenum/common/messages/node_message_factory.py b/plenum/common/messages/node_message_factory.py new file mode 100644 index 0000000000..6a8e2d24dc --- /dev/null +++ b/plenum/common/messages/node_message_factory.py @@ -0,0 +1,112 @@ +import sys +from importlib import import_module + +from plenum.common.constants import OP_FIELD_NAME +from plenum.common.exceptions import MissingNodeOp, InvalidNodeOp +from plenum.common.messages.fields import IterableField, MapField +from plenum.common.messages.message_base import MessageBase + + +class MessageFactory: + + def __init__(self, class_module_name): + classes_module = self.__load_module_by_name(class_module_name) + self.__classes = self.__get_message_classes(classes_module) + assert len(self.__classes) > 0, "at least one message class loaded" + + @classmethod + def __load_module_by_name(cls, module_name): + the_module = cls.__get_module_by_name(module_name) + if the_module is not None: + return the_module + + import_module(module_name) # can raise ImportError + the_module = cls.__get_module_by_name(module_name) + return the_module + + @classmethod + def __get_message_classes(cls, classes_module): + classes = {} + for x in dir(classes_module): + obj = getattr(classes_module, x) + doesnt_fit_reason = cls.__check_obj_fits(obj) + if doesnt_fit_reason is None: + classes.update({obj.typename: obj}) + return classes + + def get_instance(self, **message_raw): + message_op = message_raw.get(OP_FIELD_NAME, None) + if message_op is None: + raise MissingNodeOp + cls = self.get_type(message_op) + msg = self.__msg_without_op_field(message_raw) + return cls(**msg) + + def get_type(self, message_op): + message_cls = self.__classes.get(message_op, None) + if message_cls is None: + raise InvalidNodeOp(message_op) + return message_cls + + @staticmethod + def __msg_without_op_field(msg): + return {k: v for k, v in msg.items() if k != OP_FIELD_NAME} + + def set_message_class(self, 
message_class): + doesnt_fit_reason = self.__check_obj_fits(message_class) + assert not doesnt_fit_reason, doesnt_fit_reason + self.__classes.update({message_class.typename: message_class}) + + @staticmethod + def __get_module_by_name(module_name): + return sys.modules.get(module_name, None) + + @staticmethod + def __check_obj_fits(obj): + if not getattr(obj, "schema", None): + return "must have a non empty 'schema'" + if not getattr(obj, "typename", None): + return "must have a non empty 'typename'" + # has to be the last because of: 'str' week ref error + if not issubclass(obj, MessageBase): + return "must be a subclass of 'MessageBase'" + + # TODO: it is a workaround which helps extend some fields from + # downstream projects, should be removed after we find a better way + # to do this + def update_schemas_by_field_type(self, old_field_type, new_field_type): + for cls in self.__classes.values(): + new_schema = [] + for name, field in cls.schema: + field = self._transform_field(field, old_field_type, new_field_type) + new_schema.append((name, field)) + cls.schema = tuple(new_schema) + + def _transform_field(self, field, old_field_type, new_field_type): + if isinstance(field, old_field_type): + return new_field_type() + elif self.__is_iterable_and_contains_type(field, old_field_type): + return IterableField(new_field_type()) + elif isinstance(field, MapField): + key = field.key_field + val = field.value_field + if isinstance(field.key_field, old_field_type): + key = new_field_type() + if isinstance(field.value_field, old_field_type): + val = new_field_type() + return MapField(key, val) + return field + + @staticmethod + def __is_iterable_and_contains_type(field, field_type): + return isinstance(field, IterableField) and \ + isinstance(field.inner_field_type, field_type) + + +class NodeMessageFactory(MessageFactory): + + def __init__(self): + super().__init__('plenum.common.messages.node_messages') + + +node_message_factory = NodeMessageFactory() diff --git 
a/plenum/common/messages/node_messages.py b/plenum/common/messages/node_messages.py new file mode 100644 index 0000000000..15867d13e1 --- /dev/null +++ b/plenum/common/messages/node_messages.py @@ -0,0 +1,334 @@ +from typing import TypeVar, NamedTuple + +from plenum.common.constants import * +from plenum.common.messages.fields import * +from plenum.common.messages.message_base import MessageBase +from plenum.common.types import f +from plenum.common.messages.client_request import ClientMessageValidator + + +class Nomination(MessageBase): + typename = NOMINATE + + schema = ( + (f.NAME.nm, NonEmptyStringField()), + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), + ) + + +class Batch(MessageBase): + + typename = BATCH + + schema = ( + (f.MSGS.nm, IterableField(SerializedValueField())), + (f.SIG.nm, SignatureField()), + ) + + +class Reelection(MessageBase): + typename = REELECTION + + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.ROUND.nm, NonNegativeNumberField()), + (f.TIE_AMONG.nm, IterableField(TieAmongField())), + (f.VIEW_NO.nm, NonNegativeNumberField()), + ) + + +class Primary(MessageBase): + typename = PRIMARY + + schema = ( + (f.NAME.nm, NonEmptyStringField()), + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), + ) + + +# TODO implement actual rules +class BlacklistMsg(MessageBase): + typename = BLACKLIST + schema = ( + (f.SUSP_CODE.nm, AnyValueField()), + (f.NODE_NAME.nm, AnyValueField()), + ) + + +# TODO implement actual rules +class RequestAck(MessageBase): + typename = REQACK + schema = ( + (f.IDENTIFIER.nm, AnyValueField()), + (f.REQ_ID.nm, AnyValueField()) + ) + + +# TODO implement actual rules +class RequestNack(MessageBase): + typename = REQNACK + schema = ( + (f.IDENTIFIER.nm, AnyValueField()), + (f.REQ_ID.nm, AnyValueField()), + (f.REASON.nm, AnyValueField()), + ) + + +# 
TODO implement actual rules +class Reject(MessageBase): + typename = REJECT + schema = ( + (f.IDENTIFIER.nm, AnyValueField()), + (f.REQ_ID.nm, AnyValueField()), + (f.REASON.nm, AnyValueField()), + ) + + +# TODO implement actual rules +class PoolLedgerTxns(MessageBase): + typename = POOL_LEDGER_TXNS + schema = ( + (f.TXN.nm, AnyValueField()), + ) + + +class Ordered(MessageBase): + typename = ORDERED + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + (f.PP_TIME.nm, TimestampField()), + (f.LEDGER_ID.nm, LedgerIdField()), + (f.STATE_ROOT.nm, MerkleRootField(nullable=True)), + (f.TXN_ROOT.nm, MerkleRootField(nullable=True)), + ) + + +class Propagate(MessageBase): + typename = PROPAGATE + schema = ( + (f.REQUEST.nm, ClientMessageValidator(operation_schema_is_strict=True)), + (f.SENDER_CLIENT.nm, NonEmptyStringField(nullable=True)), + ) + + +class PrePrepare(MessageBase): + typename = PREPREPARE + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + (f.PP_TIME.nm, TimestampField()), + (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), + (f.DISCARDED.nm, NonNegativeNumberField()), + (f.DIGEST.nm, NonEmptyStringField()), + (f.LEDGER_ID.nm, LedgerIdField()), + (f.STATE_ROOT.nm, MerkleRootField(nullable=True)), + (f.TXN_ROOT.nm, MerkleRootField(nullable=True)), + ) + + +class Prepare(MessageBase): + typename = PREPARE + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + (f.PP_TIME.nm, TimestampField()), + (f.DIGEST.nm, NonEmptyStringField()), + (f.STATE_ROOT.nm, MerkleRootField(nullable=True)), + (f.TXN_ROOT.nm, MerkleRootField(nullable=True)), + ) + + +class Commit(MessageBase): + typename = COMMIT + schema = ( + (f.INST_ID.nm, 
NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + ) + + +class Checkpoint(MessageBase): + typename = CHECKPOINT + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.SEQ_NO_START.nm, NonNegativeNumberField()), + (f.SEQ_NO_END.nm, NonNegativeNumberField()), + (f.DIGEST.nm, NonEmptyStringField()), + ) + + +class ThreePCState(MessageBase): + typename = THREE_PC_STATE + schema = ( + (f.INST_ID.nm, NonNegativeNumberField()), + (f.MSGS.nm, IterableField(ClientMessageValidator(operation_schema_is_strict=True))), + ) + + +# TODO implement actual rules +class CheckpointState(MessageBase): + typename = CHECKPOINT_STATE + schema = ( + (f.SEQ_NO.nm, AnyValueField()), + (f.DIGESTS.nm, AnyValueField()), + (f.DIGEST.nm, AnyValueField()), + (f.RECEIVED_DIGESTS.nm, AnyValueField()), + (f.IS_STABLE.nm, AnyValueField()) + ) + + +# TODO implement actual rules +class Reply(MessageBase): + typename = REPLY + schema = ( + (f.RESULT.nm, AnyValueField()), + ) + + +class InstanceChange(MessageBase): + typename = INSTANCE_CHANGE + schema = ( + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.REASON.nm, NonNegativeNumberField()) + ) + + +class LedgerStatus(MessageBase): + """ + Purpose: spread status of ledger copy on a specific node. 
+ When node receives this message and see that it has different + status of ledger it should reply with LedgerStatus that contains its + status + """ + typename = LEDGER_STATUS + schema = ( + (f.LEDGER_ID.nm, LedgerIdField()), + (f.TXN_SEQ_NO.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField(nullable=True)), + (f.PP_SEQ_NO.nm, NonNegativeNumberField(nullable=True)), + (f.MERKLE_ROOT.nm, MerkleRootField()), + ) + + +class ConsistencyProof(MessageBase): + typename = CONSISTENCY_PROOF + schema = ( + (f.LEDGER_ID.nm, LedgerIdField()), + (f.SEQ_NO_START.nm, NonNegativeNumberField()), + (f.SEQ_NO_END.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.PP_SEQ_NO.nm, NonNegativeNumberField()), + (f.OLD_MERKLE_ROOT.nm, MerkleRootField()), + (f.NEW_MERKLE_ROOT.nm, MerkleRootField()), + (f.HASHES.nm, IterableField(NonEmptyStringField())), + ) + + +class CatchupReq(MessageBase): + typename = CATCHUP_REQ + schema = ( + (f.LEDGER_ID.nm, LedgerIdField()), + (f.SEQ_NO_START.nm, NonNegativeNumberField()), + (f.SEQ_NO_END.nm, NonNegativeNumberField()), + (f.CATCHUP_TILL.nm, NonNegativeNumberField()), + ) + + +class CatchupRep(MessageBase): + typename = CATCHUP_REP + schema = ( + (f.LEDGER_ID.nm, LedgerIdField()), + # TODO: turn on validation, the cause is INDY-388 + # (f.TXNS.nm, MapField(key_field=StringifiedNonNegativeNumberField(), + # value_field=ClientMessageValidator(operation_schema_is_strict=False))), + (f.TXNS.nm, AnyValueField()), + (f.CONS_PROOF.nm, IterableField(Base58Field(byte_lengths=(32,)))), + ) + + +class ViewChangeDone(MessageBase): + """ + Node sends this kind of message when view change steps done and it is + ready to switch to the new primary. + In contrast to 'Primary' message this one does not imply election. 
+ """ + typename = VIEW_CHANGE_DONE + + schema = ( + # name is nullable because this message can be sent when + # there were no view changes and instance has no primary yet + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.NAME.nm, NonEmptyStringField(nullable=True)), + (f.LEDGER_INFO.nm, IterableField(LedgerInfoField())) + ) + + +class CurrentState(MessageBase): + """ + Node sends this kind of message for nodes which + suddenly reconnected (lagged). It contains information about current + pool state, like view no, primary etc. + """ + typename = CURRENT_STATE + + schema = ( + (f.VIEW_NO.nm, NonNegativeNumberField()), + (f.PRIMARY.nm, IterableField(AnyField())), # ViewChangeDone + ) + + +""" +The choice to do a generic 'request message' feature instead of a specific +one was debated. It has some pros and some cons. We wrote up the analysis in +http://bit.ly/2uxf6Se. This decision can and should be revisited if we feel a +lot of ongoing dissonance about it. Lovesh, Alex, and Daniel, July 2017 +""" +class MessageReq(MessageBase): + """ + Purpose: ask node for any message + """ + allowed_types = {LEDGER_STATUS, CONSISTENCY_PROOF, PREPREPARE, + PROPAGATE} + typename = MESSAGE_REQUEST + schema = ( + (f.MSG_TYPE.nm, ChooseField(values=allowed_types)), + (f.PARAMS.nm, AnyMapField()) + ) + + +class MessageRep(MessageBase): + """ + Purpose: respond to a node for any requested message + """ + # TODO: support a setter for `msg` to create an instance of a type according to `msg_type` + typename = MESSAGE_RESPONSE + schema = ( + (f.MSG_TYPE.nm, ChooseField(values=MessageReq.allowed_types)), + (f.PARAMS.nm, AnyMapField()), + (f.MSG.nm, AnyField()) + ) + + +ThreePhaseType = (PrePrepare, Prepare, Commit) +ThreePhaseMsg = TypeVar("3PhaseMsg", *ThreePhaseType) + + +ElectionType = (Nomination, Primary, Reelection) +ElectionMsg = TypeVar("ElectionMsg", *ElectionType) + +ThreePhaseKey = NamedTuple("ThreePhaseKey", [ + f.VIEW_NO, + f.PP_SEQ_NO + ]) + + diff --git 
a/plenum/common/perf_util.py b/plenum/common/perf_util.py index 45a644f434..add91acfc7 100644 --- a/plenum/common/perf_util.py +++ b/plenum/common/perf_util.py @@ -1,7 +1,10 @@ +from collections import abc +from collections import deque from functools import wraps import sys import time +from typing import Optional, Tuple def get_size(obj, seen=None): @@ -38,3 +41,33 @@ def timed(*args, **kw): return result return timed + + +def get_collection_sizes(obj, collections: Optional[Tuple]=None, + get_only_non_empty=False): + """ + Iterates over `collections` of the gives object and gives its byte size + and number of items in collection + """ + from pympler import asizeof + collections = collections or (list, dict, set, deque, abc.Sized) + if not isinstance(collections, tuple): + collections = tuple(collections) + + result = [] + for attr_name in dir(obj): + attr = getattr(obj, attr_name) + if isinstance(attr, collections) and (not get_only_non_empty or len(attr) > 0): + result.append((attr_name, len(attr), asizeof.asizeof(attr, detail=1))) + return result + + +def get_memory_usage(obj, get_collections_memory_usage=False, + get_only_non_empty=False): + result = [] + from pympler import asizeof + result.append(asizeof.asizeof(obj)) + if get_collections_memory_usage: + result.append(get_collection_sizes(obj, + get_only_non_empty=get_only_non_empty)) + return result diff --git a/plenum/common/request.py b/plenum/common/request.py index a11708d85c..9fce3b311b 100644 --- a/plenum/common/request.py +++ b/plenum/common/request.py @@ -4,8 +4,9 @@ from stp_core.types import Identifier from plenum.common.signing import serializeMsg -from plenum.common.constants import REQDIGEST, REQKEY -from plenum.common.types import f, OPERATION, ClientMessageValidator +from plenum.common.constants import REQDIGEST, REQKEY, FORCE +from plenum.common.types import f, OPERATION +from plenum.common.messages.client_request import ClientMessageValidator class Request: @@ -70,6 +71,13 @@ def 
fromState(cls, state): def serialized(self): return serializeMsg(self.__getstate__()) + def isForced(self): + force = self.operation.get(FORCE) + return str(force) == 'True' + + def __hash__(self): + return hash(self.serialized()) + class ReqDigest(NamedTuple(REQDIGEST, [f.IDENTIFIER, f.REQ_ID, diff --git a/plenum/common/signer_did.py b/plenum/common/signer_did.py index 1a9cc461ab..fb9b887dba 100644 --- a/plenum/common/signer_did.py +++ b/plenum/common/signer_did.py @@ -16,11 +16,26 @@ class DidIdentity: abbr_prfx = '~' def __init__(self, identifier, verkey=None, rawVerkey=None): + self.abbreviated = None + if verkey is None and rawVerkey is None: + if identifier: + self._identifier = identifier + self._verkey = None + return + assert (verkey or rawVerkey) and not (verkey and rawVerkey) if identifier: self._identifier = identifier - self._verkey = verkey or rawToFriendly(rawVerkey) - self.abbreviated = False + if rawVerkey: + self._verkey = rawToFriendly(rawVerkey) + self.abbreviated = False + else: + if verkey.startswith("~"): + self._verkey = verkey[1:] + self.abbreviated = True + else: + self._verkey = verkey + self.abbreviated = False else: verraw = rawVerkey or friendlyToRaw(verkey) self._identifier = rawToFriendly(verraw[:16]) @@ -33,11 +48,23 @@ def identifier(self) -> Identifier: @property def verkey(self) -> str: + if self._verkey is None: + return None + if self.abbreviated: return self.abbr_prfx + self._verkey else: return self._verkey + @property + def full_verkey(self): + if self.abbreviated: + rtn = friendlyToRaw(self.identifier) + rtn += friendlyToRaw(self.verkey[1:]) + return rawToFriendly(rtn) + else: + return self.verkey + class DidSigner(DidIdentity, Signer): """ diff --git a/plenum/common/signer_simple.py b/plenum/common/signer_simple.py index 0a58752d42..c9b7ca240f 100644 --- a/plenum/common/signer_simple.py +++ b/plenum/common/signer_simple.py @@ -42,8 +42,8 @@ def __init__(self, identifier=None, seed=None, alias=None): # this is the public 
key used to verify signatures (securely shared # before-hand with recipient) - - self.verkey = hexToFriendly(hexlify(self.naclSigner.verraw)) + hex_verkey = hexlify(self.naclSigner.verraw) + self.verkey = hexToFriendly(hex_verkey) self._identifier = identifier or self.verkey diff --git a/plenum/common/stack_manager.py b/plenum/common/stack_manager.py index 499fc91a20..dbd7be82eb 100644 --- a/plenum/common/stack_manager.py +++ b/plenum/common/stack_manager.py @@ -2,6 +2,7 @@ import shutil from abc import abstractmethod from collections import OrderedDict +from typing import List from plenum.common.keygen_utils import initRemoteKeys from plenum.common.signer_did import DidIdentity @@ -75,7 +76,7 @@ def parseLedgerForHaAndKeys(ledger, returnActive=True): cliNodeReg = OrderedDict() nodeKeys = {} activeValidators = set() - for _, txn in ledger.getAllTxn().items(): + for _, txn in ledger.getAllTxn(): if txn[TXN_TYPE] == NODE: nodeName = txn[DATA][ALIAS] clientStackName = nodeName + CLIENT_STACK_SUFFIX @@ -89,8 +90,13 @@ def parseLedgerForHaAndKeys(ledger, returnActive=True): nodeReg[nodeName] = HA(*nHa) if cHa: cliNodeReg[clientStackName] = HA(*cHa) - # TODO: Need to handle abbreviated verkey - verkey = cryptonymToHex(txn[TARGET_NYM]) + + try: + # TODO: Need to handle abbreviated verkey + verkey = cryptonymToHex(txn[TARGET_NYM]) + except ValueError as ex: + raise ValueError("Invalid verkey. Rebuild pool transactions.") + nodeKeys[nodeName] = verkey services = txn[DATA].get(SERVICES) @@ -172,13 +178,9 @@ def stackKeysChanged(self, txn, remoteName, nodeOrClientObj): else: verkey = cryptonymToHex(txn[VERKEY]) - try: - # Override any keys found - initRemoteKeys(self.name, remoteName, self.basedirpath, - verkey, override=True) - except Exception as ex: - logger.error("Exception while initializing keep for remote {}". 
- format(ex)) + # Override any keys found + initRemoteKeys(self.name, remoteName, self.basedirpath, + verkey, override=True) # Attempt connection with the new keys nodeOrClientObj.nodestack.maintainConnections(force=True) @@ -216,15 +218,18 @@ def addRemoteKeysFromLedger(self, keys): format(ex)) def nodeExistsInLedger(self, nym): - for txn in self.ledger.getAllTxn().values(): + # Since PoolLedger is going to be small so using + # `getAllTxn` is fine + for _, txn in self.ledger.getAllTxn(): if txn[TXN_TYPE] == NODE and \ txn[TARGET_NYM] == nym: return True return False + # TODO: Consider removing `nodeIds` and using `node_ids_in_order` @property def nodeIds(self) -> set: - return {txn[TARGET_NYM] for txn in self.ledger.getAllTxn().values()} + return {txn[TARGET_NYM] for _, txn in self.ledger.getAllTxn()} def getNodeInfoFromLedger(self, nym, excludeLast=True): # Returns the info of the node from the ledger with transaction @@ -233,7 +238,7 @@ def getNodeInfoFromLedger(self, nym, excludeLast=True): # it is used after update to the ledger has already been made txns = [] nodeTxnSeqNos = [] - for seqNo, txn in self.ledger.getAllTxn().items(): + for seqNo, txn in self.ledger.getAllTxn(): if txn[TXN_TYPE] == NODE and txn[TARGET_NYM] == nym: txns.append(txn) nodeTxnSeqNos.append(seqNo) diff --git a/plenum/common/stacks.py b/plenum/common/stacks.py index 76fea65e2a..f0b6f591b4 100644 --- a/plenum/common/stacks.py +++ b/plenum/common/stacks.py @@ -2,18 +2,20 @@ from plenum import config from plenum.common.batched import Batched, logger +from plenum.common.config_util import getConfig from plenum.common.message_processor import MessageProcessor from stp_raet.rstack import SimpleRStack, KITRStack from stp_core.types import HA -from stp_zmq.zstack import SimpleZStack, KITZStack +from stp_zmq.kit_zstack import KITZStack +from stp_zmq.simple_zstack import SimpleZStack class ClientZStack(SimpleZStack, MessageProcessor): - def __init__(self, stackParams: dict, msgHandler: Callable, 
seed=None): + def __init__(self, stackParams: dict, msgHandler: Callable, seed=None, + config=None): + config = config or getConfig() SimpleZStack.__init__(self, stackParams, msgHandler, seed=seed, - onlyListener=True, - listenerQuota=config.LISTENER_MESSAGE_QUOTA, - remoteQuota=config.REMOTES_MESSAGE_QUOTA) + onlyListener=True, config=config) MessageProcessor.__init__(self, allowDictOnly=False) self.connectedClients = set() @@ -55,12 +57,12 @@ def transmitToClients(self, msg: Any, remoteNames: List[str]): class NodeZStack(Batched, KITZStack): def __init__(self, stackParams: dict, msgHandler: Callable, - registry: Dict[str, HA], seed=None, sighex: str=None): + registry: Dict[str, HA], seed=None, sighex: str=None, + config=None): + config = config or getConfig() Batched.__init__(self) KITZStack.__init__(self, stackParams, msgHandler, registry=registry, - seed=seed, sighex=sighex, - listenerQuota=config.LISTENER_MESSAGE_QUOTA, - remoteQuota=config.REMOTES_MESSAGE_QUOTA) + seed=seed, sighex=sighex, config=config) MessageProcessor.__init__(self, allowDictOnly=False) # TODO: Reconsider defaulting `reSetupAuth` to True. 
diff --git a/plenum/common/startable.py b/plenum/common/startable.py index 912cc7077f..44856f7071 100644 --- a/plenum/common/startable.py +++ b/plenum/common/startable.py @@ -48,8 +48,18 @@ class Mode(IntEnum): """ Mode a node can be in """ - starting = 1 - discovering = 2 # catching up on pool txn ledger - discovered = 3 # caught up with pool txn ledger - syncing = 4 # catching up on domain txn ledger - participating = 5 # caught up with domain txn ledger + # TODO: This assumes Pool ledger is the first ledger and Domain ledger + starting = 100 + discovering = 200 # catching up on pool txn ledger + discovered = 300 # caught up with pool txn ledger + syncing = 400 # catching up on domain txn ledger + synced = 410 # caught up with domain txn ledger + participating = 500 # caught up completely and chosen primary + + @classmethod + def is_done_discovering(cls, mode): + return mode >= cls.discovered + + @classmethod + def is_done_syncing(cls, mode): + return mode >= cls.synced diff --git a/plenum/common/test_network_setup.py b/plenum/common/test_network_setup.py index 438748db73..a8b88fb01b 100644 --- a/plenum/common/test_network_setup.py +++ b/plenum/common/test_network_setup.py @@ -14,7 +14,9 @@ from plenum.common.keygen_utils import initLocalKeys from plenum.common.constants import STEWARD, CLIENT_STACK_SUFFIX, TRUSTEE -from plenum.common.util import hexToFriendly, adict +from plenum.common.util import hexToFriendly +from plenum.common.signer_did import DidSigner +from stp_core.common.util import adict class TestNetworkSetup: @@ -70,11 +72,11 @@ def bootstrapTestNodesCore(cls, config, envName, appendToLedgers, domainLedger = cls.init_domain_ledger(appendToLedgers, baseDir, config, envName, domainTxnFieldOrder) - trustee_txn = Member.nym_txn(trustee_def.nym, trustee_def.name, role=TRUSTEE) + trustee_txn = Member.nym_txn(trustee_def.nym, trustee_def.name, verkey=trustee_def.verkey, role=TRUSTEE) domainLedger.add(trustee_txn) for sd in steward_defs: - nym_txn = 
Member.nym_txn(sd.nym, sd.name, role=STEWARD, + nym_txn = Member.nym_txn(sd.nym, sd.name, verkey=sd.verkey, role=STEWARD, creator=trustee_def.nym) domainLedger.add(nym_txn) @@ -107,7 +109,7 @@ def bootstrapTestNodesCore(cls, config, envName, appendToLedgers, poolLedger.add(node_txn) for cd in client_defs: - txn = Member.nym_txn(cd.nym, cd.name, creator=trustee_def.nym) + txn = Member.nym_txn(cd.nym, cd.name, verkey=cd.verkey, creator=trustee_def.nym) domainLedger.add(txn) poolLedger.stop() @@ -257,9 +259,10 @@ def gen_defs(cls, ips, nodeCount, starting_port): for i in range(1, nodeCount + 1): d = adict() d.name = "Steward" + str(i) - s_sigseed = cls.getSigningSeed(d.name) - s_verkey = Signer(s_sigseed).verhex - d.nym = cls.getNymFromVerkey(s_verkey) + d.sigseed = cls.getSigningSeed(d.name) + s_signer = DidSigner(seed=d.sigseed) + d.nym = s_signer.identifier + d.verkey = s_signer.verkey steward_defs.append(d) name = "Node" + str(i) @@ -280,8 +283,9 @@ def gen_client_def(cls, idx): d = adict() d.name = "Client" + str(idx) d.sigseed = cls.getSigningSeed(d.name) - d.verkey = Signer(d.sigseed).verhex - d.nym = cls.getNymFromVerkey(d.verkey) + c_signer = DidSigner(seed=d.sigseed) + d.nym = c_signer.identifier + d.verkey = c_signer.verkey return d @classmethod @@ -293,8 +297,9 @@ def gen_trustee_def(cls, idx): d = adict() d.name = 'Trustee' + str(idx) d.sigseed = cls.getSigningSeed(d.name) - d.verkey = Signer(d.sigseed).verhex - d.nym = cls.getNymFromVerkey(d.verkey) + t_signer = DidSigner(seed=d.sigseed) + d.nym = t_signer.identifier + d.verkey = t_signer.verkey return d diff --git a/plenum/common/transaction_store.py b/plenum/common/transaction_store.py index 19e715998b..42fd4f88f2 100644 --- a/plenum/common/transaction_store.py +++ b/plenum/common/transaction_store.py @@ -4,7 +4,8 @@ from typing import Optional from plenum.common.constants import TXN_ID -from plenum.common.types import Reply, f +from plenum.common.types import f +from 
plenum.common.messages.node_messages import Reply from stp_core.common.log import getlogger from plenum.persistence.storage import Storage diff --git a/plenum/common/transactions.py b/plenum/common/transactions.py index 3b08b680f2..bdd521914b 100644 --- a/plenum/common/transactions.py +++ b/plenum/common/transactions.py @@ -8,6 +8,7 @@ class PlenumTransactions(Enum): # Also the numeric constants CANNOT collide with transactions in dependent components. NODE = "0" NYM = "1" + GET_TXN = "3" def __str__(self): return self.name diff --git a/plenum/common/txn_util.py b/plenum/common/txn_util.py index 8f1e12138d..6d940bdd7d 100644 --- a/plenum/common/txn_util.py +++ b/plenum/common/txn_util.py @@ -18,7 +18,7 @@ def getTxnOrderedFields(): (f.IDENTIFIER.nm, (str, str)), (f.REQ_ID.nm, (str, int)), (f.SIG.nm, (str, str)), - (TXN_TIME, (str, float)), + (TXN_TIME, (str, int)), (TXN_TYPE, (str, str)), (TARGET_NYM, (str, str)), (VERKEY, (str, str)), @@ -50,18 +50,50 @@ def createGenesisTxnFile(genesisTxns, targetDir, fileName, fieldOrdering, ledger.stop() -def reqToTxn(req: Request): +def reqToTxn(req: Request, cons_time=None): """ Transform a client request such that it can be stored in the ledger. 
Also this is what will be returned to the client in the reply :param req: + :param cons_time: UTC epoch at which consensus was reached :return: """ - data = req.signingState + # TODO: we should not reformat transaction this way + # When refactor keep in mind thought about back compatibility + + # data = req.signingState + # res = { + # f.IDENTIFIER.nm: req.identifier, + # f.REQ_ID.nm: req.reqId, + # f.SIG.nm: req.signature + # } + # res.update(data[OPERATION]) + # return res + + if isinstance(req, dict): + if TXN_TYPE in req: + return req + data = req + else : + data = req.as_dict + res = { - f.IDENTIFIER.nm: req.identifier, - f.REQ_ID.nm: req.reqId, - f.SIG.nm: req.signature + f.IDENTIFIER.nm: data[f.IDENTIFIER.nm], + f.REQ_ID.nm: data[f.REQ_ID.nm], + f.SIG.nm: data[f.SIG.nm], + TXN_TIME: cons_time or data.get(TXN_TIME) } res.update(data[OPERATION]) return res + + +def txnToReq(txn): + """ + Transforms transactions to request form (not to Request) + """ + txn = txn.copy() + request = {} + for field_name in [f.IDENTIFIER.nm, f.REQ_ID.nm, f.SIG.nm]: + request[field_name] = txn.pop(field_name, None) + request[OPERATION] = txn + return request diff --git a/plenum/common/types.py b/plenum/common/types.py index 3137eb50e1..8aa0fe4136 100644 --- a/plenum/common/types.py +++ b/plenum/common/types.py @@ -1,18 +1,7 @@ -from typing import NamedTuple, Any, List, Mapping, Optional, TypeVar, Dict, \ - Tuple - -import sys from collections import namedtuple +from typing import NamedTuple, Any, List, Mapping, Optional, Dict, \ + Tuple -from plenum.common.constants import NOMINATE, PRIMARY, REELECTION, REQACK, \ - ORDERED, PROPAGATE, PREPREPARE, REPLY, COMMIT, PREPARE, BATCH, \ - INSTANCE_CHANGE, BLACKLIST, REQNACK, LEDGER_STATUS, CONSISTENCY_PROOF, \ - CATCHUP_REQ, CATCHUP_REP, POOL_LEDGER_TXNS, CONS_PROOF_REQUEST, CHECKPOINT, \ - CHECKPOINT_STATE, THREE_PC_STATE, REJECT, OP_FIELD_NAME, POOL_LEDGER_ID, DOMAIN_LEDGER_ID -from plenum.common.messages.client_request import 
ClientOperationField -from plenum.common.messages.fields import * -from plenum.common.messages.fields import IdentifierField, NonNegativeNumberField, SignatureField -from plenum.common.messages.message_base import MessageBase, MessageValidator from stp_core.types import HA NodeDetail = NamedTuple("NodeDetail", [ @@ -42,6 +31,7 @@ class f: # provides a namespace for reusable field constants SENDER_NODE = Field('senderNode', str) REQ_ID = Field('reqId', int) VIEW_NO = Field('viewNo', int) + LEDGER_INFO = Field("ledgerInfo", List[tuple]) INST_ID = Field('instId', int) IS_STABLE = Field('isStable', bool) MSGS = Field('messages', List[Mapping]) @@ -69,402 +59,15 @@ class f: # provides a namespace for reusable field constants TXNS = Field("txns", List[Any]) TXN = Field("txn", Any) NODES = Field('nodes', Dict[str, HA]) - POOL_LEDGER_STATUS = Field("poolLedgerStatus", Any) - DOMAIN_LEDGER_STATUS = Field("domainLedgerStatus", Any) CONS_PROOF = Field("consProof", Any) - POOL_CONS_PROOF = Field("poolConsProof", Any) - DOMAIN_CONS_PROOF = Field("domainConsProof", Any) - POOL_CATCHUP_REQ = Field("poolCatchupReq", Any) - DOMAIN_CATCHUP_REQ = Field("domainCatchupReq", Any) - POOL_CATCHUP_REP = Field("poolCatchupRep", Any) - DOMAIN_CATCHUP_REP = Field("domainCatchupRep", Any) - + MSG_TYPE = Field("msg_type", str) + PARAMS = Field("params", dict) + PRIMARY = Field("primary", dict) -class TaggedTupleBase: - def melted(self): - """ - Return the tagged tuple in a dictionary form. 
- """ - if hasattr(self, "__dict__"): - m = self.__dict__ - elif hasattr(self, "_asdict"): - m = self._asdict() - else: - raise RuntimeError("Cannot convert argument to a dictionary") - m[OP_FIELD_NAME] = self.typename - m.move_to_end(OP_FIELD_NAME, False) - return m - - -# noinspection PyProtectedMember -def TaggedTuple(typename, fields) -> NamedTuple: - cls = NamedTuple(typename, fields) - if OP_FIELD_NAME in cls._fields: - raise RuntimeError("field name '{}' is reserved in TaggedTuple" - .format(OP_FIELD_NAME)) - cls.__bases__ += (TaggedTupleBase,) - cls.typename = typename - return cls OPERATION = 'operation' -class ClientMessageValidator(MessageValidator): - schema = ( - (f.IDENTIFIER.nm, IdentifierField()), - (f.REQ_ID.nm, NonNegativeNumberField()), - (OPERATION, ClientOperationField()), - (f.SIG.nm, SignatureField(optional=True)), - (f.DIGEST.nm, NonEmptyStringField(optional=True)), - ) - - -class Nomination(MessageBase): - typename = NOMINATE - - schema = ( - (f.NAME.nm, NonEmptyStringField()), - (f.INST_ID.nm, NonNegativeNumberField()), - (f.VIEW_NO.nm, NonNegativeNumberField()), - (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), - ) -# Nomination = TaggedTuple(NOMINATE, [ -# f.NAME, -# f.INST_ID, -# f.VIEW_NO, -# f.ORD_SEQ_NO]) - - -Batch = TaggedTuple(BATCH, [ - f.MSGS, - f.SIG]) - -# Reelection messages that nodes send when they find the 2 or more nodes have -# equal nominations for primary. `round` indicates the reelection round -# number. So the first reelection would have round number 1, the one after -# that would have round number 2. 
If a node receives a reelection message with -# a round number that is not 1 greater than the reelections rounds it has -# already seen then it rejects that message - - -class Reelection(MessageBase): - typename = REELECTION - - schema = ( - (f.INST_ID.nm, NonNegativeNumberField()), - (f.ROUND.nm, NonNegativeNumberField()), - (f.TIE_AMONG.nm, IterableField(TieAmongField())), - (f.VIEW_NO.nm, NonNegativeNumberField()), - ) -# Reelection = TaggedTuple(REELECTION, [ -# f.INST_ID, -# f.ROUND, -# f.TIE_AMONG, -# f.VIEW_NO]) - -# Declaration of a winner - -class Primary(MessageBase): - typename = PRIMARY - - schema = ( - (f.NAME.nm, NonEmptyStringField()), - (f.INST_ID.nm, NonNegativeNumberField()), - (f.VIEW_NO.nm, NonNegativeNumberField()), - (f.ORD_SEQ_NO.nm, NonNegativeNumberField()), - ) -# Primary = TaggedTuple(PRIMARY, [ -# f.NAME, -# f.INST_ID, -# f.VIEW_NO, -# f.ORD_SEQ_NO]) - -BlacklistMsg = NamedTuple(BLACKLIST, [ - f.SUSP_CODE, - f.NODE_NAME]) - -RequestAck = TaggedTuple(REQACK, [ - f.IDENTIFIER, - f.REQ_ID]) - -RequestNack = TaggedTuple(REQNACK, [ - f.IDENTIFIER, - f.REQ_ID, - f.REASON]) - -Reject = TaggedTuple(REJECT, [ - f.IDENTIFIER, - f.REQ_ID, - f.REASON]) - -PoolLedgerTxns = TaggedTuple(POOL_LEDGER_TXNS, [ - f.TXN -]) - - -class Ordered(MessageBase): - typename = ORDERED - schema = ( - (f.INST_ID.nm, NonNegativeNumberField()), - (f.VIEW_NO.nm, NonNegativeNumberField()), - (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), - (f.PP_SEQ_NO.nm, NonNegativeNumberField()), - (f.PP_TIME.nm, TimestampField()), - (f.LEDGER_ID.nm, LedgerIdField()), - (f.STATE_ROOT.nm, HexField(length=64, nullable=True)), - (f.TXN_ROOT.nm, HexField(length=64, nullable=True)), - ) -# Ordered = NamedTuple(ORDERED, [ -# f.INST_ID, -# f.VIEW_NO, -# f.REQ_IDR, -# f.PP_SEQ_NO, -# f.PP_TIME, -# f.LEDGER_ID, -# f.STATE_ROOT, -# f.TXN_ROOT, -# ]) - -# σc, i>~μi -# s = client sequence number (comes from Aardvark paper) - -class Propagate(MessageBase): - typename = PROPAGATE - schema 
= ( - (f.REQUEST.nm, ClientMessageValidator()), - (f.SENDER_CLIENT.nm, NonEmptyStringField()), - ) -# Propagate = TaggedTuple(PROPAGATE, [ -# f.REQUEST, -# f.SENDER_CLIENT]) - - -class PrePrepare(MessageBase): - typename = PREPREPARE - schema = ( - (f.INST_ID.nm, NonNegativeNumberField()), - (f.VIEW_NO.nm, NonNegativeNumberField()), - (f.PP_SEQ_NO.nm, NonNegativeNumberField()), - (f.PP_TIME.nm, TimestampField()), - (f.REQ_IDR.nm, IterableField(RequestIdentifierField())), - (f.DISCARDED.nm, NonNegativeNumberField()), - (f.DIGEST.nm, NonEmptyStringField()), - (f.LEDGER_ID.nm, LedgerIdField()), - (f.STATE_ROOT.nm, HexField(length=64, nullable=True)), - (f.TXN_ROOT.nm, HexField(length=64, nullable=True)), - ) -# PrePrepare = TaggedTuple(PREPREPARE, [ -# f.INST_ID, -# f.VIEW_NO, -# f.PP_SEQ_NO, -# f.PP_TIME, -# f.REQ_IDR, -# f.DISCARDED, -# f.DIGEST, -# f.LEDGER_ID, -# f.STATE_ROOT, -# f.TXN_ROOT, -# ]) - - -class Prepare(MessageBase): - typename = PREPARE - schema = ( - (f.INST_ID.nm, NonNegativeNumberField()), - (f.VIEW_NO.nm, NonNegativeNumberField()), - (f.PP_SEQ_NO.nm, NonNegativeNumberField()), - (f.DIGEST.nm, NonEmptyStringField()), - (f.STATE_ROOT.nm, HexField(length=64, nullable=True)), - (f.TXN_ROOT.nm, HexField(length=64, nullable=True)), - ) -# Prepare = TaggedTuple(PREPARE, [ -# f.INST_ID, -# f.VIEW_NO, -# f.PP_SEQ_NO, -# f.DIGEST, -# f.STATE_ROOT, -# f.TXN_ROOT, -# ]) - - -class Commit(MessageBase): - typename = COMMIT - schema = ( - (f.INST_ID.nm, NonNegativeNumberField()), - (f.VIEW_NO.nm, NonNegativeNumberField()), - (f.PP_SEQ_NO.nm, NonNegativeNumberField()), - ) -# Commit = TaggedTuple(COMMIT, [ -# f.INST_ID, -# f.VIEW_NO, -# f.PP_SEQ_NO -# ]) - -# class Checkpoint(MessageBase): -# typename = CHECKPOINT -# schema = ( -# (f.INST_ID.nm, NonNegativeNumberField()), -# (f.VIEW_NO.nm, NonNegativeNumberField()), -# (f.SEQ_NO_START.nm, NonNegativeNumberField()), -# (f.SEQ_NO_END.nm, NonNegativeNumberField()), -# (f.DIGEST.nm, NonEmptyStringField()), -# ) 
-Checkpoint = TaggedTuple(CHECKPOINT, [ - f.INST_ID, - f.VIEW_NO, - f.SEQ_NO_START, - f.SEQ_NO_END, - f.DIGEST]) - - -CheckpointState = NamedTuple(CHECKPOINT_STATE, [ - f.SEQ_NO, # Current ppSeqNo in the checkpoint - f.DIGESTS, # Digest of all the requests in the checkpoint - f.DIGEST, # Final digest of the checkpoint, after all requests in its - # range have been ordered - f.RECEIVED_DIGESTS, - f.IS_STABLE - ]) - - -# class ThreePCState(MessageBase): -# typename = THREE_PC_STATE -# schema = ( -# (f.INST_ID.nm, NonNegativeNumberField()), -# (f.MSGS.nm, IterableField(ClientMessageValidator())), -# ) -ThreePCState = TaggedTuple(THREE_PC_STATE, [ - f.INST_ID, - f.MSGS]) - -Reply = TaggedTuple(REPLY, [f.RESULT]) - - -class InstanceChange(MessageBase): - typename = INSTANCE_CHANGE - schema = ( - (f.VIEW_NO.nm, NonNegativeNumberField()), - (f.REASON.nm, NonNegativeNumberField()) - ) -# InstanceChange = TaggedTuple(INSTANCE_CHANGE, [ -# f.VIEW_NO, -# f.REASON -# ]) - - -class LedgerStatus(MessageBase): - typename = LEDGER_STATUS - schema = ( - (f.LEDGER_ID.nm, LedgerIdField()), - (f.TXN_SEQ_NO.nm, NonNegativeNumberField()), - (f.MERKLE_ROOT.nm, MerkleRootField()), - ) -# LedgerStatus = TaggedTuple(LEDGER_STATUS, [ -# f.LEDGER_ID, -# f.TXN_SEQ_NO, -# f.MERKLE_ROOT]) - - -class ConsistencyProof(MessageBase): - typename = CONSISTENCY_PROOF - schema = ( - (f.LEDGER_ID.nm, LedgerIdField()), - (f.SEQ_NO_START.nm, NonNegativeNumberField()), - (f.SEQ_NO_END.nm, NonNegativeNumberField()), - (f.PP_SEQ_NO.nm, NonNegativeNumberField()), - (f.OLD_MERKLE_ROOT.nm, MerkleRootField()), - (f.NEW_MERKLE_ROOT.nm, MerkleRootField()), - (f.HASHES.nm, IterableField(NonEmptyStringField())), - ) -# ConsistencyProof = TaggedTuple(CONSISTENCY_PROOF, [ -# f.LEDGER_ID, -# f.SEQ_NO_START, -# f.SEQ_NO_END, -# f.PP_SEQ_NO, -# f.OLD_MERKLE_ROOT, -# f.NEW_MERKLE_ROOT, -# f.HASHES -# ]) - -# TODO: Catchup is not a good name, replace it with `sync` or something which -# is familiar - -# class 
CatchupReq(MessageBase): -# typename = CATCHUP_REQ -# schema = ( -# (f.LEDGER_ID.nm, LedgerIdField()), -# (f.SEQ_NO_START.nm, NonNegativeNumberField()), -# (f.SEQ_NO_END.nm, NonNegativeNumberField()), -# (f.CATCHUP_TILL.nm, NonNegativeNumberField()), -# ) -CatchupReq = TaggedTuple(CATCHUP_REQ, [ - f.LEDGER_ID, - f.SEQ_NO_START, - f.SEQ_NO_END, - f.CATCHUP_TILL -]) - - -# class CatchupRep(MessageBase): -# typename = CATCHUP_REQ -# schema = ( -# (f.LEDGER_ID.nm, LedgerIdField()), -# (f.TXNS.nm, IterableField(ClientMessageValidator())), -# (f.CONS_PROOF.nm, IterableField(HexField(length=64))), -# ) -CatchupRep = TaggedTuple(CATCHUP_REP, [ - f.LEDGER_ID, - f.TXNS, - f.CONS_PROOF -]) - -# class ConsProofRequest(MessageBase): -# typename = CONS_PROOF_REQUEST -# schema = ( -# (f.LEDGER_ID.nm, LedgerIdField()), -# (f.SEQ_NO_START.nm, NonNegativeNumberField()), -# (f.SEQ_NO_END.nm, NonNegativeNumberField()), -# ) -ConsProofRequest = TaggedTuple(CONS_PROOF_REQUEST, [ - f.LEDGER_ID, - f.SEQ_NO_START, - f.SEQ_NO_END -]) - - -TaggedTuples = None # type: Dict[str, class] - - -def loadRegistry(): - global TaggedTuples - if not TaggedTuples: - this = sys.modules[__name__] - TaggedTuples = {getattr(this, x).__name__: getattr(this, x) - for x in dir(this) if - callable(getattr(getattr(this, x), "melted", None)) - and getattr(getattr(this, x), "_fields", None)} - # attach MessageBase, for pre-testing procedure - # TODO: add MessageBase classes another way - TaggedTuples.update( - {getattr(this, x).typename: getattr(this, x) - for x in dir(this) - if getattr(getattr(this, x), "schema", None) and issubclass(getattr(this, x), MessageBase)} - ) - -loadRegistry() - -ThreePhaseType = (PrePrepare, Prepare, Commit) -ThreePhaseMsg = TypeVar("3PhaseMsg", *ThreePhaseType) - - -ElectionType = (Nomination, Primary, Reelection) -ElectionMsg = TypeVar("ElectionMsg", *ElectionType) - -ThreePhaseKey = NamedTuple("ThreePhaseKey", [ - f.VIEW_NO, - f.PP_SEQ_NO - ]) - PLUGIN_TYPE_VERIFICATION = 
"VERIFICATION" PLUGIN_TYPE_PROCESSING = "PROCESSING" PLUGIN_TYPE_STATS_CONSUMER = "STATS_CONSUMER" diff --git a/plenum/common/util.py b/plenum/common/util.py index 9c3f2714e5..1eb3e528e7 100644 --- a/plenum/common/util.py +++ b/plenum/common/util.py @@ -1,19 +1,20 @@ import asyncio import collections +import functools import glob import inspect import ipaddress import itertools import json import logging +import math import os import random -import string import time -import math from binascii import unhexlify, hexlify -from collections import Counter +from collections import Counter, defaultdict from collections import OrderedDict +from datetime import datetime from math import floor from os.path import basename from typing import TypeVar, Iterable, Mapping, Set, Sequence, Any, Dict, \ @@ -21,9 +22,8 @@ import base58 import libnacl.secret -from libnacl import randombytes, randombytes_uniform import psutil -from jsonpickle import encode, decode +from libnacl import randombytes, randombytes_uniform from six import iteritems, string_types from ledger.util import F @@ -31,9 +31,10 @@ from plenum.common.error import error from stp_core.crypto.util import isHexKey, isHex from stp_core.network.exceptions import \ - MissingEndpoint, \ InvalidEndpointIpAddress, InvalidEndpointPort -import functools + +# Do not remove the next import until imports in sovrin are fixed +from stp_core.common.util import adict T = TypeVar('T') @@ -42,12 +43,13 @@ def randomString(size: int = 20) -> str: """ - Generate a random string of the specified size, - DONOT use python provided random class its a Pseudo Random Number Generator + Generate a random string in hex of the specified size + + DO NOT use python provided random class its a Pseudo Random Number Generator and not secure enough for our needs :param size: size of the random string to generate - :return: the random string generated + :return: the hexadecimal random string """ def randomStr(size): @@ -176,33 +178,11 @@ def 
getMaxFailures(nodeCount: int) -> int: :return: maximum permissible Byzantine failures in the system """ if nodeCount >= 4: - return floor((nodeCount - 1) / 3) + return int(floor((nodeCount - 1) / 3)) else: return 0 -def get_strong_quorum(nodeCount: int = None, f: int = None) -> int: - r""" - Return the minimum number of nodes where the number of correct nodes is - greater than the number of faulty nodes. - Calculated as :math:`2*f + 1` - - :param nodeCount: the number of nodes in the system - :param f: the max. number of failures - """ - if nodeCount is not None: - f = getMaxFailures(nodeCount) - if f is not None: - return 2 * f + 1 - - -def get_weak_quorum(nodeCount: int = None, f: int = None) -> int: - if nodeCount is not None: - f = getMaxFailures(nodeCount) - if f is not None: - return f + 1 - - def getNoInstances(nodeCount: int) -> int: """ Return the number of protocol instances which is equal to f + 1. See @@ -240,37 +220,6 @@ def prime_gen() -> int: D[x] = p -class adict(dict): - """Dict with attr access to keys.""" - marker = object() - - def __init__(self, **kwargs): - super().__init__() - for key in kwargs: - self.__setitem__(key, kwargs[key]) - - def __setitem__(self, key, value): - if isinstance(value, dict) and not isinstance(value, adict): - value = adict(**value) - super(adict, self).__setitem__(key, value) - - def __getitem__(self, key): - found = self.get(key, adict.marker) - if found is adict.marker: - found = adict() - super(adict, self).__setitem__(key, found) - return found - - def copy(self): - return self.__copy__() - - def __copy__(self): - return adict(**self) - - __setattr__ = __setitem__ - __getattr__ = __getitem__ - - async def untilTrue(condition, *args, timeout=5) -> bool: """ Keep checking the condition till it is true or a timeout is reached @@ -346,10 +295,10 @@ def runWithLoop(loop, callback, *args, **kwargs): def checkIfMoreThanFSameItems(items, maxF): - jsonifiedItems = [json.dumps(item, sort_keys=True) for item in items] - 
counts = {} - for jItem in jsonifiedItems: - counts[jItem] = counts.get(jItem, 0) + 1 + jsonified_items = [json.dumps(item, sort_keys=True) for item in items] + counts = defaultdict(int) + for j_item in jsonified_items: + counts[j_item] += 1 if counts and counts[max(counts, key=counts.get)] > maxF: return json.loads(max(counts, key=counts.get)) else: @@ -475,6 +424,14 @@ def isMaxCheckTimeExpired(startTime, maxCheckForMillis): return startTimeRounded + maxCheckForMillis < curTimeRounded +def get_utc_epoch() -> int: + """ + Returns epoch in UTC + :return: + """ + return int(datetime.utcnow().timestamp()) + + def lxor(a, b): # Logical xor of 2 items, return true when one of them is truthy and # one of them falsy @@ -557,23 +514,8 @@ def getWalletFilePath(basedir, walletFileName): return os.path.join(basedir, walletFileName) -def saveGivenWallet(wallet, fileName, contextDir): - createDirIfNotExists(contextDir) - walletFilePath = getWalletFilePath( - contextDir, fileName) - with open(walletFilePath, "w+") as walletFile: - encodedWallet = encode(wallet, keys=True) - walletFile.write(encodedWallet) - return walletFilePath - - -def getWalletByPath(walletFilePath): - with open(walletFilePath) as walletFile: - wallet = decode(walletFile.read(), keys=True) - return wallet - - def getLastSavedWalletFileName(dir): + # TODO move that to WalletStorageHelper def getLastModifiedTime(file): return os.stat(file).st_mtime_ns @@ -590,3 +532,25 @@ def pop_keys(mapping: Dict, cond: Callable): rem.append(k) for i in rem: mapping.pop(i) + + +def check_if_all_equal_in_list(lst): + return lst.count(lst[0]) == len(lst) + + +def compare_3PC_keys(key1, key2) -> int: + """ + Return >0 if key2 is greater than key1, <0 if lesser, 0 otherwise + """ + if key1[0] == key2[0]: + return key2[1] - key1[1] + else: + return key2[0] - key1[0] + + +def min_3PC_key(keys) -> Tuple[int, int]: + return min(keys, key=lambda k: (k[0], k[1])) + + +def max_3PC_key(keys) -> Tuple[int, int]: + return max(keys, 
key=lambda k: (k[0], k[1])) diff --git a/plenum/config.py b/plenum/config.py index 33f40f21cd..2b3bf8e0d8 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -116,9 +116,6 @@ # Timeout factor after which a node starts requesting transactions CatchupTransactionsTimeout = 5 -# Timeout after which the view change is performed -ViewChangeTimeout = 10 - # Log configuration logRotationWhen = 'D' @@ -176,7 +173,7 @@ # After ordering every `CHK_FREQ` batches, replica sends a CHECKPOINT -CHK_FREQ = 10000 +CHK_FREQ = 100 # Difference between low water mark and high water mark LOG_SIZE = 3*CHK_FREQ @@ -186,3 +183,15 @@ CLIENT_REPLY_TIMEOUT = 15 CLIENT_MAX_RETRY_ACK = 5 CLIENT_MAX_RETRY_REPLY = 5 + +VIEW_CHANGE_TIMEOUT = 60 # seconds +MAX_CATCHUPS_DONE_DURING_VIEW_CHANGE = 5 + +# permissions for keyring dirs/files +KEYRING_DIR_MODE = 0o700 # drwx------ +KEYRING_FILE_MODE = 0o600 # -rw------- + +# This timeout is high enough so that even if some PRE-PREPAREs are stashed +# because of being delivered out of order or being out of watermarks or not +# having finalised requests. 
+ACCEPTABLE_DEVIATION_PREPREPARE_SECS = 100 # seconds diff --git a/plenum/persistence/leveldb_hash_store.py b/plenum/persistence/leveldb_hash_store.py index ec3ca68f9a..bb45463c0f 100644 --- a/plenum/persistence/leveldb_hash_store.py +++ b/plenum/persistence/leveldb_hash_store.py @@ -17,6 +17,10 @@ def __init__(self, dataDir): self.leavesDb = None self.open() + @property + def is_persistent(self) -> bool: + return True + def writeLeaf(self, leafHash): self.leavesDb.put(str(self.leafCount + 1), leafHash) self.leafCount += 1 diff --git a/plenum/persistence/storage.py b/plenum/persistence/storage.py index 6b70cbbad3..8d70c09fe8 100644 --- a/plenum/persistence/storage.py +++ b/plenum/persistence/storage.py @@ -4,7 +4,7 @@ from ledger.stores.text_file_store import TextFileStore from plenum.common.exceptions import DataDirectoryNotFound, KeyValueStorageConfigNotFound from plenum.common.constants import StorageType, KeyValueStorageType -from plenum.common.types import Reply +from plenum.common.messages.node_messages import Reply from state.kv.kv_in_memory import KeyValueStorageInMemory from state.kv.kv_store import KeyValueStorage from state.kv.kv_store_leveldb import KeyValueStorageLeveldb diff --git a/plenum/server/client_authn.py b/plenum/server/client_authn.py index 41f0e66f35..ee5e87722d 100644 --- a/plenum/server/client_authn.py +++ b/plenum/server/client_authn.py @@ -94,6 +94,10 @@ def authenticate(self, raise InvalidSignatureFormat from ex ser = self.serializeForSig(msg, topLevelKeysToIgnore=[f.SIG.nm]) verkey = self.getVerkey(identifier) + + if verkey is None: + raise CouldNotAuthenticate + vr = DidVerifier(verkey, identifier=identifier) isVerified = vr.verify(sig, ser) if not isVerified: diff --git a/plenum/server/domain_req_handler.py b/plenum/server/domain_req_handler.py index 2525f1500a..b3b614f1fc 100644 --- a/plenum/server/domain_req_handler.py +++ b/plenum/server/domain_req_handler.py @@ -38,16 +38,16 @@ def validate(self, req: Request, config=None): 
req.reqId, error) - def _reqToTxn(self, req: Request): - txn = reqToTxn(req) + def _reqToTxn(self, req: Request, cons_time: int): + txn = reqToTxn(req, cons_time) for processor in self.reqProcessors: res = processor.process(req) txn.update(res) return txn - def apply(self, req: Request): - txn = self._reqToTxn(req) + def apply(self, req: Request, cons_time: int): + txn = self._reqToTxn(req, cons_time) (start, end), _ = self.ledger.appendTxns([self.transform_txn_for_ledger(txn)]) self.updateState(txnsWithSeqNo(start, end, [txn])) return txn @@ -79,10 +79,9 @@ def countStewards(self) -> int: Note: This is inefficient, a production use case of this function should require an efficient storage mechanism """ - # TODO: do not load all transactions!!! - allTxns = self.ledger.getAllTxn().values() - return sum(1 for txn in allTxns if (txn[TXN_TYPE] == NYM) and - (txn.get(ROLE) == STEWARD)) + # THIS SHOULD NOT BE DONE FOR PRODUCTION + return sum(1 for _, txn in self.ledger.getAllTxn() if + (txn[TXN_TYPE] == NYM) and (txn.get(ROLE) == STEWARD)) def stewardThresholdExceeded(self, config) -> bool: """We allow at most `stewardThreshold` number of stewards to be added diff --git a/plenum/server/message_req_processor.py b/plenum/server/message_req_processor.py new file mode 100644 index 0000000000..89c34ebcca --- /dev/null +++ b/plenum/server/message_req_processor.py @@ -0,0 +1,267 @@ +from typing import Dict +from typing import List + +from plenum.common.constants import LEDGER_STATUS, PREPREPARE, CONSISTENCY_PROOF, \ + PROPAGATE +from plenum.common.messages.fields import RequestIdentifierField +from plenum.common.messages.node_messages import MessageReq, MessageRep, \ + LedgerStatus, PrePrepare, ConsistencyProof, Propagate +from plenum.common.request import Request +from plenum.common.types import f +from plenum.server import replica +from stp_core.common.log import getlogger + + +logger = getlogger() + + +class MessageReqProcessor: + # This is a mixin, it's mixed with node. 
+ def __init__(self): + self.validation_handlers = { + LEDGER_STATUS: self._validate_requested_ledger_status, + CONSISTENCY_PROOF: self._validate_requested_cons_proof, + PREPREPARE: self._validate_requested_preprepare, + PROPAGATE: self._validate_requested_propagate + } + + self.req_handlers = { + LEDGER_STATUS: self._serve_ledger_status_request, + CONSISTENCY_PROOF: self._serve_cons_proof_request, + PREPREPARE: self._serve_preprepare_request, + PROPAGATE: self._serve_propagate_request + } + + self.rep_handlers = { + LEDGER_STATUS: self._process_requested_ledger_status, + CONSISTENCY_PROOF: self._process_requested_cons_proof, + PREPREPARE: self._process_requested_preprepare, + PROPAGATE: self._process_requested_propagate + } + + def process_message_req(self, msg: MessageReq, frm): + # Assumes a shared memory architecture. In case of multiprocessing, + # RPC architecture, use deques to communicate the message and node will + # maintain a unique internal message id to correlate responses. + msg_type = msg.msg_type + resp = self.req_handlers[msg_type](msg) + + if resp is False: + return + + self.sendToNodes(MessageRep(**{ + f.MSG_TYPE.nm: msg_type, + f.PARAMS.nm: msg.params, + f.MSG.nm: resp + }), names=[frm, ]) + + def process_message_rep(self, msg: MessageRep, frm): + msg_type = msg.msg_type + if msg.msg is None: + logger.debug('{} got null response for requested {} from {}'. + format(self, msg_type, frm)) + return + return self.rep_handlers[msg_type](msg, frm) + + def valid_requested_msg(self, msg_type, **kwargs): + return self.validation_handlers[msg_type](**kwargs) + + def request_msg(self, typ, params: Dict, frm: List[str]=None): + self.sendToNodes(MessageReq(**{ + f.MSG_TYPE.nm: typ, + f.PARAMS.nm: params + }), names=frm) + + def _validate_requested_ledger_status(self, **kwargs): + if kwargs['ledger_id'] in self.ledger_ids: + if 'ledger_status' in kwargs: + try: + # TODO: move this validation into MessageBase validation. 
+ # TODO: avoid duplication of code here: create an instance of requested class in a one place (a factory?) + # depending on the msg_type + + # the input is expected as a dict (serialization with ujson==1.33) + return LedgerStatus(**kwargs['ledger_status']) + except TypeError as ex: + logger.warning( + '{} could not create LEDGER_STATUS out of {}'. + format(self, **kwargs['ledger_status'])) + else: + return True + + def _serve_ledger_status_request(self, msg): + params = msg.params + ledger_id = params.get(f.LEDGER_ID.nm) + if self.valid_requested_msg(msg.msg_type, ledger_id=ledger_id): + return self.getLedgerStatus(ledger_id) + else: + self.discard(msg, 'cannot serve request', + logMethod=logger.debug) + return False + + def _process_requested_ledger_status(self, msg, frm): + params = msg.params + ledger_id = params.get(f.LEDGER_ID.nm) + ledger_status = msg.msg + ledger_status = self.valid_requested_msg(msg.msg_type, + ledger_id=ledger_id, + ledger_status=ledger_status) + if ledger_status: + self.ledgerManager.processLedgerStatus(ledger_status, frm=frm) + return + self.discard(msg, + 'cannot process requested message response', + logMethod=logger.debug) + + def _validate_requested_cons_proof(self, **kwargs): + if kwargs['ledger_id'] in self.ledger_ids and \ + (isinstance(kwargs['seq_no_start'], int) and kwargs[ + 'seq_no_start'] > 0) and \ + (isinstance(kwargs['seq_no_end'], int) and kwargs[ + 'seq_no_end'] > 0): + if 'cons_proof' in kwargs: + try: + # the input is expected as a dict (serialization with ujson==1.33) + return ConsistencyProof(**kwargs['cons_proof']) + except TypeError as ex: + logger.warning( + '{} could not create CONSISTENCY_PROOF out of {}'. 
+ format(self, **kwargs['cons_proof'])) + else: + return True + + def _serve_cons_proof_request(self, msg): + params = msg.params + ledger_id = params.get(f.LEDGER_ID.nm) + seq_no_start = params.get(f.SEQ_NO_START.nm) + seq_no_end = params.get(f.SEQ_NO_END.nm) + if self.valid_requested_msg(msg.msg_type, ledger_id=ledger_id, + seq_no_start=seq_no_start, + seq_no_end=seq_no_end): + return self.ledgerManager._buildConsistencyProof(ledger_id, + seq_no_start, + seq_no_end) + else: + self.discard(msg, 'cannot serve request', + logMethod=logger.debug) + return False + + def _process_requested_cons_proof(self, msg, frm): + params = msg.params + ledger_id = params.get(f.LEDGER_ID.nm) + seq_no_start = params.get(f.SEQ_NO_START.nm) + seq_no_end = params.get(f.SEQ_NO_END.nm) + cons_proof = msg.msg + cons_proof = self.valid_requested_msg(msg.msg_type, + ledger_id=ledger_id, + seq_no_start=seq_no_start, + seq_no_end=seq_no_end, + cons_proof=cons_proof) + if cons_proof: + self.ledgerManager.processConsistencyProof(cons_proof, frm=frm) + return + self.discard(msg, + 'cannot process requested message response', + logMethod=logger.debug) + + def _validate_requested_preprepare(self, **kwargs): + if kwargs['inst_id'] in range(len(self.replicas)) and \ + kwargs['view_no'] == self.viewNo and \ + isinstance(kwargs['pp_seq_no'], int) and \ + kwargs['pp_seq_no'] > 0: + if 'pp' in kwargs: + try: + # the input is expected as a dict (serialization with ujson==1.33) + pp = PrePrepare(**kwargs['pp']) + if pp.instId != kwargs['inst_id'] or pp.viewNo != kwargs['view_no']: + logger.warning('{} found PREPREPARE {} not satisfying ' + 'query criteria'.format(self, *kwargs['pp'])) + return + return pp + except TypeError as ex: + logger.warning( + '{} could not create PREPREPARE out of {}'. 
+ format(self, **kwargs['pp'])) + else: + return True + + def _serve_preprepare_request(self, msg): + params = msg.params + inst_id = params.get(f.INST_ID.nm) + view_no = params.get(f.VIEW_NO.nm) + pp_seq_no = params.get(f.PP_SEQ_NO.nm) + if self.valid_requested_msg(msg.msg_type, inst_id=inst_id, + view_no=view_no, pp_seq_no=pp_seq_no): + return self.replicas[inst_id].getPrePrepare(view_no, pp_seq_no) + else: + self.discard(msg, 'cannot serve request', + logMethod=logger.debug) + return False + + def _process_requested_preprepare(self, msg, frm): + params = msg.params + inst_id = params.get(f.INST_ID.nm) + view_no = params.get(f.VIEW_NO.nm) + pp_seq_no = params.get(f.PP_SEQ_NO.nm) + pp = msg.msg + pp = self.valid_requested_msg(msg.msg_type, inst_id=inst_id, + view_no=view_no, pp_seq_no=pp_seq_no, + pp=pp) + if pp: + frm = replica.Replica.generateName(frm, inst_id) + self.replicas[inst_id].process_requested_pre_prepare(pp, + sender=frm) + return + self.discard(msg, + 'cannot process requested message response', + logMethod=logger.debug) + + def _validate_requested_propagate(self, **kwargs): + if not (RequestIdentifierField().validate((kwargs['identifier'], + kwargs['req_id']))): + if 'propagate' in kwargs: + try: + # the input is expected as a dict (serialization with ujson==1.33) + ppg = Propagate(**kwargs['propagate']) + if ppg.request[f.IDENTIFIER.nm] != kwargs['identifier'] or \ + ppg.request[f.REQ_ID.nm] != kwargs['req_id']: + logger.warning('{} found PROPAGATE {} not ' + 'satisfying query criteria'.format(self, *kwargs['ppg'])) + return + return ppg + except TypeError as ex: + logger.warning( + '{} could not create PROPAGATE out of {}'. 
+ format(self, **kwargs['propagate'])) + else: + return True + + def _serve_propagate_request(self, msg): + params = msg.params + identifier = params.get(f.IDENTIFIER.nm) + req_id = params.get(f.REQ_ID.nm) + if self.valid_requested_msg(msg.msg_type, identifier=identifier, + req_id=req_id): + req_key = (identifier, req_id) + if req_key in self.requests and self.requests[req_key].finalised: + sender_client = self.requestSender.get(req_key) + req = self.requests[req_key].finalised + return self.createPropagate(req, sender_client) + else: + self.discard(msg, 'cannot serve request', + logMethod=logger.debug) + return False + + def _process_requested_propagate(self, msg, frm): + params = msg.params + identifier = params.get(f.IDENTIFIER.nm) + req_id = params.get(f.REQ_ID.nm) + ppg = msg.msg + ppg = self.valid_requested_msg(msg.msg_type, identifier=identifier, + req_id=req_id, propagate=ppg) + if ppg: + self.processPropagate(ppg, frm) + else: + self.discard(msg, + 'cannot process requested message response', + logMethod=logger.debug) diff --git a/plenum/server/models.py b/plenum/server/models.py index a3fe87cf18..1868cb8cb6 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -3,7 +3,7 @@ """ from typing import NamedTuple, Set, Tuple, Dict -from plenum.common.types import Commit, Prepare +from plenum.common.messages.node_messages import Prepare, Commit ThreePhaseVotes = NamedTuple("ThreePhaseVotes", [ ("voters", Set[str])]) @@ -75,8 +75,8 @@ def hasPrepare(self, prepare: Prepare) -> bool: def hasPrepareFrom(self, prepare: Prepare, voter: str) -> bool: return super().hasVote(prepare, voter) - def hasQuorum(self, prepare: Prepare, f: int) -> bool: - return self.hasEnoughVotes(prepare, 2 * f) + def hasQuorum(self, prepare: Prepare, quorum: int) -> bool: + return self.hasEnoughVotes(prepare, quorum) class Commits(TrackedMsgs): @@ -112,8 +112,8 @@ def hasCommit(self, commit: Commit) -> bool: def hasCommitFrom(self, commit: Commit, voter: str) -> bool: return 
super().hasVote(commit, voter) - def hasQuorum(self, commit: Commit, f: int) -> bool: - return self.hasEnoughVotes(commit, 2 * f + 1) + def hasQuorum(self, commit: Commit, quorum: int) -> bool: + return self.hasEnoughVotes(commit, quorum) class InstanceChanges(TrackedMsgs): @@ -144,5 +144,5 @@ def hasView(self, viewNo: int) -> bool: def hasInstChngFrom(self, viewNo: int, voter: str) -> bool: return super().hasVote(viewNo, voter) - def hasQuorum(self, viewNo: int, f: int) -> bool: - return self.hasEnoughVotes(viewNo, 2 * f + 1) + def hasQuorum(self, viewNo: int, quorum: int) -> bool: + return self.hasEnoughVotes(viewNo, quorum) diff --git a/plenum/server/monitor.py b/plenum/server/monitor.py index aba0840084..525f01e445 100644 --- a/plenum/server/monitor.py +++ b/plenum/server/monitor.py @@ -355,13 +355,13 @@ def getThroughputs(self, masterInstId: int): masterThrp = self.getThroughput(masterInstId) totalReqs, totalTm = self.getInstanceMetrics(forAllExcept=masterInstId) + backupThrp = totalReqs / totalTm if totalTm else None if masterThrp == 0: if self.numOrderedRequests[masterInstId] == (0, 0): avgReqsPerInst = totalReqs / self.instances.count if avgReqsPerInst <= 1: # too early to tell if we need an instance change masterThrp = None - backupThrp = totalReqs / totalTm if totalTm else None return masterThrp, backupThrp def getThroughput(self, instId: int) -> float: @@ -376,7 +376,7 @@ def getThroughput(self, instId: int) -> float: if instId >= self.instances.count: return None reqs, tm = self.numOrderedRequests[instId] - return reqs / tm if tm else None + return reqs / tm if tm else 0 def getInstanceMetrics(self, forAllExcept: int) -> Tuple[Optional[int], Optional[float]]: """ diff --git a/plenum/server/msg_filter.py b/plenum/server/msg_filter.py new file mode 100644 index 0000000000..bd869054f5 --- /dev/null +++ b/plenum/server/msg_filter.py @@ -0,0 +1,43 @@ +from abc import abstractmethod +from typing import Optional + +from stp_core.common.log import getlogger + 
+logger = getlogger() + +class MessageFilter: + + @abstractmethod + def filter_node_to_node(self, msg) -> Optional[str]: + raise NotImplementedError + + @abstractmethod + def filter_client_to_node(self, req) -> Optional[str]: + raise NotImplementedError + + +class MessageFilterEngine: + def __init__(self): + self.__filters = {} + + def add_filter(self, name: str, filter: MessageFilter): + self.__filters[name] = filter + + def remove_filter(self, name: str): + self.__filters.pop(name, None) + + def filter_node_to_node(self, msg) -> Optional[str]: + for fltr in self.__filters.values(): + filter_desc = fltr.filter_node_to_node(msg) + if filter_desc: + logger.debug("Filtered node-to-node msg {} since {}".format(msg, filter_desc)) + return filter_desc + return None + + def filter_client_to_node(self, req) -> Optional[str]: + for fltr in self.__filters.values(): + filter_desc = fltr.filter_client_to_node(req) + if filter_desc: + logger.debug("Filtered client request {} since {}".format(req, filter_desc)) + return filter_desc + return None \ No newline at end of file diff --git a/plenum/server/node.py b/plenum/server/node.py index 8f65adccb3..898285c4ed 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -1,15 +1,11 @@ -import asyncio -import json import os -import random -import shutil import time from binascii import unhexlify -from collections import OrderedDict from collections import deque, defaultdict from contextlib import closing -from typing import Dict, Any, Mapping, Iterable, List, Optional, \ - Sequence, Set, Tuple +from typing import Dict, Any, Mapping, Iterable, List, Optional, Set, Tuple + +from intervaltree import IntervalTree from ledger.compact_merkle_tree import CompactMerkleTree from ledger.serializers.compact_serializer import CompactSerializer @@ -17,49 +13,39 @@ from ledger.stores.hash_store import HashStore from ledger.stores.memory_hash_store import MemoryHashStore from ledger.util import F -from orderedset._orderedset import 
OrderedSet +from orderedset import OrderedSet from plenum.client.wallet import Wallet from plenum.common.config_util import getConfig -from plenum.common.constants import TXN_TYPE, TXN_TIME, POOL_TXN_TYPES, \ - TARGET_NYM, ROLE, STEWARD, NYM, VERKEY, OP_FIELD_NAME, CLIENT_STACK_SUFFIX, \ - CLIENT_BLACKLISTER_SUFFIX, NODE_BLACKLISTER_SUFFIX, \ - NODE_PRIMARY_STORAGE_SUFFIX, NODE_HASH_STORE_SUFFIX, HS_FILE, DATA, ALIAS, \ - NODE_IP, HS_LEVELDB, POOL_LEDGER_ID, DOMAIN_LEDGER_ID, LedgerState from plenum.common.exceptions import SuspiciousNode, SuspiciousClient, \ MissingNodeOp, InvalidNodeOp, InvalidNodeMsg, InvalidClientMsgType, \ - InvalidClientOp, InvalidClientRequest, BaseExc, \ + InvalidClientRequest, BaseExc, \ InvalidClientMessageException, KeysNotFoundException as REx, BlowUp from plenum.common.has_file_storage import HasFileStorage from plenum.common.keygen_utils import areKeysSetup from plenum.common.ledger import Ledger from plenum.common.ledger_manager import LedgerManager from plenum.common.message_processor import MessageProcessor +from plenum.common.messages.node_message_factory import node_message_factory from plenum.common.motor import Motor from plenum.common.plugin_helper import loadPlugins from plenum.common.request import Request, SafeRequest -from plenum.common.roles import Roles from plenum.common.signer_simple import SimpleSigner from plenum.common.stacks import nodeStackClass, clientStackClass from plenum.common.startable import Status, Mode from plenum.common.throttler import Throttler from plenum.common.txn_util import getTxnOrderedFields -from plenum.common.types import Propagate, \ - Reply, Nomination, TaggedTuples, Primary, \ - Reelection, PrePrepare, Prepare, Commit, \ - Ordered, RequestAck, InstanceChange, Batch, OPERATION, BlacklistMsg, f, \ - RequestNack, HA, LedgerStatus, ConsistencyProof, CatchupReq, CatchupRep, \ - PLUGIN_TYPE_VERIFICATION, PLUGIN_TYPE_PROCESSING, PoolLedgerTxns, \ - ConsProofRequest, ElectionType, ThreePhaseType, 
Checkpoint, ThreePCState, \ - Reject -from plenum.common.util import friendlyEx, getMaxFailures, pop_keys +from plenum.common.messages.node_messages import * +from plenum.common.types import PLUGIN_TYPE_VERIFICATION, PLUGIN_TYPE_PROCESSING, OPERATION +from plenum.common.util import friendlyEx, getMaxFailures, pop_keys, \ + compare_3PC_keys, get_utc_epoch from plenum.common.verifier import DidVerifier from plenum.persistence.leveldb_hash_store import LevelDbHashStore from plenum.persistence.req_id_to_txn import ReqIdrToTxn from plenum.persistence.storage import Storage, initStorage, initKeyValueStorage -from plenum.persistence.util import txnsWithMerkleInfo -from plenum.server import primary_elector +from plenum.server.message_req_processor import MessageReqProcessor +from plenum.server.primary_selector import PrimarySelector from plenum.server import replica from plenum.server.blacklister import Blacklister from plenum.server.blacklister import SimpleBlacklister @@ -75,25 +61,29 @@ from plenum.server.pool_manager import HasPoolManager, TxnPoolManager, \ RegistryPoolManager from plenum.server.primary_decider import PrimaryDecider -from plenum.server.primary_elector import PrimaryElector from plenum.server.propagator import Propagator +from plenum.server.quorums import Quorums from plenum.server.router import Router from plenum.server.suspicion_codes import Suspicions from state.pruning_state import PruningState from stp_core.common.log import getlogger from stp_core.crypto.signer import Signer +from stp_core.network.exceptions import RemoteNotFound from stp_core.network.network_interface import NetworkInterface from stp_core.ratchet import Ratchet +from stp_core.types import HA from stp_zmq.zstack import ZStack - +from plenum.common.constants import openTxns from state.state import State +from plenum.common.messages.node_messages import ViewChangeDone + pluginManager = PluginManager() logger = getlogger() class Node(HasActionQueue, Motor, Propagator, 
MessageProcessor, HasFileStorage, - HasPoolManager, PluginLoaderHelper): + HasPoolManager, PluginLoaderHelper, MessageReqProcessor): """ A node in a plenum system. """ @@ -101,6 +91,8 @@ class Node(HasActionQueue, Motor, Propagator, MessageProcessor, HasFileStorage, suspicions = {s.code: s.reason for s in Suspicions.get_list()} keygenScript = "init_plenum_keys" _client_request_class = SafeRequest + ledger_ids = [POOL_LEDGER_ID, DOMAIN_LEDGER_ID] + _wallet_class = Wallet def __init__(self, name: str, @@ -132,6 +124,8 @@ def __init__(self, self.basedirpath = basedirpath or config.baseDir self.dataDir = self.config.nodeDataDir or "data/nodes" + self._view_change_timeout = self.config.VIEW_CHANGE_TIMEOUT + HasFileStorage.__init__(self, name, baseDir=self.basedirpath, dataDir=self.dataDir) self.ensureKeysAreSetup() @@ -185,9 +179,11 @@ def __init__(self, self.cliNodeReg = self.poolManager.cliNodeReg HasActionQueue.__init__(self) - # Motor.__init__(self) + Propagator.__init__(self) + MessageReqProcessor.__init__(self) + self.primaryDecider = primaryDecider self.nodeInBox = deque() @@ -205,6 +201,8 @@ def __init__(self, 'data': {} } + self._elector = None # type: PrimaryDecider + self.instances = Instances() # QUESTION: Why does the monitor need blacklister? 
self.monitor = Monitor(self.name, @@ -237,9 +235,6 @@ def __init__(self, self.viewNo = 0 # type: int - self.rank = self.getRank(self.name, self.nodeReg) - - self.elector = None # type: PrimaryDecider # Requests that are to be given to the elector by the node self.msgsToElector = deque() @@ -248,15 +243,6 @@ def __init__(self, if self.poolLedger: self.states[POOL_LEDGER_ID] = self.poolManager.state - nodeRoutes = [(Propagate, self.processPropagate), - (InstanceChange, self.processInstanceChange)] - - nodeRoutes.extend((msgTyp, self.sendToElector) for msgTyp in - [Nomination, Primary, Reelection]) - - nodeRoutes.extend((msgTyp, self.sendToReplica) for msgTyp in - [PrePrepare, Prepare, Commit, Checkpoint, - ThreePCState]) self.perfCheckFreq = self.config.PerfCheckFreq self.nodeRequestSpikeMonitorData = { @@ -281,32 +267,38 @@ def __init__(self, # prohibited from being in this is ClientRequest and Propagation, # which both require client signature verification self.authnWhitelist = (Nomination, Primary, Reelection, - Batch, + Batch, ViewChangeDone, PrePrepare, Prepare, Checkpoint, Commit, InstanceChange, LedgerStatus, ConsistencyProof, CatchupReq, CatchupRep, - ConsProofRequest, ThreePCState) + ThreePCState, MessageReq, MessageRep, CurrentState) # Map of request identifier, request id to client name. 
Used for # dispatching the processed requests to the correct client remote - # TODO: This should be persisted in - # case the node crashes before sending the reply to the client self.requestSender = {} # Dict[Tuple[str, int], str] - nodeRoutes.extend([ - (LedgerStatus, self.ledgerManager.processLedgerStatus), + # CurrentState + self.nodeMsgRouter = Router( + (Propagate, self.processPropagate), + (InstanceChange, self.processInstanceChange), + (MessageReq, self.process_message_req), + (MessageRep, self.process_message_rep), + (PrePrepare, self.sendToReplica), + (Prepare, self.sendToReplica), + (Commit, self.sendToReplica), + (Checkpoint, self.sendToReplica), + (ThreePCState, self.sendToReplica), + (LedgerStatus, self.ledgerManager.processLedgerStatus), (ConsistencyProof, self.ledgerManager.processConsistencyProof), - (ConsProofRequest, self.ledgerManager.processConsistencyProofReq), - (CatchupReq, self.ledgerManager.processCatchupReq), - (CatchupRep, self.ledgerManager.processCatchupRep) - ]) - - self.nodeMsgRouter = Router(*nodeRoutes) + (CatchupReq, self.ledgerManager.processCatchupReq), + (CatchupRep, self.ledgerManager.processCatchupRep), + (CurrentState, self.process_current_state_message) + ) self.clientMsgRouter = Router( - (Request, self.processRequest), + (Request, self.processRequest), (LedgerStatus, self.ledgerManager.processLedgerStatus), - (CatchupReq, self.ledgerManager.processCatchupReq), + (CatchupReq, self.ledgerManager.processCatchupReq), ) # Ordered requests received from replicas while the node was not @@ -316,7 +308,7 @@ def __init__(self, # Set of (identifier, reqId) of all transactions that were received # while catching up. Used to detect overlap between stashed requests # and received replies while catching up. - self.reqsFromCatchupReplies = set() + # self.reqsFromCatchupReplies = set() # Any messages that are intended for view numbers higher than the # current view. 
@@ -332,31 +324,67 @@ def __init__(self, logger.debug("total plugins loaded in node: {}".format(tp)) # TODO: this is already happening in `start`, why here then? self.logNodeInfo() - self._id = None self._wallet = None self.seqNoDB = self.loadSeqNoDB() - # Stores the last txn seqNo that was executed for a ledger in a batch - self.batchToSeqNos = OrderedDict() # type: OrderedDict[int, int] - self.view_change_in_progress = False + # Stores the 3 phase keys for last `ProcessedBatchMapsToKeep` batches, + # the key is the ledger id and value is an interval tree with each + # interval being the range of txns and value being the 3 phase key of + # the batch in which those transactions were included. The txn range is + # exclusive of last seq no so to store txns from 1 to 100 add a range + # of `1:101` + self.txn_seq_range_to_3phase_key = {} # type: Dict[int, IntervalTree] + self._view_change_in_progress = False + + # Number of rounds of catchup done during a view change. + self.catchup_rounds_without_txns = 0 @property def id(self): - if not self._id and isinstance(self.poolManager, TxnPoolManager): - for txn in self.poolLedger.getAllTxn().values(): - if self.name == txn[DATA][ALIAS]: - self._id = txn[TARGET_NYM] - return self._id + if isinstance(self.poolManager, TxnPoolManager): + return self.poolManager.id + return None @property def wallet(self): if not self._wallet: - wallet = Wallet(self.name) + wallet = self._wallet_class(self.name) + # TODO: Should use DidSigner to move away from cryptonyms signer = SimpleSigner(seed=unhexlify(self.nodestack.keyhex)) wallet.addIdentifier(signer=signer) self._wallet = wallet return self._wallet + @property + def elector(self) -> PrimaryDecider: + return self._elector + + @elector.setter + def elector(self, value): + # clear old routes + if self._elector: + self.nodeMsgRouter.remove(self._elector.supported_msg_types) + self._elector = value + # set up new routes + if self._elector: + self.nodeMsgRouter.extend( + (msgTyp, 
self.sendToElector) for msgTyp in + self._elector.supported_msg_types) + + @property + def view_change_in_progress(self): + return self._view_change_in_progress + + @view_change_in_progress.setter + def view_change_in_progress(self, value): + self._view_change_in_progress = value + + def utc_epoch(self) -> int: + """ + Returns the UTC epoch according to it's local clock + """ + return get_utc_epoch() + def initPoolManager(self, nodeRegistry, ha, cliname, cliha): HasPoolManager.__init__(self, nodeRegistry, ha, cliname, cliha) @@ -384,6 +412,7 @@ def setF(self): self.f = getMaxFailures(self.totalNodes) self.requiredNumberOfInstances = self.f + 1 # per RBFT self.minimumNodes = (2 * self.f) + 1 # minimum for a functional pool + self.quorums = Quorums(self.totalNodes) @property def poolLedger(self): @@ -395,20 +424,22 @@ def poolLedger(self): def domainLedger(self): return self.primaryStorage + def build_ledger_status(self, ledger_id): + ledger = self.getLedger(ledger_id) + ledger_size = ledger.size + three_pc_key = self.three_phase_key_for_txn_seq_no(ledger_id, + ledger_size) + v, p = three_pc_key if three_pc_key else (None, None) + return LedgerStatus(ledger_id, ledger.size, v, p, ledger.root_hash) + @property def poolLedgerStatus(self): - return LedgerStatus(POOL_LEDGER_ID, self.poolLedger.size, - self.poolLedger.root_hash) \ - if self.poolLedger else None + if self.poolLedger: + return self.build_ledger_status(POOL_LEDGER_ID) @property def domainLedgerStatus(self): - return LedgerStatus(DOMAIN_LEDGER_ID, self.domainLedger.size, - self.domainLedger.root_hash) - - @property - def ledger_ids(self): - return [POOL_LEDGER_ID, DOMAIN_LEDGER_ID] + return self.build_ledger_status(DOMAIN_LEDGER_ID) def getLedgerRootHash(self, ledgerId, isCommitted=True): ledgerInfo = self.ledgerManager.getLedgerInfoByType(ledgerId) @@ -426,9 +457,17 @@ def stateRootHash(self, ledgerId, isCommitted=True): return state.committedHeadHash if isCommitted else state.headHash @property - def 
isParticipating(self): + def is_synced(self): + return Mode.is_done_syncing(self.mode) + + @property + def isParticipating(self) -> bool: return self.mode == Mode.participating + def start_participating(self): + logger.info('{} started participating'.format(self)) + self.mode = Mode.participating + @property def nodeStackClass(self) -> NetworkInterface: return nodeStackClass @@ -485,9 +524,9 @@ def getLedgerManager(self) -> LedgerManager: def init_ledger_manager(self): # TODO: this and tons of akin stuff should be exterminated self.ledgerManager.addLedger(DOMAIN_LEDGER_ID, - self.domainLedger, - postCatchupCompleteClbk=self.postDomainLedgerCaughtUp, - postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) + self.domainLedger, + postCatchupCompleteClbk=self.postDomainLedgerCaughtUp, + postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger) self.on_new_ledger_added(DOMAIN_LEDGER_ID) if isinstance(self.poolManager, TxnPoolManager): self.ledgerManager.addLedger(POOL_LEDGER_ID, self.poolLedger, @@ -553,15 +592,18 @@ def start(self, loop): self.logNodeInfo() - @staticmethod - def getRank(name: str, allNames: Sequence[str]): - return sorted(allNames).index(name) + @property + def rank(self) -> int: + return self.poolManager.rank + + def get_name_by_rank(self, rank): + return self.poolManager.get_name_by_rank(rank) def newPrimaryDecider(self): if self.primaryDecider: return self.primaryDecider else: - return primary_elector.PrimaryElector(self) + return PrimarySelector(self) @property def connectedNodeCount(self) -> int: @@ -681,7 +723,7 @@ async def serviceReplicas(self, limit) -> int: serviceReplicaMsgs, serviceReplicaInBox and serviceReplicaOutBox """ a = self.serviceReplicaMsgs(limit) - b = await self.serviceReplicaOutBox(limit) + b = self.serviceReplicaOutBox(limit) c = self.serviceReplicaInBox(limit) return a + b + c @@ -717,6 +759,7 @@ async def serviceElector(self) -> int: return 0 o = self.serviceElectorOutBox() i = await self.serviceElectorInbox() 
+ # TODO: Why is protected method accessed here? a = self.elector._serviceActions() return o + i + a @@ -739,35 +782,40 @@ def onConnsChanged(self, joined: Set[str], left: Set[str]): self.status = Status.starting self.elector.nodeCount = self.connectedNodeCount - if self.master_primary in joined: + if self.master_primary_name in joined: self.lost_primary_at = None - if self.master_primary in left: + if self.master_primary_name in left: logger.debug('{} lost connection to primary of master'.format(self)) self.lost_master_primary() - if self.isReady(): self.checkInstances() - # TODO: Should we only send election messages when lagged or - # otherwise too? - if isinstance(self.elector, PrimaryElector) and joined: - msgs = self.elector.getElectionMsgsForLaggedNodes() - logger.debug("{} has msgs {} for new nodes {}". - format(self, msgs, joined)) - for joinedNode in joined: - self.sendElectionMsgsToLaggingNode(joinedNode, msgs) - # Communicate current view number if any view change - # happened to the connected node - if self.viewNo > 0: - logger.debug("{} communicating view number {} to {}" - .format(self, self.viewNo-1, joinedNode)) - rid = self.nodestack.getRemote(joinedNode).uid - self.send( - self._create_instance_change_msg(self.viewNo, 0), - rid) - + for node in joined: + self.send_current_state_to_lagging_node(node) # Send ledger status whether ready (connected to enough nodes) or not - for n in joined: - self.send_ledger_status_to_newly_connected_node(n) + for node in joined: + self.send_ledger_status_to_newly_connected_node(node) + + def _sync_ledger(self, ledger_id): + """ + Sync specific ledger with other nodes + """ + self.ledgerManager.setLedgerCanSync(ledger_id, True) + for node_name in self.nodeReg: + try: + self._ask_for_ledger_status(node_name, ledger_id) + except RemoteNotFound: + logger.debug('{} did not find any remote for {} to send ' + 'request for ledger status'.format(self, node_name)) + continue + + def _ask_for_ledger_status(self, node_name: 
str, ledger_id): + """ + Ask other node for LedgerStatus + """ + self.request_msg(LEDGER_STATUS, {f.LEDGER_ID.nm: ledger_id}, + [node_name,]) + logger.debug("{} asking {} for ledger status of ledger {}" + .format(self, node_name, ledger_id)) def send_ledger_status_to_newly_connected_node(self, node_name): self.sendPoolLedgerStatus(node_name) @@ -775,17 +823,15 @@ def send_ledger_status_to_newly_connected_node(self, node_name): # peers otherwise very few peers will know that this node is lagging # behind and it will not receive sufficient consistency proofs to # verify the exact state of the ledger. - if self.mode in (Mode.discovered, Mode.participating): + # if self.mode in (Mode.discovered, Mode.participating): + if Mode.is_done_discovering(self.mode): self.sendDomainLedgerStatus(node_name) def newNodeJoined(self, txn): self.setF() new_replicas = self.adjustReplicas() - if self.adjustReplicas() > 0: - while new_replicas > 0: - self.elector.start_election_for_instance( - self.replicas[-new_replicas].instId) - new_replicas -= 1 + if new_replicas > 0: + self.decidePrimaries() def nodeLeft(self, txn): self.setF() @@ -809,12 +855,31 @@ def getClientStackNameOfNode(nodeName: str): def getClientStackHaOfNode(self, nodeName: str) -> HA: return self.cliNodeReg.get(self.getClientStackNameOfNode(nodeName)) - def sendElectionMsgsToLaggingNode(self, nodeName: str, msgs: List[Any]): + def send_current_state_to_lagging_node(self, nodeName: str): rid = self.nodestack.getRemote(nodeName).uid - for msg in msgs: - logger.debug("{} sending election message {} to lagged node {}". - format(self, msg, nodeName)) - self.send(msg, rid) + election_messages = self.elector.get_msgs_for_lagged_nodes() + message = CurrentState(viewNo=self.viewNo, + primary=election_messages) + + logger.debug("{} sending current state {} to lagged node {}". 
+ format(self, message, nodeName)) + self.send(message, rid) + + def process_current_state_message(self, msg: CurrentState, frm): + logger.debug("{} processing current state {} from {}" + .format(self, msg, frm)) + try: + # TODO: parsing of internal messages should be done with other way + # We should consider reimplementing validation so that it can + # work with internal messages. It should not only validate them, + # but also set parsed as field values + messages = [ViewChangeDone(**message) for message in msg.primary] + for message in messages: + self.sendToElector(message, frm) + except TypeError as ex: + self.discard(msg, + reason="invalid election messages", + logMethod=logger.warning) def _statusChanged(self, old: Status, new: Status) -> None: """ @@ -826,6 +891,7 @@ def _statusChanged(self, old: Status, new: Status) -> None: pass def checkInstances(self) -> None: + # TODO: Is this method really needed? """ Check if this node has the minimum required number of protocol instances, i.e. f+1. If not, add a replica. If no election is in @@ -836,7 +902,6 @@ def checkInstances(self) -> None: logger.debug("{} choosing to start election on the basis of count {} " "and nodes {}".format(self, self.connectedNodeCount, self.nodestack.conns)) - self._schedule(self.decidePrimaries) def adjustReplicas(self): """ @@ -856,7 +921,7 @@ def adjustReplicas(self): return newReplicas def _dispatch_stashed_msg(self, msg, frm): - if isinstance(msg, ElectionType): + if isinstance(msg, (ElectionType, ViewChangeDone)): self.sendToElector(msg, frm) return True elif isinstance(msg, ThreePhaseType): @@ -874,7 +939,7 @@ def processStashedMsgsForReplica(self, instId: int): if not self._dispatch_stashed_msg(msg, frm): self.discard(msg, reason="Unknown message type for replica id " "{}".format(instId), - logMethod=logger.warn) + logMethod=logger.warning) i += 1 logger.debug("{} processed {} stashed msgs for replica {}". 
format(self, i, instId)) @@ -888,7 +953,7 @@ def processStashedMsgsForView(self, view_no: int): if not self._dispatch_stashed_msg(msg, frm): self.discard(msg, reason="Unknown message type for view no " "{}".format(view_no), - logMethod=logger.warn) + logMethod=logger.warning) i += 1 logger.debug("{} processed {} stashed msgs for view no {}". format(self, i, view_no)) @@ -900,6 +965,26 @@ def decidePrimaries(self): """ self.elector.decidePrimaries() + def _check_view_change_completed(self): + """ + This thing checks whether new primary was elected. + If it was not - starts view change again + """ + logger.debug('{} running the scheduled check for view change ' + 'completion'.format(self)) + if not self.view_change_in_progress: + return False + + next_view_no = self.viewNo + 1 + logger.debug("view change to view {} is not completed in time, " + "starting view change for view {}" + .format(self.viewNo, next_view_no)) + logger.info("{} initiating a view change to {} from {}". + format(self, next_view_no, self.viewNo)) + self.sendInstanceChange(next_view_no, + Suspicions.INSTANCE_CHANGE_TIMEOUT) + return True + def createReplica(self, instId: int, isMaster: bool) -> 'replica.Replica': """ Create a new replica with the specified parameters. @@ -961,7 +1046,7 @@ def serviceReplicaMsgs(self, limit: int=None) -> int: self.replicas[idx].inBox.append(msg) return msgCount - async def serviceReplicaOutBox(self, limit: int=None) -> int: + def serviceReplicaOutBox(self, limit: int=None) -> int: """ Process `limit` number of replica messages. Here processing means appending to replica inbox. 
@@ -980,17 +1065,7 @@ async def serviceReplicaOutBox(self, limit: int=None) -> int: Checkpoint)): self.send(msg) elif isinstance(msg, Ordered): - # Checking for request in received catchup replies as a - # request ordering might have started when the node was not - # participating but by the time ordering finished, node - # might have started participating - recvd = self.gotInCatchupReplies(msg) - if self.isParticipating and not recvd: - self.processOrdered(msg) - else: - logger.debug("{} stashing {} since mode is {} and {}". - format(self, msg, self.mode, recvd)) - self.stashedOrderedReqs.append(msg) + self.try_processing_ordered(msg) elif isinstance(msg, Reject): reqKey = (msg.identifier, msg.reqId) reject = Reject(*reqKey, @@ -1027,7 +1102,7 @@ def serviceElectorOutBox(self, limit: int=None) -> int: while self.elector.outBox and (not limit or msgCount < limit): msgCount += 1 msg = self.elector.outBox.popleft() - if isinstance(msg, ElectionType): + if isinstance(msg, (ElectionType, ViewChangeDone)): self.send(msg) elif isinstance(msg, BlacklistMsg): nodeName = getattr(msg, f.NODE_NAME.nm) @@ -1077,14 +1152,24 @@ def primaryReplicaNo(self) -> Optional[int]: return self._primary_replica_no @property - def master_primary(self) -> Optional[str]: + def master_primary_name(self) -> Optional[str]: """ Return the name of the primary node of the master instance """ - if self.replicas[0].primaryName: - return self.replicas[0].getNodeName(self.replicas[0].primaryName) + + master_primary_name = self.master_replica.primaryName + if master_primary_name: + return self.master_replica.getNodeName(master_primary_name) return None + @property + def master_last_ordered_3PC(self) -> Tuple[int, int]: + return self.master_replica.last_ordered_3pc + + @property + def master_replica(self): + return self.replicas[0] + @staticmethod def is_valid_view_or_inst(n): return not(n is None or not isinstance(n, int) or n < 0) @@ -1125,6 +1210,8 @@ def msgHasAcceptableViewNo(self, msg, frm) -> 
bool: elif viewNo > self.viewNo: if viewNo not in self.msgsForFutureViews: self.msgsForFutureViews[viewNo] = deque() + logger.debug('{} stashing a message for a future view: {}'. + format(self, msg)) self.msgsForFutureViews[viewNo].append((msg, frm)) else: return True @@ -1148,7 +1235,8 @@ def sendToElector(self, msg, frm): :param msg: the message to send :param frm: the name of the node which sent this `msg` """ - if self.msgHasAcceptableInstId(msg, frm) and \ + if (isinstance(msg, ViewChangeDone) or + self.msgHasAcceptableInstId(msg, frm)) and \ self.msgHasAcceptableViewNo(msg, frm): logger.debug("{} sending message to elector: {}". format(self, (msg, frm))) @@ -1190,24 +1278,21 @@ def validateNodeMsg(self, wrappedMsg): .format(frm), logger.info) return None - op = msg.pop(OP_FIELD_NAME, None) - if not op: - raise MissingNodeOp - cls = TaggedTuples.get(op, None) - if not cls: - raise InvalidNodeOp(op) try: - cMsg = cls(**msg) + message = node_message_factory.get_instance(**msg) + except (MissingNodeOp, InvalidNodeOp) as ex: + raise ex except Exception as ex: raise InvalidNodeMsg(str(ex)) + try: - self.verifySignature(cMsg) + self.verifySignature(message) except BaseExc as ex: - raise SuspiciousNode(frm, ex, cMsg) from ex + raise SuspiciousNode(frm, ex, message) from ex logger.debug("{} received node message from {}: {}". - format(self, frm, cMsg), + format(self, frm, message), extra={"cli": False}) - return cMsg, frm + return message, frm def unpackNodeMsg(self, msg, frm) -> None: """ @@ -1217,6 +1302,9 @@ def unpackNodeMsg(self, msg, frm) -> None: :param msg: a node message :param frm: the name of the node that sent this `msg` """ + # TODO: why do we unpack batches here? Batching is a feature of + # a transport, it should be encapsulated. 
+ if isinstance(msg, Batch): logger.debug("{} processing a batch {}".format(self, msg)) for m in msg.messages: @@ -1286,7 +1374,6 @@ def handleInvalidClientMsg(self, ex, wrappedMsg): def validateClientMsg(self, wrappedMsg): """ Validate a message sent by a client. - :param wrappedMsg: a message from a client :return: Tuple of clientMessage and client address """ @@ -1296,17 +1383,14 @@ def validateClientMsg(self, wrappedMsg): .format(frm), logger.info) return None + needStaticValidation = False if all(attr in msg.keys() for attr in [OPERATION, f.IDENTIFIER.nm, f.REQ_ID.nm]): - self.doStaticValidation(msg[f.IDENTIFIER.nm], - msg[f.REQ_ID.nm], - msg[OPERATION]) cls = self._client_request_class + needStaticValidation = True elif OP_FIELD_NAME in msg: - op = msg.pop(OP_FIELD_NAME) - cls = TaggedTuples.get(op, None) - if not cls: - raise InvalidClientOp(op, msg.get(f.REQ_ID.nm)) + op = msg[OP_FIELD_NAME] + cls = node_message_factory.get_type(op) if cls not in (Batch, LedgerStatus, CatchupReq): raise InvalidClientMsgType(cls, msg.get(f.REQ_ID.nm)) else: @@ -1322,6 +1406,11 @@ def validateClientMsg(self, wrappedMsg): raise InvalidClientRequest(msg.get(f.IDENTIFIER.nm), msg.get(f.REQ_ID.nm)) from ex + if needStaticValidation: + self.doStaticValidation(msg[f.IDENTIFIER.nm], + msg[f.REQ_ID.nm], + msg[OPERATION]) + if self.isSignatureVerificationNeeded(msg): self.verifySignature(cMsg) # Suspicions should only be raised when lot of sig failures are @@ -1380,17 +1469,26 @@ async def processClientInBox(self): format(self.clientstack.name, frm, req), extra={"cli": True, "tags": ["node-msg-processing"]}) + try: await self.clientMsgRouter.handle(m) except InvalidClientMessageException as ex: self.handleInvalidClientMsg(ex, m) + def _reject_msg(self, msg, frm, reason): + reqKey = (msg.identifier, msg.reqId) + reject = Reject(*reqKey, + reason) + self.transmitToClient(reject, frm) + def postPoolLedgerCaughtUp(self, **kwargs): self.mode = Mode.discovered # The node might have 
discovered more nodes, so see if schedule # election if needed. if isinstance(self.poolManager, TxnPoolManager): self.checkInstances() + + # TODO: why we do it this way? # Initialising node id in case where node's information was not present # in pool ledger at the time of starting, happens when a non-genesis # node starts @@ -1401,12 +1499,9 @@ def catchup_next_ledger_after_pool(self): self.start_domain_ledger_sync() def start_domain_ledger_sync(self): - self.ledgerManager.setLedgerCanSync(DOMAIN_LEDGER_ID, True) - for nm in self.nodestack.connecteds: - self.sendDomainLedgerStatus(nm) + self._sync_ledger(DOMAIN_LEDGER_ID) self.ledgerManager.processStashedLedgerStatuses(DOMAIN_LEDGER_ID) - def postDomainLedgerCaughtUp(self, **kwargs): """ Process any stashed ordered requests and set the mode to @@ -1416,17 +1511,23 @@ def postDomainLedgerCaughtUp(self, **kwargs): pass def preLedgerCatchUp(self, ledger_id): + # Process any Ordered requests. This causes less transactions to be + # requested during catchup. 
Also commits any uncommitted state that + # can be committed + logger.debug('{} going to process any ordered requests before starting' + ' catchup.'.format(self)) + self.force_process_ordered() + self.processStashedOrderedReqs() + # make the node Syncing self.mode = Mode.syncing # revert uncommitted txns and state for unordered requests - self.replicas[0].revert_unordered_batches(ledger_id) - + r = self.master_replica.revert_unordered_batches() + logger.debug('{} reverted {} batches before starting catch up for ' + 'ledger {}'.format(self, r, ledger_id)) def postTxnFromCatchupAddedToLedger(self, ledgerId: int, txn: Any): - self.reqsFromCatchupReplies.add((txn.get(f.IDENTIFIER.nm), - txn.get(f.REQ_ID.nm))) - rh = self.postRecvTxnFromCatchup(ledgerId, txn) if rh: rh.updateState([txn], isCommitted=True) @@ -1444,21 +1545,107 @@ def postRecvTxnFromCatchup(self, ledgerId: int, txn: Any): rh = self.reqHandler return rh + # TODO: should be renamed to `post_all_ledgers_caughtup` def allLedgersCaughtUp(self): - if self.ledgerManager.lastCaughtUpPpSeqNo > 0: - # TODO: currently we use the same ppSeqNo for all instances - for replica in self.replicas: - replica.caught_up_till_pp_seq_no(self.ledgerManager.lastCaughtUpPpSeqNo) - - self.mode = Mode.participating + if self.num_txns_caught_up_in_last_catchup() == 0: + self.catchup_rounds_without_txns += 1 + last_caught_up_3PC = self.ledgerManager.last_caught_up_3PC + if compare_3PC_keys(self.master_last_ordered_3PC, + last_caught_up_3PC) > 0: + self.master_replica.caught_up_till_3pc(last_caught_up_3PC) + logger.info('{} caught up till {}'.format(self, last_caught_up_3PC), + extra={'cli': True}) + + # TODO: Maybe a slight optimisation is to check result of + # `self.num_txns_caught_up_in_last_catchup()` self.processStashedOrderedReqs() - # TODO: next line not needed - self.checkInstances() - def getLedger(self, ledgerId): + if self.is_catchup_needed(): + logger.debug('{} needs to catchup again'.format(self)) + self.start_catchup() 
+ else: + logger.info('{} does not need any more catchups'.format(self), + extra={'cli': True}) + self.no_more_catchups_needed() + + def is_catchup_needed(self) -> bool: + """ + Check if received a quorum of view change done messages and if yes + check if caught up till the + Check if all requests ordered till last prepared certificate + Check if last catchup resulted in no txns + """ + if self.caught_up_for_current_view(): + logger.info('{} is caught up for the current view {}'. + format(self, self.viewNo)) + return False + logger.debug('{} is not caught up for the current view {}'. + format(self, self.viewNo)) + if self.num_txns_caught_up_in_last_catchup() == 0: + if self.has_ordered_till_last_prepared_certificate(): + logger.debug('{} ordered till last prepared certificate'.format(self)) + return False + if self.catchup_rounds_without_txns >= self.config.MAX_CATCHUPS_DONE_DURING_VIEW_CHANGE: + logger.debug('{} has completed {} catchup rounds'. + format(self, self.catchup_rounds_without_txns)) + # No more 3PC messages will be processed since maximum catchup + # rounds have been done + self.master_replica.last_prepared_before_view_change = None + return False + return True + + def caught_up_for_current_view(self) -> bool: + if not self.elector._hasViewChangeQuorum: + logger.debug('{} does not have view change quorum for view {}'. 
+ format(self, self.viewNo)) + return False + vc = self.elector.has_sufficient_same_view_change_done_messages + if not vc: + logger.debug('{} does not have acceptable ViewChangeDone for ' + 'view {}'.format(self, self.viewNo)) + return False + ledger_info = vc[1] + for lid, size, root_hash in ledger_info: + ledger = self.ledgerManager.ledgerRegistry[lid].ledger + if size == 0: + continue + if ledger.size < size: + return False + if ledger.hashToStr(ledger.tree.merkle_tree_hash(0, size)) != root_hash: + return False + return True + + def has_ordered_till_last_prepared_certificate(self) -> bool: + lst = self.master_replica.last_prepared_before_view_change + if lst is None: + return True + return compare_3PC_keys(lst, self.master_replica.last_ordered_3pc) >= 0 + + def num_txns_caught_up_in_last_catchup(self) -> int: + count = sum([l.num_txns_caught_up for l in + self.ledgerManager.ledgerRegistry.values()]) + logger.debug('{} caught up to {} txns in the last catchup'. + format(self, count)) + return count + + def no_more_catchups_needed(self): + # This method is called when no more catchups needed + self.mode = Mode.synced + self.decidePrimaries() + # TODO: need to think of a better way + # If the node was not participating but has now found a primary, + # then set mode to participating, can happen if a catchup is triggered + # without a view change or node start + if not self.isParticipating and self.master_replica.hasPrimary: + logger.debug('{} starting to participate since catchup is done, ' + 'primaries are selected but mode was not set to ' + 'participating'.format(self)) + self.start_participating() + + def getLedger(self, ledgerId) -> Ledger: return self.ledgerManager.getLedgerInfoByType(ledgerId).ledger - def getState(self, ledgerId): + def getState(self, ledgerId) -> PruningState: return self.states.get(ledgerId) def post_txn_from_catchup_added_to_domain_ledger(self, txn): @@ -1480,8 +1667,7 @@ def getLedgerStatus(self, ledgerId: int): def 
sendLedgerStatus(self, nodeName: str, ledgerId: int): ledgerStatus = self.getLedgerStatus(ledgerId) if ledgerStatus: - rid = self.nodestack.getRemote(nodeName).uid - self.send(ledgerStatus, rid) + self.sendToNodes(ledgerStatus, [nodeName]) else: logger.debug("{} not sending ledger {} status to {} as it is null" .format(self, ledgerId, nodeName)) @@ -1509,20 +1695,21 @@ def doDynamicValidation(self, request: Request): else: self.domainDynamicValidation(request) - def applyReq(self, request: Request): + def applyReq(self, request: Request, cons_time: int): """ - Apply request to appropriate ledger and state + Apply request to appropriate ledger and state. `cons_time` is the + UTC epoch at which consensus was reached. """ if self.ledgerIdForRequest(request) == POOL_LEDGER_ID: - return self.poolManager.applyReq(request) + return self.poolManager.applyReq(request, cons_time) else: - return self.domainRequestApplication(request) + return self.domainRequestApplication(request, cons_time) def domainDynamicValidation(self, request: Request): self.reqHandler.validate(request, self.config) - def domainRequestApplication(self, request: Request): - return self.reqHandler.apply(request) + def domainRequestApplication(self, request: Request, cons_time: int): + return self.reqHandler.apply(request, cons_time) def processRequest(self, request: Request, frm: str): """ @@ -1554,18 +1741,22 @@ def processRequest(self, request: Request, frm: str): ledgerId = self.ledgerIdForRequest(request) ledger = self.getLedger(ledgerId) - reply = self.getReplyFromLedger(ledger, request) - if reply: - logger.debug("{} returning REPLY from already processed " - "REQUEST: {}".format(self, request)) - self.transmitToClient(reply, frm) + + if request.operation[TXN_TYPE] == GET_TXN: + self.handle_get_txn_req(request, frm) else: - if not self.isProcessingReq(*request.key): - self.startedProcessingReq(*request.key, frm) - # If not already got the propagate request(PROPAGATE) for the - # corresponding 
client request(REQUEST) - self.recordAndPropagate(request, frm) - self.transmitToClient(RequestAck(*request.key), frm) + reply = self.getReplyFromLedger(ledger, request) + if reply: + logger.debug("{} returning REPLY from already processed " + "REQUEST: {}".format(self, request)) + self.transmitToClient(reply, frm) + else: + if not self.isProcessingReq(*request.key): + self.startedProcessingReq(*request.key, frm) + # If not already got the propagate request(PROPAGATE) for the + # corresponding client request(REQUEST) + self.recordAndPropagate(request, frm) + self.send_ack_to_client(request.key, frm) # noinspection PyUnusedLocal def processPropagate(self, msg: Propagate, frm): @@ -1589,10 +1780,12 @@ def processPropagate(self, msg: Propagate, frm): if not self.isProcessingReq(*request.key): self.startedProcessingReq(*request.key, clientName) + elif clientName is not None and not self.is_sender_known_for_req(*request.key): + # Since some propagates might not include the client name + self.set_sender_for_req(*request.key, clientName) + self.requests.addPropagate(request, frm) - # # Only propagate if the node is participating in the consensus process - # # which happens when the node has completed the catchup process self.propagate(request, clientName) self.tryForwarding(request) @@ -1605,6 +1798,38 @@ def isProcessingReq(self, identifier, reqId) -> bool: def doneProcessingReq(self, identifier, reqId): self.requestSender.pop((identifier, reqId)) + def is_sender_known_for_req(self, identifier, reqId): + return self.requestSender.get((identifier, reqId)) is not None + + def set_sender_for_req(self, identifier, reqId, frm): + self.requestSender[identifier, reqId] = frm + + def send_ack_to_client(self, req_key, to_client): + self.transmitToClient(RequestAck(*req_key), to_client) + + def handle_get_txn_req(self, request: Request, frm: str): + """ + Handle GET_TXN request + """ + self.send_ack_to_client(request.key, frm) + ledgerId = self.ledgerIdForRequest(request) + ledger 
= self.getLedger(ledgerId) + txn = self.getReplyFromLedger(ledger=ledger, + seq_no=request.operation[DATA]) + + result = { + f.IDENTIFIER.nm: request.identifier, + f.REQ_ID.nm: request.reqId, + TXN_TYPE: request.operation[TXN_TYPE], + DATA: None + } + + if txn: + result[DATA] = json.dumps(txn.result) + result[f.SEQ_NO.nm] = txn.result[f.SEQ_NO.nm] + + self.transmitToClient(Reply(result), frm) + def processOrdered(self, ordered: Ordered): """ Process and orderedRequest. @@ -1630,8 +1855,8 @@ def processOrdered(self, ordered: Ordered): if len(reqs) == len(req_idrs): logger.debug("{} executing Ordered batch {} {} of {} requests". format(self.name, view_no, pp_seq_no, len(req_idrs))) - self.executeBatch(pp_seq_no, pp_time, reqs, ledger_id, state_root, - txn_root) + self.executeBatch(view_no, pp_seq_no, pp_time, reqs, ledger_id, + state_root, txn_root) r = True else: logger.warning('{} did not find {} finalized requests, but ' @@ -1645,6 +1870,37 @@ def processOrdered(self, ordered: Ordered): self.monitor.requestOrdered(req_idrs, inst_id, byMaster=r) return r + def force_process_ordered(self): + """ + Take any messages from replica that have been ordered and process + them, this should be done rarely, like before catchup starts + so a more current LedgerStatus can be sent. + can be called either + 1. when node is participating, this happens just before catchup starts + so the node can have the latest ledger status or + 2. 
when node is not participating but a round of catchup is about to be + started, here is forces all the replica ordered messages to be appended + to the stashed ordered requests and the stashed ordered requests are + processed with appropriate checks + """ + for r in self.replicas: + i = 0 + for msg in r._remove_ordered_from_queue(): + # self.processOrdered(msg) + self.try_processing_ordered(msg) + i += 1 + logger.debug( + '{} processed {} Ordered batches for instance {} before ' + 'starting catch up'.format(self, i, r.instId)) + + def try_processing_ordered(self, msg): + if self.isParticipating: + self.processOrdered(msg) + else: + logger.debug("{} stashing {} since mode is {}". + format(self, msg, self.mode)) + self.stashedOrderedReqs.append(msg) + def processEscalatedException(self, ex): """ Process an exception escalated from a Replica @@ -1696,14 +1952,14 @@ def processInstanceChange(self, instChg: InstanceChange, frm: str) -> None: def do_view_change_if_possible(self, view_no): # TODO: Need to handle skewed distributions which can arise due to # malicious nodes sending messages early on - r, msg = self.canViewChange(view_no) - if r: + can, whyNot = self.canViewChange(view_no) + if can: logger.info("{} initiating a view change to {} from {}". format(self, view_no, self.viewNo)) self.startViewChange(view_no) else: - logger.debug(msg) - return r + logger.debug(whyNot) + return can def checkPerformance(self): """ @@ -1792,16 +2048,12 @@ def initInsChngThrottling(self): ratchet = Ratchet(a=2, b=0.05, c=1, base=2, peak=windowSize) self.insChngThrottler = Throttler(windowSize, ratchet.get) - @property - def quorum(self) -> int: - r""" - Return the quorum of this RBFT system. Equal to :math:`2f + 1`. 
- """ - return (2 * self.f) + 1 - - def primary_found(self): + def primary_selected(self, instance_id): # If the node has primary replica of master instance - self.monitor.hasMasterPrimary = self.primaryReplicaNo == 0 + if instance_id == 0: + # TODO: 0 should be replaced with configurable constant + self.monitor.hasMasterPrimary = self.primaryReplicaNo == 0 + if self.view_change_in_progress and self.all_instances_have_primary: self.on_view_change_complete(self.viewNo) @@ -1815,7 +2067,8 @@ def canViewChange(self, proposedViewNo: int) -> (bool, str): number and its view is less than or equal to the proposed view """ msg = None - if not self.instanceChanges.hasQuorum(proposedViewNo, self.f): + quorum = self.quorums.view_change.value + if not self.instanceChanges.hasQuorum(proposedViewNo, quorum): msg = '{} has no quorum for view {}'.format(self, proposedViewNo) elif not proposedViewNo > self.viewNo: msg = '{} is in higher view more than {}'.format(self, proposedViewNo) @@ -1849,26 +2102,40 @@ def lost_master_primary(self): self._schedule(self.propose_view_change, self.config.ToleratePrimaryDisconnection) - # TODO: consider moving this to pool manager def startViewChange(self, proposedViewNo: int): """ Trigger the view change process. :param proposedViewNo: the new view number after view change. """ + # TODO: consider moving this to pool manager + # TODO: view change is a special case, which can have different + # implementations - we need to make this logic pluggable + self.view_change_in_progress = True + self._schedule(action=self._check_view_change_completed, + seconds=self._view_change_timeout) + self.master_replica.on_view_change_start() self.viewNo = proposedViewNo logger.debug("{} resetting monitor stats after view change". 
format(self)) self.monitor.reset() - self.processStashedMsgsForView(proposedViewNo) + self.processStashedMsgsForView(self.viewNo) # Now communicate the view change to the elector which will # contest primary elections across protocol all instances - self.elector.viewChanged(self.viewNo) + self.elector.view_change_started(self.viewNo) self._primary_replica_no = None pop_keys(self.msgsForFutureViews, lambda x: x <= self.viewNo) self.initInsChngThrottling() self.logNodeInfo() + # Keep on doing catchup until >(n-f) nodes LedgerStatus same on have a + # prepared certificate the first PRE-PREPARE of the new view + logger.info('{} changed to view {}, will start catchup now'. + format(self, self.viewNo)) + # Set to 0 even when set to 0 in `on_view_change_complete` since + # catchup might be started due to several reasons. + self.catchup_rounds_without_txns = 0 + self.start_catchup() def on_view_change_complete(self, view_no): """ @@ -1877,6 +2144,30 @@ def on_view_change_complete(self, view_no): """ self.view_change_in_progress = False self.instanceChanges.pop(view_no-1, None) + self.master_replica.on_view_change_done() + self.catchup_rounds_without_txns = 0 + + def start_catchup(self): + # Process any already Ordered requests by the replica + + if self.mode == Mode.starting: + logger.info('{} does not start the catchup procedure ' + 'because it is already in this state'.format(self)) + return + self.force_process_ordered() + self.mode = Mode.starting + self.ledgerManager.prepare_ledgers_for_sync() + ledger_id = DOMAIN_LEDGER_ID + if self._is_there_pool_ledger(): + # Pool ledger should be synced first + # Sync up for domain ledger will be called in + # its post-syncup callback + ledger_id = POOL_LEDGER_ID + self._sync_ledger(ledger_id) + + def _is_there_pool_ledger(self): + # TODO isinstance is not OK + return isinstance(self.poolManager, TxnPoolManager) def ordered_prev_view_msgs(self, inst_id, pp_seqno): logger.debug('{} ordered previous view batch {} by instance {}'. 
@@ -1912,35 +2203,52 @@ def authNr(self, req): return self.clientAuthNr def isSignatureVerificationNeeded(self, msg: Any): + op = msg.get(OPERATION) + if op: + if op.get(TXN_TYPE) in openTxns: + return False return True - def ppSeqNoForTxnSeqNo(self, ledgerId, seqNo): - # Looking in reverse since its more likely to be recent - for ppSeqNo, (lid, txnSeqNo) in reversed(self.batchToSeqNos.items()): - if lid == ledgerId and txnSeqNo == seqNo: - return ppSeqNo - return 0 + def three_phase_key_for_txn_seq_no(self, ledger_id, seq_no): + if ledger_id in self.txn_seq_range_to_3phase_key: + # point query in interval tree + s = self.txn_seq_range_to_3phase_key[ledger_id][seq_no] + if s: + # There should not be more than one interval for any seq no in + # the tree + assert len(s) == 1 + return s.pop().data + return None - def executeBatch(self, ppSeqNo: int, ppTime: float, reqs: List[Request], - ledgerId, stateRoot, txnRoot) -> None: + def executeBatch(self, view_no, pp_seq_no: int, pp_time: float, + reqs: List[Request], ledger_id, state_root, + txn_root) -> None: """ Execute the REQUEST sent to this Node - :param viewNo: the view number (See glossary) - :param ppTime: the time at which PRE-PREPARE was sent + :param view_no: the view number (See glossary) + :param pp_time: the time at which PRE-PREPARE was sent :param reqs: list of client REQUESTs """ - committedTxns = self.requestExecuter[ledgerId](ppTime, reqs, stateRoot, - txnRoot) + committedTxns = self.requestExecuter[ledger_id](pp_time, reqs, + state_root, txn_root) if committedTxns: - lastTxnSeqNo = committedTxns[-1][F.seqNo.name] - self.batchToSeqNos[ppSeqNo] = (ledgerId, lastTxnSeqNo) - logger.display('{} storing ppSeqno {} for ledger {} seqNo {}'. - format(self, ppSeqNo, ledgerId, lastTxnSeqNo)) - if len(self.batchToSeqNos) > self.config.ProcessedBatchMapsToKeep: - x = self.batchToSeqNos.popitem(last=False) - logger.debug('{} popped {} from batch to txn seqNo map'. 
- format(self, x)) + first_txn_seq_no = committedTxns[0][F.seqNo.name] + last_txn_seq_no = committedTxns[-1][F.seqNo.name] + if ledger_id not in self.txn_seq_range_to_3phase_key: + self.txn_seq_range_to_3phase_key[ledger_id] = IntervalTree() + # adding one to end of range since its exclusive + intrv_tree = self.txn_seq_range_to_3phase_key[ledger_id] + intrv_tree[first_txn_seq_no:last_txn_seq_no+1] = (view_no, pp_seq_no) + logger.debug('{} storing 3PC key {} for ledger {} range {}'. + format(self, (view_no, pp_seq_no), ledger_id, + (first_txn_seq_no, last_txn_seq_no))) + if len(intrv_tree) > self.config.ProcessedBatchMapsToKeep: + # Remove the first element from the interval tree + old = intrv_tree[intrv_tree.begin()].pop() + intrv_tree.remove(old) + logger.debug('{} popped {} from txn to batch seqNo map'. + format(self, old)) def updateSeqNoMap(self, committedTxns): self.seqNoDB.addBatch((txn[f.IDENTIFIER.nm], txn[f.REQ_ID.nm], @@ -2011,8 +2319,13 @@ def sendRepliesToClients(self, committedTxns, ppTime): def sendReplyToClient(self, reply, reqKey): if self.isProcessingReq(*reqKey): - logger.debug('{} sending reply for {} to client'.format(self, reqKey)) - self.transmitToClient(reply, self.requestSender[reqKey]) + sender = self.requestSender[reqKey] + if sender: + logger.debug('{} sending reply for {} to client'.format(self, reqKey)) + self.transmitToClient(reply, self.requestSender[reqKey]) + else: + logger.debug('{} not sending reply for {}, since do not ' + 'know client'.format(self, reqKey)) self.doneProcessingReq(*reqKey) def addNewRole(self, txn): @@ -2027,29 +2340,35 @@ def addNewRole(self, txn): v = DidVerifier(verkey, identifier=identifier) if identifier not in self.clientAuthNr.clients: role = txn.get(ROLE) - if role not in (STEWARD, None): - logger.error("Role if present must be {}". - format(Roles.STEWARD.name)) + if role not in (STEWARD, TRUSTEE, None): + logger.error("Role if present must be {} and not {}". 
+ format(Roles.STEWARD.name, role)) return self.clientAuthNr.addIdr(identifier, verkey=v.verkey, role=role) - @staticmethod - def initStateFromLedger(state: State, ledger: Ledger, reqHandler): - # If the trie is empty then initialize it by applying - # txns from ledger + def initStateFromLedger(self, state: State, ledger: Ledger, reqHandler): + """ + If the trie is empty then initialize it by applying + txns from ledger. + """ if state.isEmpty: - txns = [_ for _ in ledger.getAllTxn().values()] - reqHandler.updateState(txns, isCommitted=True) - state.commit(rootHash=state.headHash) + logger.info('{} found state to be empty, recreating from ' + 'ledger'.format(self)) + for seq_no, txn in ledger.getAllTxn(): + txn[f.SEQ_NO.nm] = seq_no + txn = self.update_txn_with_extra_data(txn) + reqHandler.updateState([txn, ], isCommitted=True) + state.commit(rootHash=state.headHash) def initDomainState(self): self.initStateFromLedger(self.states[DOMAIN_LEDGER_ID], self.domainLedger, self.reqHandler) def addGenesisNyms(self): - for _, txn in self.domainLedger.getAllTxn().items(): + # THIS SHOULD NOT BE DONE FOR PRODUCTION + for _, txn in self.domainLedger.getAllTxn(): if txn.get(TXN_TYPE) == NYM: self.addNewRole(txn) @@ -2061,19 +2380,22 @@ def processStashedOrderedReqs(self): i = 0 while self.stashedOrderedReqs: msg = self.stashedOrderedReqs.popleft() - if msg.ppSeqNo <= self.ledgerManager.lastCaughtUpPpSeqNo: - logger.debug('{} ignoring stashed ordered msg {} since ledger ' - 'manager has lastCaughtUpPpSeqNo as {}'. - format(self, msg, - self.ledgerManager.lastCaughtUpPpSeqNo)) - continue - if not self.gotInCatchupReplies(msg): - if msg.instId == 0: - logger.debug('{} applying stashed Ordered msg {}'. 
- format(self, msg)) - for reqKey in msg.reqIdr: - req = self.requests[reqKey].finalised - self.applyReq(req) + if msg.instId == 0: + if compare_3PC_keys((msg.viewNo, msg.ppSeqNo), + self.ledgerManager.last_caught_up_3PC) >= 0: + logger.debug('{} ignoring stashed ordered msg {} since ledger ' + 'manager has last_caught_up_3PC as {}'. + format(self, msg, + self.ledgerManager.last_caught_up_3PC)) + continue + logger.debug('{} applying stashed Ordered msg {}'.format(self, msg)) + # Since the PRE-PREPAREs ans PREPAREs corresponding to these + # stashed ordered requests was not processed. + for reqKey in msg.reqIdr: + req = self.requests[reqKey].finalised + self.applyReq(req, msg.ppTime) + self.processOrdered(msg) + else: self.processOrdered(msg) i += 1 logger.debug("{} processed {} stashed ordered requests".format(self, i)) @@ -2082,10 +2404,6 @@ def processStashedOrderedReqs(self): self.monitor.reset() return i - def gotInCatchupReplies(self, msg): - reqIdr = getattr(msg, f.REQ_IDR.nm) - return set(reqIdr).intersection(self.reqsFromCatchupReplies) - def sync3PhaseState(self): for replica in self.replicas: self.send(replica.threePhaseState) @@ -2225,12 +2543,17 @@ def send(self, msg: Any, *rids: Iterable[int], signer: Signer = None): .format(self, msg, recipientsNum, remoteNames)) self.nodestack.send(msg, *rids, signer=signer) - def getReplyFromLedger(self, ledger, request): + def sendToNodes(self, msg: Any, names: Iterable[str]=None): + # TODO: This method exists in `Client` too, refactor to avoid duplication + rids = [rid for rid, r in self.nodestack.remotes.items() if r.name in names] if names else [] + self.send(msg, *rids) + + def getReplyFromLedger(self, ledger, request=None, seq_no=None): # DoS attack vector, client requesting already processed request id # results in iterating over ledger (or its subset) - seqNo = self.seqNoDB.get(request.identifier, request.reqId) - if seqNo: - txn = ledger.getBySeqNo(int(seqNo)) + seq_no = seq_no if seq_no else 
self.seqNoDB.get(request.identifier, request.reqId) + if seq_no: + txn = ledger.getBySeqNo(int(seq_no)) if txn: txn.update(ledger.merkleInfo(txn.get(F.seqNo.name))) txn = self.update_txn_with_extra_data(txn) @@ -2261,39 +2584,34 @@ def logstats(self): """ Print the node's current statistics to log. """ - lines = [] - l = lines.append - l("node {} current stats".format(self)) - l("--------------------------------------------------------") - l("node inbox size : {}".format(len(self.nodeInBox))) - l("client inbox size : {}". - format(len(self.clientInBox))) - l("age (seconds) : {}". - format(time.time() - self.created)) - l("next check for reconnect: {}". - format(time.perf_counter() - self.nodestack.nextCheck)) - l("node connections : {}".format(self.nodestack.conns)) - l("f : {}".format(self.f)) - l("master instance : {}".format(self.instances.masterId)) - l("replicas : {}".format(len(self.replicas))) - l("view no : {}".format(self.viewNo)) - l("rank : {}".format(self.rank)) - l("msgs to replicas : {}". - format(len(self.msgsToReplicas))) - l("msgs to elector : {}". - format(len(self.msgsToElector))) - l("action queue : {} {}". - format(len(self.actionQueue), id(self.actionQueue))) - l("action queue stash : {} {}". 
- format(len(self.aqStash), id(self.aqStash))) + lines = [ + "node {} current stats".format(self), + "--------------------------------------------------------", + "node inbox size : {}".format(len(self.nodeInBox)), + "client inbox size : {}".format(len(self.clientInBox)), + "age (seconds) : {}".format(time.time() - self.created), + "next check for reconnect: {}".format(time.perf_counter() - + self.nodestack.nextCheck), + "node connections : {}".format(self.nodestack.conns), + "f : {}".format(self.f), + "master instance : {}".format(self.instances.masterId), + "replicas : {}".format(len(self.replicas)), + "view no : {}".format(self.viewNo), + "rank : {}".format(self.rank), + "msgs to replicas : {}".format(len(self.msgsToReplicas)), + "msgs to elector : {}".format(len(self.msgsToElector)), + "action queue : {} {}".format(len(self.actionQueue), + id(self.actionQueue)), + "action queue stash : {} {}".format(len(self.aqStash), + id(self.aqStash)), + ] logger.info("\n".join(lines), extra={"cli": False}) def collectNodeInfo(self): nodeAddress = None if self.poolLedger: - txns = self.poolLedger.getAllTxn() - for key, txn in txns.items(): + for _, txn in self.poolLedger.getAllTxn(): data = txn[DATA] if data[ALIAS] == self.name: nodeAddress = data[NODE_IP] @@ -2320,4 +2638,3 @@ def logNodeInfo(self): with closing(open(os.path.join(self.config.baseDir, 'node_info'), 'w')) \ as logNodeInfoFile: logNodeInfoFile.write(json.dumps(self.nodeInfo['data'])) - diff --git a/plenum/server/notifier_plugin_manager.py b/plenum/server/notifier_plugin_manager.py index 866089e682..ad7bf4a428 100644 --- a/plenum/server/notifier_plugin_manager.py +++ b/plenum/server/notifier_plugin_manager.py @@ -84,9 +84,9 @@ def importPlugins(self): module = importlib.import_module(plugin) self.plugins.append(module) i += 1 + logger.info("Successfully imported Notifier Plugin: {}".format(plugin)) except Exception as e: - logger.error('Importing module {} failed due to {}' - .format(plugin, e)) + 
logger.error('Importing Notifier Plugin {} failed due to {}'.format(plugin, e)) return i, len(plugins) def _sendMessage(self, topic, message): diff --git a/plenum/server/pool_manager.py b/plenum/server/pool_manager.py index 6647ef35f6..5b41981663 100644 --- a/plenum/server/pool_manager.py +++ b/plenum/server/pool_manager.py @@ -1,8 +1,11 @@ import ipaddress import os +from abc import abstractmethod +from collections import OrderedDict + import base58 -from typing import Dict, Tuple +from typing import Dict, Tuple, Optional from functools import lru_cache from copy import deepcopy @@ -17,7 +20,7 @@ from plenum.common.stack_manager import TxnStackManager from plenum.common.types import NodeDetail from plenum.persistence.storage import initKeyValueStorage -from plenum.persistence.util import txnsWithMerkleInfo, pop_merkle_info +from plenum.persistence.util import pop_merkle_info from plenum.server.pool_req_handler import PoolRequestHandler from plenum.server.suspicion_codes import Suspicions from state.pruning_state import PruningState @@ -30,20 +33,58 @@ class PoolManager: + @abstractmethod def getStackParamsAndNodeReg(self, name, basedirpath, nodeRegistry=None, ha=None, cliname=None, cliha=None): """ Returns a tuple(nodestack, clientstack, nodeReg) """ - raise NotImplementedError @property - def merkleRootHash(self): - raise NotImplementedError + @abstractmethod + def merkleRootHash(self) -> str: + """ + """ @property - def txnSeqNo(self): - raise NotImplementedError + @abstractmethod + def txnSeqNo(self) -> int: + """ + """ + + @staticmethod + def _get_rank(needle_id: str, haystack_ids: List[str]): + # Return the rank of the node where rank is defined by the order in + # which node was added to the pool or on the alphabetical order of name + # if using RegistryPoolManager + return haystack_ids.index(needle_id) + + @property + @abstractmethod + def id(self): + """ + """ + + @abstractmethod + def get_rank_of(self, node_id) -> int: + """ + """ + + @property + def 
rank(self) -> Optional[int]: + # Nodes have a total order defined in them, rank is the node's + # position in that order + if self._rank is None: + self._rank = self.get_rank_of(self.id) + return self._rank + + @abstractmethod + def get_name_by_rank(self, rank): + # Needed for communicating primary name to others and also nodeReg + # uses node names (alias) and not ids + # TODO: Should move to using node ids and not node names (alias) + """ + """ class HasPoolManager: @@ -66,6 +107,8 @@ def __init__(self, node, ha=None, cliname=None, cliha=None): self.config = node.config self.basedirpath = node.basedirpath self._ledger = None + self._id = None + self._rank = None TxnStackManager.__init__(self, self.name, self.basedirpath, isNode=True) self.state = self.loadState() self.reqHandler = self.getPoolReqHandler() @@ -203,7 +246,7 @@ def addNewNodeAndConnect(self, txn): self.node.newNodeJoined(txn) def node_about_to_be_disconnected(self, nodeName): - if self.node.master_primary == nodeName: + if self.node.master_primary_name == nodeName: self.node.sendInstanceChange(self.node.viewNo + 1, Suspicions.PRIMARY_ABOUT_TO_BE_DISCONNECTED) @@ -282,39 +325,28 @@ def getNodeName(self, nym): return nodeTxn[DATA][ALIAS] def doStaticValidation(self, identifier, reqId, operation): - if operation[TXN_TYPE] == NODE: - if not (DATA in operation and isinstance(operation[DATA], dict)): - error = "'{}' is missed or not a dict".format(DATA) - raise InvalidClientRequest(identifier, reqId, error) - # VerKey must be base58 - if len(set(operation[TARGET_NYM]) - set(base58.alphabet)) != 0: - error = "'{}' is not a base58 string".format(TARGET_NYM) - raise InvalidClientRequest(identifier, reqId, error) - - data = operation[DATA] - for fn, validator in self._dataFieldsValidators: - if fn in data and not validator(data[fn]): - error = "'{}' ('{}') is invalid".format(fn, data[fn]) - raise InvalidClientRequest(identifier, reqId, error) + pass def doDynamicValidation(self, request: Request): 
self.reqHandler.validate(request) - def applyReq(self, request: Request): - return self.reqHandler.apply(request) + def applyReq(self, request: Request, cons_time: int): + return self.reqHandler.apply(request, cons_time) @property - def merkleRootHash(self): + def merkleRootHash(self) -> str: return self.ledger.root_hash @property - def txnSeqNo(self): + def txnSeqNo(self) -> int: return self.ledger.seqNo def getNodeData(self, nym): _, nodeTxn = self.getNodeInfoFromLedger(nym) return nodeTxn[DATA] + # Question: Why are `_isIpAddressValid` and `_isPortValid` part of + # pool_manager? @staticmethod def _isIpAddressValid(ipAddress): try: @@ -328,12 +360,41 @@ def _isIpAddressValid(ipAddress): def _isPortValid(port): return isinstance(port, int) and 0 < port <= 65535 + @property + def id(self): + if not self._id: + for _, txn in self.ledger.getAllTxn(): + if self.name == txn[DATA][ALIAS]: + self._id = txn[TARGET_NYM] + return self._id + + @property + def node_ids_in_ordered_by_rank(self) -> List: + ids = OrderedDict() + for _, txn in self.ledger.getAllTxn(): + ids[txn[TARGET_NYM]] = True + return list(ids.keys()) + + def get_rank_of(self, node_id) -> Optional[int]: + if self.id is None: + # This can happen if a non-genesis node starts + return None + return self._get_rank(node_id, self.node_ids_in_ordered_by_rank) + + def get_name_by_rank(self, rank): + # This is expensive but only required while start or view change + id = self.node_ids_in_ordered_by_rank[rank] + # We don't allow changing ALIAS + for _, txn in self.ledger.getAllTxn(): + if txn[TARGET_NYM] == id and DATA in txn and ALIAS in txn[DATA]: + return txn[DATA][ALIAS] + class RegistryPoolManager(PoolManager): # This is the old way of managing the pool nodes information and # should be deprecated. 
def __init__(self, name, basedirpath, nodeRegistry, ha, cliname, cliha): - + self._rank = None self.nstack, self.cstack, self.nodeReg, self.cliNodeReg = \ self.getStackParamsAndNodeReg(name=name, basedirpath=basedirpath, nodeRegistry=nodeRegistry, ha=ha, @@ -420,3 +481,17 @@ def merkleRootHash(self): @property def txnSeqNo(self): raise UnsupportedOperation + + @property + def id(self): + return self.nstack['name'] + + @property + def node_names_ordered_by_rank(self) -> List: + return sorted(self.nodeReg.keys()) + + def get_rank_of(self, node_id) -> int: + return self._get_rank(node_id, self.node_names_ordered_by_rank) + + def get_name_by_rank(self, rank): + return self.node_names_ordered_by_rank[rank] diff --git a/plenum/server/pool_req_handler.py b/plenum/server/pool_req_handler.py index ca2c76fe7f..25872050d1 100644 --- a/plenum/server/pool_req_handler.py +++ b/plenum/server/pool_req_handler.py @@ -39,10 +39,10 @@ def validate(self, req: Request, config=None): raise UnauthorizedClientRequest(req.identifier, req.reqId, error) - def apply(self, req: Request): + def apply(self, req: Request, cons_time: int): typ = req.operation.get(TXN_TYPE) if typ == NODE: - txn = reqToTxn(req) + txn = reqToTxn(req, cons_time) (start, end), _ = self.ledger.appendTxns([txn]) self.updateState(txnsWithSeqNo(start, end, [txn])) return txn diff --git a/plenum/server/primary_decider.py b/plenum/server/primary_decider.py index fbd6cf4178..7000c7a3b0 100644 --- a/plenum/server/primary_decider.py +++ b/plenum/server/primary_decider.py @@ -1,12 +1,22 @@ +from typing import Iterable from collections import deque from plenum.common.message_processor import MessageProcessor +from plenum.common.types import f from plenum.server.has_action_queue import HasActionQueue +from plenum.server.router import Router, Route +from stp_core.common.log import getlogger +from typing import List +from abc import ABCMeta, abstractmethod +logger = getlogger() + + +class PrimaryDecider(HasActionQueue, 
MessageProcessor, metaclass=ABCMeta): -class PrimaryDecider(HasActionQueue, MessageProcessor): def __init__(self, node): HasActionQueue.__init__(self) + self.node = node self.name = node.name self.f = node.f @@ -17,14 +27,113 @@ def __init__(self, node): self.nodeCount = 0 self.inBox = deque() self.outBox = deque() + self.inBoxRouter = Router(*self.routes) + + # Need to keep track of who was primary for the master protocol + # instance for previous view, this variable only matters between + # elections, the elector will set it before doing triggering new + # election and will reset it after primary is decided for master + # instance + self.previous_master_primary = None + + def __repr__(self): + return "{}".format(self.name) + + @property + def was_master_primary_in_prev_view(self): + return self.previous_master_primary == self.name + + @property + def master_replica(self): + return self.node.master_replica + + @property + @abstractmethod + def routes(self) -> Iterable[Route]: + pass + + @property + def supported_msg_types(self) -> Iterable[type]: + return [k for k, v in self.routes] - def decidePrimaries(self): + @abstractmethod + def decidePrimaries(self) -> None: """ - Choose the primary replica for each protocol instance in the system - using a PrimaryDecider. + Start election of the primary replica for each protocol instance """ - raise NotImplementedError + pass - async def serviceQueues(self, limit): - return 0 + def filterMsgs(self, wrappedMsgs: deque) -> deque: + """ + Filters messages by view number so that only the messages that have the + current view number are retained. 
+ + :param wrappedMsgs: the messages to filter + """ + filtered = deque() + while wrappedMsgs: + wrappedMsg = wrappedMsgs.popleft() + msg, sender = wrappedMsg + if hasattr(msg, f.VIEW_NO.nm): + reqViewNo = getattr(msg, f.VIEW_NO.nm) + if reqViewNo == self.viewNo: + filtered.append(wrappedMsg) + else: + self.discard(wrappedMsg, + "its view no {} is less than the elector's {}" + .format(reqViewNo, self.viewNo), + logger.debug) + else: + filtered.append(wrappedMsg) + + return filtered + async def serviceQueues(self, limit=None) -> int: + """ + Service at most `limit` messages from the inBox. + + :param limit: the maximum number of messages to service + :return: the number of messages successfully processed + """ + + return await self.inBoxRouter.handleAll(self.filterMsgs(self.inBox), + limit) + + def view_change_started(self, viewNo: int): + """ + Notifies primary decider about the fact that view changed to let it + prepare for election, which then will be started from outside by + calling decidePrimaries() + """ + if viewNo <= self.viewNo: + logger.warning("Provided view no {} is not greater than the " + "current view no {}".format(viewNo, self.viewNo)) + return False + self.viewNo = viewNo + self.previous_master_primary = self.node.master_primary_name + for replica in self.replicas: + replica.primaryName = None + return True + + @abstractmethod + def get_msgs_for_lagged_nodes(self) -> List[object]: + """ + Returns election messages from the last view change + """ + pass + + def send(self, msg): + """ + Send a message to the node on which this replica resides. 
+ + :param msg: the message to send + """ + logger.debug("{}'s elector sending {}".format(self.name, msg)) + self.outBox.append(msg) + + @abstractmethod + def start_election_for_instance(self, instance_id): + """ + Called when starting election for a particular protocol instance + """ + pass diff --git a/plenum/server/primary_elector.py b/plenum/server/primary_elector.py index 03074fd5e2..7580e64397 100644 --- a/plenum/server/primary_elector.py +++ b/plenum/server/primary_elector.py @@ -3,15 +3,15 @@ import time from collections import Counter, deque from functools import partial -from typing import Sequence, Any, Union, List +from typing import Sequence, Any, Union, List, Iterable -from plenum.common.types import Nomination, Reelection, Primary, f -from plenum.common.util import mostCommonElement, get_strong_quorum +from plenum.common.types import f +from plenum.common.messages.node_messages import Nomination, Reelection, Primary +from plenum.common.util import mostCommonElement from stp_core.common.log import getlogger from plenum.server import replica from plenum.server.primary_decider import PrimaryDecider -from plenum.server.router import Router - +from plenum.server.router import Router, Route logger = getlogger() @@ -35,10 +35,6 @@ class PrimaryElector(PrimaryDecider): def __init__(self, node): super().__init__(node) - # TODO: How does primary decider ensure that a node does not have a - # primary while its catching up - self.node = node - # Flag variable which indicates which replica has nominated for itself self.replicaNominatedForItself = None @@ -74,10 +70,13 @@ def __init__(self, node): # elections, the elector will set it before doing triggering new # election and will reset it after primary is decided for master # instance - self.previous_master_primary = None + # self.previous_master_primary = None - def __repr__(self): - return "{}".format(self.name) + @property + def routes(self) -> Iterable[Route]: + return [(Nomination, self.processNominate), + 
(Primary, self.processPrimary), + (Reelection, self.processReelection)] @property def hasPrimaryReplica(self) -> bool: @@ -86,9 +85,9 @@ def hasPrimaryReplica(self) -> bool: """ return any([r.isPrimary for r in self.replicas]) - @property - def was_master_primary_in_prev_view(self): - return self.previous_master_primary == self.name + # @property + # def was_master_primary_in_prev_view(self): + # return self.previous_master_primary == self.name def setDefaults(self, instId: int): """ @@ -111,31 +110,6 @@ def prepareReplicaForElection(self, replica: 'replica.Replica'): if instId not in self.nominations: self.setDefaults(instId) - def filterMsgs(self, wrappedMsgs: deque) -> deque: - """ - Filters messages by view number so that only the messages that have the - current view number are retained. - - :param wrappedMsgs: the messages to filter - """ - filtered = deque() - while wrappedMsgs: - wrappedMsg = wrappedMsgs.popleft() - msg, sender = wrappedMsg - if hasattr(msg, f.VIEW_NO.nm): - reqViewNo = getattr(msg, f.VIEW_NO.nm) - if reqViewNo == self.viewNo: - filtered.append(wrappedMsg) - else: - self.discard(wrappedMsg, - "its view no {} is less than the elector's {}" - .format(reqViewNo, self.viewNo), - logger.debug) - else: - filtered.append(wrappedMsg) - - return filtered - def didReplicaNominate(self, instId: int): """ Return whether this replica nominated a candidate for election @@ -154,24 +128,8 @@ def didReplicaDeclarePrimary(self, instId: int): return instId in self.primaryDeclarations and \ self.replicas[instId].name in self.primaryDeclarations[instId] - async def serviceQueues(self, limit=None): - """ - Service at most `limit` messages from the inBox. - - :param limit: the maximum number of messages to service - :return: the number of messages successfully processed - """ - return await self.inBoxRouter.handleAll(self.filterMsgs(self.inBox), - limit) - - @property - def quorum(self) -> int: - r""" - Return the quorum of this RBFT system. 
Equal to :math:`2f + 1`. - """ - return get_strong_quorum(f=self.f) - - def decidePrimaries(self): # overridden method of PrimaryDecider + # overridden method of PrimaryDecider + def decidePrimaries(self): self.scheduleElection() def scheduleElection(self): @@ -191,9 +149,9 @@ def startElection(self): self.nominateItself() - def start_election_for_instance(self, inst_id): - # Called when starting election for a particular protocol instance - self.prepareReplicaForElection(self.replicas[inst_id]) + # overridden method of PrimaryDecider + def start_election_for_instance(self, instance_id): + self.prepareReplicaForElection(self.replicas[instance_id]) self._schedule(self.nominateItself, random.random()) def nominateItself(self): @@ -246,11 +204,11 @@ def nominateReplica(self, instId): replica = self.replicas[instId] if not self.didReplicaNominate(instId): self.nominations[instId][replica.name] = (replica.name, - replica.lastOrderedPPSeqNo) + replica.last_ordered_3pc[1]) logger.info("{} nominating itself for instance {}". 
format(replica, instId), extra={"cli": "PLAIN", "tags": ["node-nomination"]}) - self.sendNomination(replica.name, instId, self.viewNo, replica.lastOrderedPPSeqNo) + self.sendNomination(replica.name, instId, self.viewNo, replica.last_ordered_3pc[1]) else: logger.debug( "{} already nominated, so hanging back".format(replica)) @@ -387,13 +345,11 @@ def select_primary(self, inst_id: int, prim: Primary): # which would be greater than or equal to f+1 would vote for the # same node - replica = self.replicas[inst_id] - - if replica.isPrimary is not None: - logger.debug( - "{} Primary already selected; ignoring PRIMARY msg".format( - replica)) - return + if replica.hasPrimary: + logger.debug("{} Primary already selected; " + "ignoring PRIMARY msg" + .format(replica)) + return if self.hasPrimaryQuorum(inst_id): if replica.isPrimary is None: @@ -421,7 +377,7 @@ def select_primary(self, inst_id: int, prim: Primary): if self.replicaNominatedForItself == inst_id: self.replicaNominatedForItself = None - self.node.primary_found() + self.node.primary_selected() self.scheduleElection() else: @@ -729,16 +685,7 @@ def hasPrimaryDecisionTimerExpired(self, instId: int) -> bool: return (time.perf_counter() - self.scheduledPrimaryDecisions[instId]) \ > (1 * self.nodeCount) - def send(self, msg): - """ - Send a message to the node on which this replica resides. - - :param msg: the message to send - """ - logger.debug("{}'s elector sending {}".format(self.name, msg)) - self.outBox.append(msg) - - def viewChanged(self, viewNo: int): + def view_change_started(self, viewNo: int): """ Actions to perform when a view change occurs. @@ -749,14 +696,7 @@ def viewChanged(self, viewNo: int): :param viewNo: the new view number. 
""" - if viewNo > self.viewNo: - self.previous_master_primary = self.node.master_primary - - self.viewNo = viewNo - - for replica in self.replicas: - replica.primaryName = None - + if super().view_change_started(viewNo): # Reset to defaults values for different data structures as new # elections would begin for r in self.replicas: @@ -764,9 +704,25 @@ def viewChanged(self, viewNo: int): self.replicaNominatedForItself = None self.nominateRandomReplica() - else: - logger.warning("Provided view no {} is not greater than the " - "current view no {}".format(viewNo, self.viewNo)) + + # if viewNo > self.viewNo: + # self.previous_master_primary = self.node.master_primary + # + # self.viewNo = viewNo + # + # for replica in self.replicas: + # replica.primaryName = None + # + # # Reset to defaults values for different data structures as new + # # elections would begin + # for r in self.replicas: + # self.setDefaults(r.instId) + # self.replicaNominatedForItself = None + # + # self.nominateRandomReplica() + # else: + # logger.warning("Provided view no {} is not greater than the " + # "current view no {}".format(viewNo, self.viewNo)) def getElectionMsgsForInstance(self, instId: int) -> \ Sequence[Union[Nomination, Primary]]: @@ -779,7 +735,7 @@ def getElectionMsgsForInstance(self, instId: int) -> \ # primary declaration for the selected primary if replica.isPrimary is not None: msgs.append(Primary(replica.primaryName, instId, self.viewNo, - replica.lastOrderedPPSeqNo)) + replica.last_ordered_3pc[1])) else: # If a primary for this instance has not been selected then send # nomination and primary declaration that this node made for the @@ -792,8 +748,7 @@ def getElectionMsgsForInstance(self, instId: int) -> \ msgs.append(Primary(nm, instId, self.viewNo, seqNo)) return msgs - def getElectionMsgsForLaggedNodes(self) -> \ - List[Union[Nomination, Primary]]: + def get_msgs_for_lagged_nodes(self) -> List[Union[Nomination, Primary]]: """ Get nomination and primary messages for instance 
with id `instId` that need to be sent to a node which has lagged behind (for example, a newly diff --git a/plenum/server/primary_selector.py b/plenum/server/primary_selector.py index 50b5605b03..df45782ad9 100644 --- a/plenum/server/primary_selector.py +++ b/plenum/server/primary_selector.py @@ -1,41 +1,345 @@ +from typing import Iterable, List, Optional, Tuple + +from plenum.common.messages.node_messages import ViewChangeDone +from plenum.server.router import Route from stp_core.common.log import getlogger -from plenum.server import replica from plenum.server.primary_decider import PrimaryDecider +from plenum.server.replica import Replica +from plenum.common.util import mostCommonElement logger = getlogger() -# TODO: Assumes that all nodes are up. Should select only -# those nodes which are up class PrimarySelector(PrimaryDecider): + """ + Simple implementation of primary decider. + Decides on a primary in round-robin fashion. + Assumes that all nodes are up + """ + def __init__(self, node): super().__init__(node) - self.nodeNamesByRank = sorted(self.nodeNames) + self.previous_master_primary = None + self._ledger_manager = self.node.ledgerManager + + self.set_defaults() + + def set_defaults(self): + # Tracks view change done message + self._view_change_done = {} # replica name -> data + + # Set when an appropriate view change quorum is found which has + # sufficient same ViewChangeDone messages + self.primary_verified = False + + self._has_view_change_from_primary = False + + self._has_acceptable_view_change_quorum = False + + self._accepted_view_change_done_message = None + + @property + def quorum(self) -> int: + return self.node.quorums.view_change_done.value - def decidePrimaries(self): # overridden method of PrimaryDecider - self.scheduleSelection() + @property + def routes(self) -> Iterable[Route]: + return [(ViewChangeDone, self._processViewChangeDoneMessage)] - def scheduleSelection(self): + # overridden method of PrimaryDecider + def 
get_msgs_for_lagged_nodes(self) -> List[ViewChangeDone]: + # Should not return a list, only done for compatibility with interface """ - Schedule election at some time in the future. Currently the election - starts immediately. + Returns the last accepted `ViewChangeDone` message. + If no view change has happened returns ViewChangeDone + with view no 0 to a newly joined node """ - self._schedule(self.startSelection) + # TODO: Consider a case where more than one node joins immediately, + # then one of the node might not have an accepted + # ViewChangeDone message + messages = [] + accpeted = self._accepted_view_change_done_message + if accpeted: + messages.append(ViewChangeDone(self.viewNo, *accpeted)) + elif self.name in self._view_change_done: + messages.append(ViewChangeDone(self.viewNo, + *self._view_change_done[self.name])) + else: + logger.debug('{} has no ViewChangeDone message to send for view {}'. + format(self, self.viewNo)) + return messages + + # overridden method of PrimaryDecider + def decidePrimaries(self): + if self.node.is_synced and self.master_replica.isPrimary is None: + self._send_view_change_done_message() + self._startSelection() + + # Question: Master is always 0, until we change that rule why incur cost + # of a method call, also name is confusing + def _is_master_instance(self, instance_id): + # TODO: get master instance from outside + # Instance 0 is always master + return instance_id == 0 + + def _processViewChangeDoneMessage(self, + msg: ViewChangeDone, + sender: str) -> bool: + """ + Processes ViewChangeDone messages. Once n-f messages have been + received, decides on a primary for specific replica. 
+ + :param msg: ViewChangeDone message + :param sender: the name of the node from which this message was sent + """ + + logger.debug("{}'s primary selector started processing of " + "ViewChangeDone msg from {} : {}" + .format(self.name, sender, msg)) + + view_no = msg.viewNo + + if self.viewNo != view_no: + self.discard(msg, + '{} got Primary from {} for view no {} ' + 'whereas current view no is {}' + .format(self, sender, view_no, self.viewNo), + logMethod=logger.warning) + return False + + new_primary_name = msg.name + ledger_info = msg.ledgerInfo + + if new_primary_name == self.previous_master_primary: + self.discard(msg, + '{} got Primary from {} for {} who was primary of ' + 'master in previous view too' + .format(self, sender, new_primary_name), + logMethod=logger.warning) + return False + + # Since a node can send ViewChangeDone more than one time + self._track_view_change_done(sender, + new_primary_name, + ledger_info) + + if self.master_replica.hasPrimary: + self.discard(msg, + "it already decided primary which is {}". 
+ format(self.master_replica.primaryName), + logger.debug) + return False + + self._startSelection() + + def _verify_view_change(self): + if not self.has_acceptable_view_change_quorum: + return False + + rv = self.has_sufficient_same_view_change_done_messages + if rv is None: + return False + + if not self._verify_primary(*rv): + return False + + return True + + def _verify_primary(self, new_primary, ledger_info): + """ + This method is called when sufficient number of ViewChangeDone + received and makes steps to switch to the new primary + """ + + expected_primary = self.next_primary_node_name(0) + if new_primary != expected_primary: + logger.error("{} expected next primary to be {}, but majority " + "declared {} instead for view {}" + .format(self.name, expected_primary, new_primary, + self.viewNo)) + return False + + self.primary_verified = True + return True + # TODO: check if ledger status is expected + + def _track_view_change_done(self, sender_name, new_primary_name, + ledger_summary): + data = (new_primary_name, ledger_summary) + self._view_change_done[sender_name] = data + + @property + def _hasViewChangeQuorum(self): + # This method should just be present for master instance. + """ + Checks whether n-f nodes completed view change and whether one + of them is the next primary + """ + num_of_ready_nodes = len(self._view_change_done) + diff = self.quorum - num_of_ready_nodes + if diff > 0: + logger.debug('{} needs {} ViewChangeDone messages'.format(self, diff)) + return False + + logger.info("{} got view change quorum ({} >= {})" + .format(self.name, + num_of_ready_nodes, + self.quorum)) + return True + + @property + def has_view_change_from_primary(self) -> bool: + if not self._has_view_change_from_primary: + next_primary_name = self.next_primary_node_name(0) + + if next_primary_name not in self._view_change_done: + logger.debug("{} has not received ViewChangeDone from the next " + "primary {}". 
+ format(self.name, next_primary_name)) + return False + else: + self._has_view_change_from_primary = True + + logger.debug('{} received ViewChangeDone from primary {}' + .format(self, self.next_primary_node_name(0))) + return True + + @property + def has_acceptable_view_change_quorum(self): + if not self._has_acceptable_view_change_quorum: + self._has_acceptable_view_change_quorum = \ + self._hasViewChangeQuorum and self.has_view_change_from_primary + return self._has_acceptable_view_change_quorum + + @property + def has_sufficient_same_view_change_done_messages(self) -> Optional[Tuple]: + # Returns whether has a quorum of ViewChangeDone messages that are same + # TODO: Does not look like optimal implementation. + if self._accepted_view_change_done_message is None and \ + self._view_change_done: + votes = self._view_change_done.values() + votes = [(nm, tuple(tuple(i) for i in info)) for nm, info in votes] + new_primary, ledger_info = mostCommonElement(votes) + if votes.count((new_primary, ledger_info)) >= self.quorum: + logger.debug('{} found acceptable primary {} and ledger info {}'. 
+ format(self, new_primary, ledger_info)) + self._accepted_view_change_done_message = (new_primary, + ledger_info) + else: + logger.debug('{} does not have acceptable primary'.format(self)) + + return self._accepted_view_change_done_message + + @property + def is_behind_for_view(self) -> bool: + # Checks if the node is currently behind the accepted state for this + # view, only makes sense to call when the node has an acceptable + # view change quorum + _, accepted_ledger_summary = self.has_sufficient_same_view_change_done_messages + for (_, own_ledger_size, _), (_, accepted_ledger_size, _) in \ + zip(self.ledger_summary, accepted_ledger_summary): + if own_ledger_size < accepted_ledger_size: + print(own_ledger_size, accepted_ledger_size) + return True + return False + + def _startSelection(self): + if not self._verify_view_change(): + logger.debug('{} cannot start primary selection found failure in ' + 'primary verification. This can happen due to lack ' + 'of appropriate ViewChangeDone messages'.format(self)) + return + + if not self.node.is_synced: + logger.info('{} cannot start primary selection since mode is {}' + .format(self, self.node.mode)) + return + + if self.is_behind_for_view: + logger.info('{} is synced and has an acceptable view change quorum ' + 'but is behind the accepted state'.format(self)) + self.node.start_catchup() + return - def startSelection(self): logger.debug("{} starting selection".format(self)) - for idx, r in enumerate(self.replicas): - prim = (self.viewNo + idx) % self.nodeCount - primaryName = replica.Replica.generateName( - self.nodeNamesByRank[prim], - idx) - logger.debug("{} has primary {}".format(r.name, primaryName)) - r.primaryName = primaryName - - def viewChanged(self, viewNo: int): - if viewNo > self.viewNo: - self.viewNo = viewNo - self.startSelection() - else: - logger.warning("Provided view no {} is not greater than the " - "current view no {}".format(viewNo, self.viewNo)) + for instance_id, replica in 
enumerate(self.replicas): + if replica.primaryName is not None: + logger.debug('{} already has a primary'.format(replica)) + continue + new_primary_name = self.next_primary_replica_name(instance_id) + logger.display("{} selected primary {} for instance {} (view {})" + .format(replica, + new_primary_name, + instance_id, + self.viewNo), + extra={"cli": "ANNOUNCE", + "tags": ["node-election"]}) + + if instance_id == 0: + self.previous_master_primary = None + # The node needs to be set in participating mode since when + # the replica is made aware of the primary, it will start + # processing stashed requests and hence the node needs to be + # participating. + self.node.start_participating() + + replica.primaryChanged(new_primary_name) + self.node.primary_selected(instance_id) + + logger.display("{} declares view change {} as completed for " + "instance {}, " + "new primary is {}, " + "ledger info is {}" + .format(replica, + self.viewNo, + instance_id, + new_primary_name, + self.ledger_summary), + extra={"cli": "ANNOUNCE", + "tags": ["node-election"]}) + + def _get_primary_id(self, view_no, instance_id): + return (view_no + instance_id) % self.node.totalNodes + + def next_primary_node_name(self, instance_id): + return self.node.get_name_by_rank(self._get_primary_id( + self.viewNo, instance_id)) + + def next_primary_replica_name(self, instance_id): + """ + Returns name of the next node which is supposed to be a new Primary + in round-robin fashion + """ + return Replica.generateName( + nodeName=self.next_primary_node_name(instance_id), + instId=instance_id) + + def _send_view_change_done_message(self): + """ + Sends ViewChangeDone message to other protocol participants + """ + new_primary_name = self.next_primary_node_name(0) + ledger_summary = self.ledger_summary + message = ViewChangeDone(self.viewNo, + new_primary_name, + ledger_summary) + self.send(message) + self._track_view_change_done(self.name, + new_primary_name, ledger_summary) + + def view_change_started(self, 
viewNo: int): + """ + :param viewNo: the new view number. + """ + if super().view_change_started(viewNo): + self.set_defaults() + + # overridden method of PrimaryDecider + def start_election_for_instance(self, instance_id): + raise NotImplementedError("Election can be started for " + "all instances only") + + @property + def ledger_summary(self): + return [li.ledger_summary for li in + self._ledger_manager.ledgerRegistry.values()] diff --git a/plenum/server/propagator.py b/plenum/server/propagator.py index f906b7a48c..fc5bc86340 100644 --- a/plenum/server/propagator.py +++ b/plenum/server/propagator.py @@ -1,12 +1,15 @@ -from collections import OrderedDict -from collections import deque -from typing import Dict, Tuple, Union -import weakref +from collections import OrderedDict, Counter, defaultdict +from itertools import groupby -from plenum.common.types import Propagate +from typing import Dict, Tuple, Union, Optional + +from orderedset._orderedset import OrderedSet +from plenum.common.constants import PROPAGATE +from plenum.common.messages.node_messages import Propagate from plenum.common.request import Request, ReqKey +from plenum.common.types import f +from plenum.server.quorums import Quorum from stp_core.common.log import getlogger -from plenum.common.util import checkIfMoreThanFSameItems logger = getlogger() @@ -24,13 +27,21 @@ def __init__(self, request: Request): self.propagates = {} self.finalised = None - def isFinalised(self, f): - if self.finalised is None: - req = checkIfMoreThanFSameItems([v.__getstate__() for v in - self.propagates.values()], f) - if req: - self.finalised = Request.fromState(req) - return self.finalised + def req_with_acceptable_quorum(self, quorum: Quorum): + digests = defaultdict(set) + # this is workaround because we are getting a propagate from somebody with + # non-str (byte) name + for sender, req in filter(lambda x: type(x[0]) == str, self.propagates.items()): + digests[req.digest].add(sender) + if 
quorum.is_reached(len(digests[req.digest])): + return req + + def set_finalised(self, req): + # TODO: make it much explicitly and simpler + # !side affect! if `req` is an instance of a child of `Request` class + # here we construct the parent from child it is rather implicit that + # `finalised` contains not the same type than `propagates` has + self.finalised = Request.fromState(req.__getstate__()) class Requests(OrderedDict): @@ -85,19 +96,13 @@ def votes(self, req) -> int: votes = 0 return votes - def canForward(self, req: Request, requiredVotes: int) -> (bool, str): - """ - Check whether the request specified is eligible to be forwarded to the - protocol instances. - """ + def req_with_acceptable_quorum(self, req: Request, quorum: Quorum): state = self[req.key] - if state.forwarded: - msg = 'already forwarded' - elif not state.isFinalised(requiredVotes): - msg = 'not finalised' - else: - msg = None - return not bool(msg), msg + return state.req_with_acceptable_quorum(quorum) + + def set_finalised(self, req: Request): + state = self[req.key] + state.set_finalised(req) def hasPropagated(self, req: Request, sender: str) -> bool: """ @@ -116,8 +121,11 @@ def digest(self, reqKey: Tuple) -> str: class Propagator: + MAX_REQUESTED_KEYS_TO_KEEP = 1000 + def __init__(self): self.requests = Requests() + self.requested_propagates_for = OrderedSet() # noinspection PyUnresolvedReferences def propagate(self, request: Request, clientName): @@ -130,19 +138,15 @@ def propagate(self, request: Request, clientName): logger.trace("{} already propagated {}".format(self, request)) else: self.requests.addPropagate(request, self.name) - # Only propagate if the node is participating in the consensus - # process which happens when the node has completed the - # catchup process. QUESTION: WHY? - if self.isParticipating: - propagate = self.createPropagate(request, clientName) - logger.info("{} propagating {} request {} from client {}". 
- format(self, request.identifier, request.reqId, - clientName), - extra={"cli": True, "tags": ["node-propagate"]}) - self.send(propagate) + propagate = self.createPropagate(request, clientName) + logger.info("{} propagating {} request {} from client {}". + format(self, request.identifier, request.reqId, + clientName), + extra={"cli": True, "tags": ["node-propagate"]}) + self.send(propagate) @staticmethod - def createPropagate(request: Union[Request, dict], identifier) -> Propagate: + def createPropagate(request: Union[Request, dict], client_name) -> Propagate: """ Create a new PROPAGATE for the given REQUEST. @@ -155,12 +159,12 @@ def createPropagate(request: Union[Request, dict], identifier) -> Propagate: logger.debug("Creating PROPAGATE for REQUEST {}".format(request)) request = request.as_dict if isinstance(request, Request) else \ request - if isinstance(identifier, bytes): - identifier = identifier.decode() - return Propagate(request, identifier) + if isinstance(client_name, bytes): + client_name = client_name.decode() + return Propagate(request, client_name) # noinspection PyUnresolvedReferences - def canForward(self, request: Request) -> (bool, str): + def canForward(self, request: Request): """ Determine whether to forward client REQUESTs to replicas, based on the following logic: @@ -176,7 +180,21 @@ def canForward(self, request: Request) -> (bool, str): :param request: the client REQUEST """ - return self.requests.canForward(request, self.f + 1) + + if self.requests.forwarded(request): + return 'already forwarded' + + # If not enough Propogates, don't bother comparing + if not self.quorums.propagate.is_reached(self.requests.votes(request)): + return 'not finalised' + + req = self.requests.req_with_acceptable_quorum(request, + self.quorums.propagate) + if req: + self.requests.set_finalised(req) + return None + else: + return 'not finalised' # noinspection PyUnresolvedReferences def forward(self, request: Request): @@ -186,8 +204,8 @@ def forward(self, 
request: Request): :param request: the REQUEST to propagate """ key = request.key + logger.debug('{} forwarding request {} to replicas'.format(self, key)) for q in self.msgsToReplicas: - logger.debug('{} forwarding request {} to replicas'.format(self, key)) q.append(ReqKey(*key)) self.monitor.requestUnOrdered(*key) @@ -202,8 +220,6 @@ def recordAndPropagate(self, request: Request, clientName): :param clientName: """ self.requests.add(request) - # # Only propagate if the node is participating in the consensus process - # # which happens when the node has completed the catchup process self.propagate(request, clientName) self.tryForwarding(request) @@ -213,12 +229,37 @@ def tryForwarding(self, request: Request): See the method `canForward` for the conditions to check before forwarding a request. """ - r, msg = self.canForward(request) - if r: + cannot_reason_msg = self.canForward(request) + if cannot_reason_msg is None: # If haven't got the client request(REQUEST) for the corresponding # propagate request(PROPAGATE) but have enough propagate requests # to move ahead self.forward(request) else: logger.debug("{} not forwarding request {} to its replicas " - "since {}".format(self, request, msg)) + "since {}".format(self, request, cannot_reason_msg)) + + def request_propagates(self, req_keys): + """ + Request PROPAGATEs for the given request keys. Since replicas can + request PROPAGATEs independently of each other, check if it has + been requested recently + :param req_keys: + :return: + """ + i = 0 + for (idr, req_id) in req_keys: + if (idr, req_id) not in self.requested_propagates_for: + self.request_msg(PROPAGATE, {f.IDENTIFIER.nm: idr, + f.REQ_ID.nm: req_id}) + self._add_to_recently_requested((idr, req_id)) + i += 1 + else: + logger.debug('{} already requested PROPAGATE recently for {}'. 
+ format(self, (idr, req_id))) + return i + + def _add_to_recently_requested(self, key): + while len(self.requested_propagates_for) > self.MAX_REQUESTED_KEYS_TO_KEEP: + self.requested_propagates_for.pop(last=False) + self.requested_propagates_for.add(key) diff --git a/plenum/server/quorums.py b/plenum/server/quorums.py new file mode 100644 index 0000000000..a0ce46a450 --- /dev/null +++ b/plenum/server/quorums.py @@ -0,0 +1,29 @@ +from plenum.common.util import getMaxFailures + + +class Quorum: + + def __init__(self, value: int): + self.value = value + + def is_reached(self, msg_count: int) -> bool: + return msg_count >= self.value + + +class Quorums: + + def __init__(self, n): + f = getMaxFailures(n) + self.f = f + self.propagate = Quorum(f + 1) + self.prepare = Quorum(n - f - 1) + self.commit = Quorum(n - f) + self.reply = Quorum(f + 1) + self.view_change = Quorum(n - f) + self.election = Quorum(n - f) + self.view_change_done = Quorum(n - f) + self.same_consistency_proof = Quorum(f + 1) + self.consistency_proof = Quorum(f + 1) + self.ledger_status = Quorum(f + 1) + self.checkpoint = Quorum(2 * f) + self.timestamp = Quorum(f + 1) diff --git a/plenum/server/replica.py b/plenum/server/replica.py index 8d6d2980ee..8c5eac3244 100644 --- a/plenum/server/replica.py +++ b/plenum/server/replica.py @@ -1,9 +1,5 @@ import time -from binascii import hexlify, unhexlify from collections import deque, OrderedDict -from enum import IntEnum -from enum import unique -from operator import itemgetter from typing import Dict, List, Union from typing import Optional, Any from typing import Set @@ -11,25 +7,45 @@ from hashlib import sha256 from orderedset import OrderedSet -from sortedcontainers import SortedDict +from sortedcontainers import SortedList import plenum.server.node from plenum.common.config_util import getConfig from plenum.common.exceptions import SuspiciousNode, \ InvalidClientMessageException, UnknownIdentifier from plenum.common.signing import serialize -from 
plenum.common.types import PrePrepare, \ - Prepare, Commit, Ordered, ThreePhaseMsg, ThreePhaseKey, ThreePCState, \ - CheckpointState, Checkpoint, Reject, f, InstanceChange +from plenum.common.messages.node_messages import * from plenum.common.request import ReqDigest, Request, ReqKey from plenum.common.message_processor import MessageProcessor -from plenum.common.util import updateNamedTuple +from plenum.common.util import updateNamedTuple, compare_3PC_keys, max_3PC_key, \ + mostCommonElement from stp_core.common.log import getlogger from plenum.server.has_action_queue import HasActionQueue from plenum.server.models import Commits, Prepares from plenum.server.router import Router from plenum.server.suspicion_codes import Suspicions +from sortedcontainers import SortedDict as _SortedDict +if 'peekitem' in dir(_SortedDict): + SortedDict = _SortedDict +else: + # Since older versions of `SortedDict` lack `peekitem` + class SortedDict(_SortedDict): + def peekitem(self, index=-1): + # This method is copied from `SortedDict`'s source code + """Return (key, value) item pair at index. + + Unlike ``popitem``, the sorted dictionary is not modified. Index + defaults to -1, the last/greatest key in the dictionary. Specify + ``index=0`` to lookup the first/least key in the dictiony. + + If index is out of range, raise IndexError. 
+ + """ + key = self._list[index] + return key, self[key] + + logger = getlogger() LOG_TAGS = { @@ -72,6 +88,8 @@ def __repr__(self): class Replica(HasActionQueue, MessageProcessor): + STASHED_CHECKPOINTS_BEFORE_CATCHUP = 1 + def __init__(self, node: 'plenum.server.node.Node', instId: int, isMaster: bool = False): """ @@ -83,28 +101,25 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, """ HasActionQueue.__init__(self) self.stats = Stats(TPCStat) - self.config = getConfig() - routerArgs = [(ReqKey, self.readyFor3PC)] - - for r in [PrePrepare, Prepare, Commit]: - routerArgs.append((r, self.processThreePhaseMsg)) - - routerArgs.append((Checkpoint, self.processCheckpoint)) - routerArgs.append((ThreePCState, self.process3PhaseState)) - - self.inBoxRouter = Router(*routerArgs) + self.inBoxRouter = Router( + (ReqKey, self.readyFor3PC), + (PrePrepare, self.processThreePhaseMsg), + (Prepare, self.processThreePhaseMsg), + (Commit, self.processThreePhaseMsg), + (Checkpoint, self.processCheckpoint), + (ThreePCState, self.process3PhaseState), + ) self.threePhaseRouter = Router( - (PrePrepare, self.processPrePrepare), - (Prepare, self.processPrepare), - (Commit, self.processCommit) + (PrePrepare, self.processPrePrepare), + (Prepare, self.processPrepare), + (Commit, self.processCommit) ) self.node = node self.instId = instId - self.name = self.generateName(node.name, self.instId) self.outBox = deque() @@ -132,6 +147,9 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # instance is self._primaryName = None # type: Optional[str] + # TODO: Rename since it will contain all messages till primary is + # selected, primary selection is only done once pool ledger is + # caught up # Requests waiting to be processed once the replica is able to decide # whether it is primary or not self.postElectionMsgs = deque() @@ -141,13 +159,15 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # forwarded the request by the node but is getting 3 phase 
messages. # The value is a list since a malicious entry might send PRE-PREPARE # with a different digest and since we dont have the request finalised - # yet, we store all PRE-PPREPARES + # yet, we store all PRE-PPREPAREs self.prePreparesPendingFinReqs = [] # type: List[Tuple[PrePrepare, str, Set[Tuple[str, int]]]] # PrePrepares waiting for previous PrePrepares, key being tuple of view # number and pre-prepare sequence numbers and value being tuple of # PrePrepare and sender - self.prePreparesPendingPrevPP = SortedDict(lambda k: k[1]) + # TODO: Since pp_seq_no will start from 1 in each view, the comparator + # of SortedDict needs to change + self.prePreparesPendingPrevPP = SortedDict(lambda k: (k[0], k[1])) # PREPAREs that are stored by non primary replica for which it has not # got any PRE-PREPARE. Dictionary that stores a tuple of view no and @@ -166,13 +186,13 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # which it has broadcasted to all other non primary replicas # Key of dictionary is a 2 element tuple with elements viewNo, # pre-prepare seqNo and value is the received PRE-PREPARE - self.sentPrePrepares = SortedDict(lambda k: k[1]) + self.sentPrePrepares = SortedDict(lambda k: (k[0], k[1])) # type: Dict[Tuple[int, int], PrePrepare] # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2 # element tuple with elements viewNo, pre-prepare seqNo and value # is the received PRE-PREPARE - self.prePrepares = SortedDict(lambda k: k[1]) + self.prePrepares = SortedDict(lambda k: (k[0], k[1])) # type: Dict[Tuple[int, int], PrePrepare] # Dictionary of received Prepare requests. Key of dictionary is a 2 @@ -187,7 +207,7 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]] # Set of tuples to keep track of ordered requests. Each tuple is - # (viewNo, ppSeqNo) + # (viewNo, ppSeqNo). 
self.ordered = OrderedSet() # type: OrderedSet[Tuple[int, int]] # Dictionary to keep track of the which replica was primary during each @@ -204,10 +224,14 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # viewNo and value a map of pre-prepare sequence number to commit self.stashed_out_of_order_commits = {} # type: Dict[int,Dict[int,Commit]] - self.checkpoints = SortedDict(lambda k: k[0]) + self.checkpoints = SortedDict(lambda k: k[1]) - self.stashedRecvdCheckpoints = {} # type: Dict[Tuple, - # Dict[str, Checkpoint]] + # Stashed checkpoints for each view. The key of the outermost + # dictionary is the view_no, value being a dictionary with key as the + # range of the checkpoint and its value again being a mapping between + # senders and their sent checkpoint + self.stashedRecvdCheckpoints = {} # type: Dict[int, Dict[Tuple, + # Dict[str, Checkpoint]]] self.stashingWhileOutsideWaterMarks = deque() @@ -227,41 +251,62 @@ def __init__(self, node: 'plenum.server.node.Node', instId: int, # the request key needs to be removed once its ordered self.requestQueues[ledger_id] = OrderedSet() - self.batches = OrderedDict() # type: OrderedDict[int, Tuple[int, float, bytes]] + self.batches = OrderedDict() # type: OrderedDict[Tuple[int, int], + # Tuple[int, float, bytes]] # TODO: Need to have a timer for each ledger self.lastBatchCreated = time.perf_counter() - self.lastOrderedPPSeqNo = 0 + # self.lastOrderedPPSeqNo = 0 + # Three phase key for the last ordered batch + self.last_ordered_3pc = (0, 0) + + # 3 phase key for the last prepared certificate before view change + # started, applicable only to master instance + self.last_prepared_before_view_change = None + + # Tracks for which keys PRE-PREPAREs have been requested. + # Cleared in `gc` + self.requested_pre_prepares = {} # type: Dict[Tuple[int, int], Tuple[str, str, str]] - # Keeps the `lastOrderedPPSeqNo` and ledger_summary for each view no. 
- # GC when ordered last batch of the view - self.view_ends_at = OrderedDict() + # Time of the last PRE-PREPARE which satisfied all validation rules + # (time, digest, roots were all correct). This time is not to be + # reverted even if the PRE-PREPAREs are not ordered. This implies that + # the next primary would have seen all accepted PRE-PREPAREs or another + # view change will happen + self.last_accepted_pre_prepare_time = None + + # Keeps a map of PRE-PREPAREs which did not satisfy timestamp + # criteria, they can be accepted if >f PREPAREs are encountered. + # This is emptied on view change. With each PRE-PREPARE, a flag is + # stored which indicates whether there are sufficient acceptable + # PREPAREs or not + self.pre_prepares_stashed_for_incorrect_time = OrderedDict() def ledger_uncommitted_size(self, ledgerId): if not self.isMaster: return None return self.node.getLedger(ledgerId).uncommitted_size - def txnRootHash(self, ledgerId, toHex=True): + def txnRootHash(self, ledger_str, to_str=True): if not self.isMaster: return None - ledger = self.node.getLedger(ledgerId) + ledger = self.node.getLedger(ledger_str) h = ledger.uncommittedRootHash # If no uncommittedHash since this is the beginning of the tree # or no transactions affecting the ledger were made after the # last changes were committed root = h if h else ledger.tree.root_hash - if toHex: - root = hexlify(root).decode() + if to_str: + root = ledger.hashToStr(root) return root - def stateRootHash(self, ledgerId, toHex=True): + def stateRootHash(self, ledger_id, to_str=True): if not self.isMaster: return None - root = self.node.getState(ledgerId).headHash - if toHex: - root = hexlify(root).decode() + root = self.node.getState(ledger_id).headHash + if to_str: + root = base58.b58encode(root) return root @property @@ -299,13 +344,13 @@ def requests(self): def ledger_ids(self): return self.node.ledger_ids - def shouldParticipate(self, viewNo: int, ppSeqNo: int) -> bool: - """ - Replica should only 
participating in the consensus process and the - replica did not stash any of this request's 3-phase request - """ - return self.node.isParticipating and (viewNo, ppSeqNo) \ - not in self.stashingWhileCatchingUp + @property + def quorums(self): + return self.node.quorums + + @property + def utc_epoch(self): + return self.node.utc_epoch() @staticmethod def generateName(nodeName: str, instId: int): @@ -330,6 +375,10 @@ def isPrimary(self): return self._primaryName == self.name if self._primaryName is not None \ else None + @property + def hasPrimary(self): + return self.primaryName is not None + @property def primaryName(self): """ @@ -347,48 +396,117 @@ def primaryName(self, value: Optional[str]) -> None: :param value: the value to set isPrimary to """ self.primaryNames[self.viewNo] = value + self.compact_primary_names() if not value == self._primaryName: self._primaryName = value logger.debug("{} setting primaryName for view no {} to: {}". format(self, self.viewNo, value)) - if self.isMaster: - self.removeObsoletePpReqs() + if value is None: + # Since the GC needs to happen after a primary has been decided. 
+ return + self._gc_before_new_view() + self._reset_watermarks_before_new_view() self._stateChanged() - def primaryChanged(self, primaryName, lastOrderedPPSeqNo): - if self.lastOrderedPPSeqNo < lastOrderedPPSeqNo: - self.lastOrderedPPSeqNo = lastOrderedPPSeqNo + def compact_primary_names(self): + min_allowed_view_no = self.viewNo - 1 + views_to_remove = [] + for view_no in self.primaryNames: + if view_no >= min_allowed_view_no: + break + views_to_remove.append(view_no) + for view_no in views_to_remove: + self.primaryNames.pop(view_no) + + def primaryChanged(self, primaryName): + self.batches.clear() + if self.isMaster: + # Since there is no temporary state data structure and state root + # is explicitly set to correct value + for lid in self.ledger_ids: + try: + ledger = self.node.getLedger(lid) + except KeyError: + continue + ledger.reset_uncommitted() + self.primaryName = primaryName - if primaryName == self.name: - assert self.lastOrderedPPSeqNo >= lastOrderedPPSeqNo - self._lastPrePrepareSeqNo = self.lastOrderedPPSeqNo - - def removeObsoletePpReqs(self): - # If replica was primary in previous view then remove every sent - # Pre-Prepare with less than f+1 Prepares. 
- viewNos = self.primaryNames.keys() - if len(viewNos) > 1: - viewNos = list(viewNos) - lastViewNo = viewNos[-2] - if self.primaryNames[lastViewNo] == self.name: - lastViewPPs = [pp for pp in self.sentPrePrepares.values() if - pp.viewNo == lastViewNo] - obs = set() - for pp in lastViewPPs: - if not self.prepares.hasEnoughVotes(pp, self.f): - obs.add((pp.viewNo, pp.ppSeqNo)) - - for key in sorted(list(obs), key=itemgetter(1), reverse=True): - ppReq = self.sentPrePrepares[key] - count, _, prevStateRoot = self.batches[key[1]] - self.batches.pop(key[1]) - self.revert(ppReq.ledgerId, prevStateRoot, count) - self.sentPrePrepares.pop(key) - self.prepares.pop(key, None) + self._setup_for_non_master() + + def shouldParticipate(self, viewNo: int, ppSeqNo: int) -> bool: + """ + Replica should only participating in the consensus process and the + replica did not stash any of this request's 3-phase request + """ + return self.node.isParticipating and (viewNo, ppSeqNo) \ + not in self.stashingWhileCatchingUp + + def on_view_change_start(self): + assert self.isMaster + lst = self.last_prepared_certificate_in_view() + self.last_prepared_before_view_change = lst + logger.debug('{} setting last prepared for master to {}'.format(self, lst)) + + def on_view_change_done(self): + assert self.isMaster + self.last_prepared_before_view_change = None + + def get_lowest_probable_prepared_certificate_in_view(self, view_no) -> Optional[int]: + """ + Return lowest pp_seq_no of the view for which can be prepared but + choose from unprocessed PRE-PREPAREs and PREPAREs. 
+ """ + # TODO: Naive implementation, dont need to iterate over the complete + # data structures, fix this later + seq_no_pp = SortedList() # pp_seq_no of PRE-PREPAREs + # pp_seq_no of PREPAREs with count of PREPAREs for each + seq_no_p = set() + + for (v, p) in self.prePreparesPendingPrevPP: + if v == view_no: + seq_no_pp.add(p) + if v > view_no: + break + + for (v, p), pr in self.preparesWaitingForPrePrepare.items(): + if v == view_no and len(pr) >= self.quorums.prepare.value: + seq_no_p.add(p) + + for n in seq_no_pp: + if n in seq_no_p: + return n + return None + + def _setup_for_non_master(self): + """ + Since last ordered view_no and pp_seq_no are only communicated for + master instance, `last_ordered_3pc` if backup instance and clear + last view messages + :return: + """ + if not self.isMaster: + # If not master instance choose last ordered seq no to be 1 less + # the lowest prepared certificate in this view + lowest_prepared = self.get_lowest_probable_prepared_certificate_in_view( + self.viewNo) + # TODO: This assumes some requests will be present, fix this once + # view change is completely implemented + lowest_ordered = 0 if lowest_prepared is None \ + else lowest_prepared - 1 + self.last_ordered_3pc = (self.viewNo, lowest_ordered) + logger.debug('Setting last ordered for non-master {} as {}'. + format(self, self.last_ordered_3pc)) + self._clear_last_view_message_for_non_master(self.viewNo) + + def _clear_last_view_message_for_non_master(self, current_view): + assert not self.isMaster + for v in list(self.stashed_out_of_order_commits.keys()): + if v < current_view: + self.stashed_out_of_order_commits.pop(v) def is_primary_in_view(self, viewNo: int) -> Optional[bool]: """ - Return whether a primary has been selected for this view number. 
+ Return whether this replica was primary in the given view """ return self.primaryNames[viewNo] == self.name @@ -471,31 +589,24 @@ def viewNo(self): """ return self.node.viewNo - def isMsgFromPrimary(self, msg, sender: str) -> bool: - """ - Return whether this message was from primary replica - :param msg: - :param sender: - :return: - """ - return self.primaryName == sender - def trackBatches(self, pp: PrePrepare, prevStateRootHash): # pp.discarded indicates the index from where the discarded requests # starts hence the count of accepted requests, prevStateRoot is # tracked to revert this PRE-PREPARE logger.debug('{} tracking batch for {} with state root {}'. format(self, pp, prevStateRootHash)) - self.batches[pp.ppSeqNo] = [pp.discarded, pp.ppTime, prevStateRootHash] + self.batches[(pp.viewNo, pp.ppSeqNo)] = [pp.ledgerId, pp.discarded, + pp.ppTime, prevStateRootHash] def send3PCBatch(self): r = 0 for lid, q in self.requestQueues.items(): + # TODO: make the condition more apparent if len(q) >= self.config.Max3PCBatchSize or ( self.lastBatchCreated + self.config.Max3PCBatchWait < time.perf_counter() and len(q) > 0): - oldStateRootHash = self.stateRootHash(lid, toHex=False) + oldStateRootHash = self.stateRootHash(lid, to_str=False) ppReq = self.create3PCBatch(lid) self.sendPrePrepare(ppReq) self.trackBatches(ppReq, oldStateRootHash) @@ -509,7 +620,7 @@ def send3PCBatch(self): def batchDigest(reqs): return sha256(b''.join([r.digest.encode() for r in reqs])).hexdigest() - def processReqDuringBatch(self, req: Request, validReqs: List, + def processReqDuringBatch(self, req: Request, cons_time: int, validReqs: List, inValidReqs: List, rejects: List): """ This method will do dynamic validation and apply requests, also it @@ -518,7 +629,7 @@ def processReqDuringBatch(self, req: Request, validReqs: List, try: if self.isMaster: self.node.doDynamicValidation(req) - self.node.applyReq(req) + self.node.applyReq(req, cons_time) except (InvalidClientMessageException, 
UnknownIdentifier) as ex: logger.warning('{} encountered exception {} while processing {}, ' 'will reject'.format(self, ex, req)) @@ -531,8 +642,9 @@ def create3PCBatch(self, ledger_id): ppSeqNo = self.lastPrePrepareSeqNo + 1 logger.info("{} creating batch {} for ledger {} with state root {}". format(self, ppSeqNo, ledger_id, - self.stateRootHash(ledger_id, toHex=False))) - tm = time.time() * 1000 + self.stateRootHash(ledger_id, to_str=False))) + tm = self.utc_epoch + validReqs = [] inValidReqs = [] rejects = [] @@ -540,11 +652,11 @@ def create3PCBatch(self, ledger_id): and self.requestQueues[ledger_id]: key = self.requestQueues[ledger_id].pop(0) # Remove the first element fin_req = self.requests[key].finalised - self.processReqDuringBatch(fin_req, validReqs, inValidReqs, rejects) + self.processReqDuringBatch(fin_req, tm, validReqs, inValidReqs, rejects) reqs = validReqs+inValidReqs digest = self.batchDigest(reqs) - prePrepareReq = PrePrepare(self.instId, + pre_prepare = PrePrepare(self.instId, self.viewNo, ppSeqNo, tm, @@ -558,11 +670,12 @@ def create3PCBatch(self, ledger_id): logger.display('{} created a PRE-PREPARE with {} requests for ledger {}' .format(self, len(validReqs), ledger_id)) self.lastPrePrepareSeqNo = ppSeqNo + self.last_accepted_pre_prepare_time = tm if self.isMaster: self.outBox.extend(rejects) self.node.onBatchCreated(ledger_id, - self.stateRootHash(ledger_id, toHex=False)) - return prePrepareReq + self.stateRootHash(ledger_id, to_str=False)) + return pre_prepare def sendPrePrepare(self, ppReq: PrePrepare): self.sentPrePrepares[ppReq.viewNo, ppReq.ppSeqNo] = ppReq @@ -600,14 +713,6 @@ def processPostElectionMsgs(self): logger.debug("{} processing pended msg {}".format(self, msg)) self.dispatchThreePhaseMsg(*msg) - @property - def quorum(self) -> int: - r""" - Return the quorum of this RBFT system. Equal to :math:`2f + 1`. - Return None if `f` is not yet determined. 
- """ - return self.node.quorum - def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: """ Create a three phase request to be handled by the threePhaseRouter. @@ -621,6 +726,11 @@ def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any: "achieved stable checkpoint for 3 phase message", logger.debug) return + + if self.has_already_ordered(msg.viewNo, msg.ppSeqNo): + self.discard(msg, 'already ordered 3 phase message', logger.debug) + return + if self.isPpSeqNoBetweenWaterMarks(msg.ppSeqNo): try: if self.can_pp_seq_no_be_in_view(msg.viewNo, msg.ppSeqNo): @@ -648,12 +758,23 @@ def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str): :param sender: name of the node that sent this message """ if self.isPrimary is None: - self.postElectionMsgs.append((msg, sender)) - logger.debug("Replica {} pended request {} from {}". - format(self, msg, sender)) - return + if not self.can_process_since_view_change_in_progress(msg): + self.postElectionMsgs.append((msg, sender)) + logger.debug("Replica {} pended request {} from {}". + format(self, msg, sender)) + return self.dispatchThreePhaseMsg(msg, sender) + def can_process_since_view_change_in_progress(self, msg): + r = isinstance(msg, Commit) and \ + self.last_prepared_before_view_change and \ + compare_3PC_keys((msg.viewNo, msg.ppSeqNo), + self.last_prepared_before_view_change) >= 0 + if r: + logger.debug('{} can process {} since view change is in progress' + .format(self, msg)) + return r + def processPrePrepare(self, pp: PrePrepare, sender: str): """ Validate and process the PRE-PREPARE specified. 
@@ -668,21 +789,24 @@ def processPrePrepare(self, pp: PrePrepare, sender: str): # Converting each req_idrs from list to tuple pp = updateNamedTuple(pp, **{f.REQ_IDR.nm: [(i, r) for i, r in pp.reqIdr]}) - oldStateRoot = self.stateRootHash(pp.ledgerId, toHex=False) - if self.canProcessPrePrepare(pp, sender): - self.addToPrePrepares(pp) - if not self.node.isParticipating: - self.stashingWhileCatchingUp.add(key) - logger.debug('{} stashing PRE-PREPARE{}'.format(self, key)) - return + oldStateRoot = self.stateRootHash(pp.ledgerId, to_str=False) + try: + if self.canProcessPrePrepare(pp, sender): + self.addToPrePrepares(pp) + if not self.node.isParticipating: + self.stashingWhileCatchingUp.add(key) + logger.debug('{} stashing PRE-PREPARE{}'.format(self, key)) + return - if self.isMaster: - self.node.onBatchCreated(pp.ledgerId, - self.stateRootHash(pp.ledgerId, - toHex=False)) - self.trackBatches(pp, oldStateRoot) - logger.debug("{} processed incoming PRE-PREPARE{}".format(self, key), - extra={"tags": ["processing"]}) + if self.isMaster: + self.node.onBatchCreated(pp.ledgerId, + self.stateRootHash(pp.ledgerId, + to_str=False)) + self.trackBatches(pp, oldStateRoot) + logger.debug("{} processed incoming PRE-PREPARE{}".format(self, key), + extra={"tags": ["processing"]}) + except SuspiciousNode as ex: + self.node.reportSuspiciousNodeEx(ex) def tryPrepare(self, pp: PrePrepare): """ @@ -778,6 +902,7 @@ def doPrepare(self, pp: PrePrepare): prepare = Prepare(self.instId, pp.viewNo, pp.ppSeqNo, + pp.ppTime, pp.digest, pp.stateRootHash, pp.txnRootHash @@ -806,28 +931,44 @@ def nonFinalisedReqs(self, reqKeys: List[Tuple[str, int]]): """ return {key for key in reqKeys if not self.requests.isFinalised(key)} - def __is_next_pre_prepare(self, ppSeqNo: int): - if ppSeqNo != self.__last_pp_seq_no + 1: + def __is_next_pre_prepare(self, view_no: int, pp_seq_no: int): + if view_no == self.viewNo and pp_seq_no == 1: + # First PRE-PREPARE in a new view + return True + + (last_pp_view_no, 
last_pp_seq_no) = self.__last_pp_3pc + + if last_pp_view_no > view_no: + return False + + if last_pp_view_no < view_no: + assert view_no == self.viewNo + last_pp_seq_no = 0 + + if pp_seq_no - last_pp_seq_no != 1: logger.debug('{} missing PRE-PREPAREs between {} and {}'. - format(self, ppSeqNo, self.__last_pp_seq_no)) + format(self, pp_seq_no, last_pp_seq_no)) + # TODO: think of a better way, urgently + self._setup_for_non_master() return False + return True @property - def __last_pp_seq_no(self): - lastPp = self.lastPrePrepare - if lastPp: - # TODO: Is it possible that lastPp.ppSeqNo is less than - # self.lastOrderedPPSeqNo? Maybe if the node does not disconnect - # but does no work for some time or is missing PRE-PREPARES - lastPpSeqNo = lastPp.ppSeqNo if lastPp.ppSeqNo > \ - self.lastOrderedPPSeqNo \ - else self.lastOrderedPPSeqNo - else: - lastPpSeqNo = self.lastOrderedPPSeqNo - return lastPpSeqNo + def __last_pp_3pc(self): + last_pp = self.lastPrePrepare + if not last_pp: + return self.last_ordered_3pc + + last_3pc = (last_pp.viewNo, last_pp.ppSeqNo) + if compare_3PC_keys(self.last_ordered_3pc, last_3pc) > 0: + return last_3pc + + return self.last_ordered_3pc def revert(self, ledgerId, stateRootHash, reqCount): + # A batch should only be reverted if all batches that came after it + # have been reverted ledger = self.node.getLedger(ledgerId) state = self.node.getState(ledgerId) logger.info('{} reverting {} txns and state root from {} to {} for' @@ -843,19 +984,25 @@ def validate_pre_prepare(self, pp: PrePrepare, sender: str): and state. 
It will not commit though (the ledger on disk will not change, neither the committed state root hash will change) """ + if not self.is_pre_prepare_time_acceptable(pp): + self.pre_prepares_stashed_for_incorrect_time[pp.viewNo, pp.ppSeqNo] = (pp, sender, False) + raise SuspiciousNode(sender, Suspicions.PPR_TIME_WRONG, pp) + validReqs = [] inValidReqs = [] rejects = [] if self.isMaster: # If this PRE-PREPARE is not valid then state and ledger should be # reverted - oldStateRoot = self.stateRootHash(pp.ledgerId, toHex=False) - logger.debug('{} state root before processing {} is {}'. - format(self, pp, oldStateRoot)) + oldStateRoot = self.stateRootHash(pp.ledgerId, to_str=False) + oldTxnRoot = self.txnRootHash(pp.ledgerId) + logger.debug('{} state root before processing {} is {}, {}'. + format(self, pp, oldStateRoot, oldTxnRoot)) for reqKey in pp.reqIdr: req = self.requests[reqKey].finalised - self.processReqDuringBatch(req, validReqs, inValidReqs, rejects) + self.processReqDuringBatch(req, pp.ppTime, validReqs, inValidReqs, + rejects) if len(validReqs) != pp.discarded: if self.isMaster: @@ -898,13 +1045,15 @@ def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: # TODO: Check whether it is rejecting PRE-PREPARE from previous view # PRE-PREPARE should not be sent from non primary if not self.isMsgFromPrimary(pp, sender): - raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp) + # Since PRE-PREPARE might be requested from others + if (pp.viewNo, pp.ppSeqNo) not in self.requested_pre_prepares: + raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp) # A PRE-PREPARE is being sent to primary if self.isPrimaryForMsg(pp) is True: raise SuspiciousNode(sender, Suspicions.PPR_TO_PRIMARY, pp) - # A PRE-PREPARE is sent that has already been received + # Already has a PRE-PREPARE with same 3 phase key if (pp.viewNo, pp.ppSeqNo) in self.prePrepares: raise SuspiciousNode(sender, Suspicions.DUPLICATE_PPR_SENT, pp) @@ -915,16 +1064,25 @@ def 
canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool: # do not make change to state or ledger return True - if pp.ppSeqNo <= self.__last_pp_seq_no: + if compare_3PC_keys((pp.viewNo, pp.ppSeqNo), self.__last_pp_3pc) > 0: return False # ignore old pre-prepare + # Do not combine the next if conditions, the idea is to exit as soon + # as possible non_fin_reqs = self.nonFinalisedReqs(pp.reqIdr) - - non_next_upstream_pp = pp.ppSeqNo > self.__last_pp_seq_no and \ - not self.__is_next_pre_prepare(pp.ppSeqNo) - - if non_fin_reqs or non_next_upstream_pp: + if non_fin_reqs: self.enqueue_pre_prepare(pp, sender, non_fin_reqs) + # TODO: An optimisation might be to not request PROPAGATEs if some + # PROPAGATEs are present or a client request is present and + # sufficient PREPAREs and PRE-PREPARE are present, then the digest + # can be compared but this is expensive as the PREPARE + # and PRE-PREPARE contain a combined digest + self.node.request_propagates(non_fin_reqs) + return False + + non_next_pp = not self.__is_next_pre_prepare(pp.viewNo, pp.ppSeqNo) + if non_next_pp: + self.enqueue_pre_prepare(pp, sender) return False self.validate_pre_prepare(pp, sender) @@ -940,12 +1098,13 @@ def addToPrePrepares(self, pp: PrePrepare) -> None: key = (pp.viewNo, pp.ppSeqNo) self.prePrepares[key] = pp self.lastPrePrepareSeqNo = pp.ppSeqNo + self.last_accepted_pre_prepare_time = pp.ppTime self.dequeuePrepares(*key) self.dequeueCommits(*key) self.stats.inc(TPCStat.PrePrepareRcvd) self.tryPrepare(pp) - def hasPrepared(self, request) -> bool: + def has_sent_prepare(self, request) -> bool: return self.prepares.hasPrepareFrom(request, self.name) def canPrepare(self, ppReq) -> (bool, str): @@ -957,7 +1116,7 @@ def canPrepare(self, ppReq) -> (bool, str): """ if not self.shouldParticipate(ppReq.viewNo, ppReq.ppSeqNo): return False, 'should not participate in consensus for {}'.format(ppReq) - if self.hasPrepared(ppReq): + if self.has_sent_prepare(ppReq): return False, 'has already sent 
PREPARE for {}'.format(ppReq) return True, '' @@ -987,7 +1146,7 @@ def validatePrepare(self, prepare: Prepare, sender: str) -> bool: raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare) # If PRE-PREPARE not received for the PREPARE, might be slow network if not ppReq: - self.enqueuePrepare(prepare, sender) + self.enqueue_prepare(prepare, sender) return False # If primary replica if primaryStatus is True: @@ -999,7 +1158,7 @@ def validatePrepare(self, prepare: Prepare, sender: str) -> bool: raise SuspiciousNode(sender, Suspicions.UNKNOWN_PR_SENT, prepare) if primaryStatus is None and not ppReq: - self.enqueuePrepare(prepare, sender) + self.enqueue_prepare(prepare, sender) return False if prepare.digest != ppReq.digest: @@ -1022,6 +1181,7 @@ def addToPrepares(self, prepare: Prepare, sender: str): :param prepare: the PREPARE to add to the list """ self.prepares.addVote(prepare, sender) + self.dequeueCommits(prepare.viewNo, prepare.ppSeqNo) self.tryCommit(prepare) def getPrePrepare(self, viewNo, ppSeqNo): @@ -1033,16 +1193,15 @@ def getPrePrepare(self, viewNo, ppSeqNo): @property def lastPrePrepare(self): - lastSeqNo = 0 + last_3pc = (0, 0) lastPp = None if self.sentPrePrepares: - (_, s), pp = self.peekitem(self.sentPrePrepares, -1) - lastSeqNo = s + (v, s), pp = self.sentPrePrepares.peekitem(-1) + last_3pc = (v, s) lastPp = pp if self.prePrepares: - (_, s), pp = self.peekitem(self.prePrepares, -1) - if s > lastSeqNo: - lastSeqNo = s + (v, s), pp = self.prePrepares.peekitem(-1) + if compare_3PC_keys(last_3pc, (v, s)) > 0: lastPp = pp return lastPp @@ -1057,16 +1216,17 @@ def canCommit(self, prepare: Prepare) -> (bool, str): Decision criteria: - - If this replica has got just 2f PREPARE requests then commit request. - - If less than 2f PREPARE requests then probably there's no consensus on + - If this replica has got just n-f-1 PREPARE requests then commit request. 
+ - If less than n-f-1 PREPARE requests then probably there's no consensus on the request; don't commit - - If more than 2f then already sent COMMIT; don't commit + - If more than n-f-1 then already sent COMMIT; don't commit :param prepare: the PREPARE """ if not self.shouldParticipate(prepare.viewNo, prepare.ppSeqNo): return False, 'should not participate in consensus for {}'.format(prepare) - if not self.prepares.hasQuorum(prepare, self.f): + quorum = self.quorums.prepare.value + if not self.prepares.hasQuorum(prepare, quorum): return False, 'does not have prepare quorum for {}'.format(prepare) if self.hasCommitted(prepare): return False, 'has already sent COMMIT for {}'.format(prepare) @@ -1085,8 +1245,10 @@ def validateCommit(self, commit: Commit, sender: str) -> bool: self.enqueueCommit(commit, sender) return False - if (key not in self.prepares and - key not in self.preparesWaitingForPrePrepare): + # TODO: Fix problem that can occur with a primary and non-primary(s) + # colluding and the honest nodes being slow + if (key not in self.prepares and key not in self.sentPrePrepares) and \ + key not in self.preparesWaitingForPrePrepare: logger.debug("{} rejecting COMMIT{} due to lack of prepares". format(self, key)) # raise SuspiciousNode(sender, Suspicions.UNKNOWN_CM_SENT, commit) @@ -1107,32 +1269,30 @@ def addToCommits(self, commit: Commit, sender: str): self.commits.addVote(commit, sender) self.tryOrder(commit) - def hasOrdered(self, viewNo, ppSeqNo) -> bool: - return (viewNo, ppSeqNo) in self.ordered - def canOrder(self, commit: Commit) -> Tuple[bool, Optional[str]]: """ Return whether the specified commitRequest can be returned to the node. 
Decision criteria: - - If have got just 2f+1 Commit requests then return request to node - - If less than 2f+1 of commit requests then probably don't have + - If have got just n-f Commit requests then return request to node + - If less than n-f of commit requests then probably don't have consensus on the request; don't return request to node - - If more than 2f+1 then already returned to node; don't return request + - If more than n-f then already returned to node; don't return request to node :param commit: the COMMIT """ - if not self.commits.hasQuorum(commit, self.f): - return False, "no quorum: {} commits where f is {}".\ - format(commit, self.f) + quorum = self.quorums.commit.value + if not self.commits.hasQuorum(commit, quorum): + return False, "no quorum ({}): {} commits where f is {}".\ + format(quorum, commit, self.f) key = (commit.viewNo, commit.ppSeqNo) - if self.hasOrdered(*key): + if self.has_already_ordered(*key): return False, "already ordered" - if not self.all_prev_ordered(commit): + if commit.ppSeqNo > 1 and not self.all_prev_ordered(commit): viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo if viewNo not in self.stashed_out_of_order_commits: self.stashed_out_of_order_commits[viewNo] = {} @@ -1149,9 +1309,10 @@ def all_prev_ordered(self, commit: Commit): """ # TODO: This method does a lot of work, choose correct data # structures to make it efficient. + viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo - if self.ordered and self.ordered[-1] == (viewNo, ppSeqNo-1): + if self.last_ordered_3pc == (viewNo, ppSeqNo-1): # Last ordered was in same view as this COMMIT return True @@ -1177,8 +1338,8 @@ def process_stashed_out_of_order_commits(self): # were stashed due to lack of commits before them and orders them if it can logger.debug('{} trying to order from out of order commits. {} {}'. 
format(self, self.ordered, self.stashed_out_of_order_commits)) - if self.ordered: - lastOrdered = self.ordered[-1] + if self.last_ordered_3pc: + lastOrdered = self.last_ordered_3pc vToRemove = set() for v in self.stashed_out_of_order_commits: if v < lastOrdered[0] and self.stashed_out_of_order_commits[v]: @@ -1215,20 +1376,24 @@ def isLowestCommitInView(self, commit): logger.debug('{} encountered {} which belongs to a later view' .format(self, commit)) return False - if view_no != self.viewNo and view_no not in self.view_ends_at: - logger.debug('{} encountered {} from past view for which dont know ' - 'the end of view'.format(self, commit)) - return False + return commit.ppSeqNo == 1 + + def last_prepared_certificate_in_view(self) -> Optional[Tuple[int, int]]: + # Pick the latest sent COMMIT in the view. + # TODO: Consider stashed messages too? + assert self.isMaster + return max_3PC_key(self.commits.keys()) if self.commits else None - ppSeqNos = [] - for v, p in self.commits: - if v == commit.viewNo: - ppSeqNos.append(p) - return min(ppSeqNos) == commit.ppSeqNo if ppSeqNos else True + def has_prepared(self, key): + return self.getPrePrepare(*key) and self.prepares.hasQuorum( + ThreePhaseKey(*key), self.quorums.prepare.value) def doOrder(self, commit: Commit): key = (commit.viewNo, commit.ppSeqNo) logger.info("{} ordering COMMIT{}".format(self, key)) + return self.order_3pc_key(key) + + def order_3pc_key(self, key): pp = self.getPrePrepare(*key) assert pp self.addToOrdered(*key) @@ -1250,7 +1415,7 @@ def doOrder(self, commit: Commit): 'catchup process'.format(self, pp.ppSeqNo)) for reqKey in pp.reqIdr[:pp.discarded]: req = self.requests[reqKey].finalised - self.node.applyReq(req) + self.node.applyReq(req, pp.ppTime) self.stashingWhileCatchingUp.remove(key) for k in pp.reqIdr: @@ -1266,31 +1431,54 @@ def doOrder(self, commit: Commit): self.addToCheckpoint(pp.ppSeqNo, pp.digest) return True - def processCheckpoint(self, msg: Checkpoint, sender: str): - 
logger.debug('{} received checkpoint {} from {}'. - format(self, msg, sender)) + def processCheckpoint(self, msg: Checkpoint, sender: str) -> bool: + """ + Process checkpoint messages + + :return: whether processed (True) or stashed (False) + """ + + logger.debug('{} processing checkpoint {} from {}' + .format(self, msg, sender)) + seqNoEnd = msg.seqNoEnd if self.isPpSeqNoStable(seqNoEnd): - self.discard(msg, reason="Checkpoint already stable", + self.discard(msg, + reason="Checkpoint already stable", logMethod=logger.debug) - return + return True seqNoStart = msg.seqNoStart key = (seqNoStart, seqNoEnd) - if key in self.checkpoints and self.checkpoints[key].digest: - ckState = self.checkpoints[key] - if ckState.digest == msg.digest: - ckState.receivedDigests[sender] = msg.digest - else: - logger.error("{} received an incorrect digest {} for " - "checkpoint {} from {}".format(self, - msg.digest, - key, - sender)) - return - self.checkIfCheckpointStable(key) - else: + + if key not in self.checkpoints or not self.checkpoints[key].digest: self.stashCheckpoint(msg, sender) + self.__start_catchup_if_needed() + return False + + checkpoint_state = self.checkpoints[key] + # Raise the error only if master since only master's last + # ordered 3PC is communicated during view change + if self.isMaster and checkpoint_state.digest != msg.digest: + logger.error("{} received an incorrect digest {} for " + "checkpoint {} from {}".format(self, + msg.digest, + key, + sender)) + return True + + checkpoint_state.receivedDigests[sender] = msg.digest + self.checkIfCheckpointStable(key) + return True + + def __start_catchup_if_needed(self): + stashed_chks_with_quorum = self.stashed_checkpoints_with_quorum() + is_stashed_enough = stashed_chks_with_quorum > self.STASHED_CHECKPOINTS_BEFORE_CATCHUP + is_non_primary_master = self.isMaster and not self.isPrimary + if is_stashed_enough and is_non_primary_master: + logger.info('{} has stashed {} checkpoints with quorum ' + 'so the catchup 
procedure starts'.format(self, stashed_chks_with_quorum)) + self.node.start_catchup() def _newCheckpointState(self, ppSeqNo, digest) -> CheckpointState: s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1 @@ -1313,6 +1501,9 @@ def addToCheckpoint(self, ppSeqNo, digest): s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1 if len(state.digests) == self.config.CHK_FREQ: + # TODO CheckpointState/Checkpoint is not a namedtuple anymore + # 1. check if updateNamedTuple works for the new message type + # 2. choose another name state = updateNamedTuple(state, digest=sha256( serialize(state.digests).encode() @@ -1327,6 +1518,9 @@ def markCheckPointStable(self, seqNo): previousCheckpoints = [] for (s, e), state in self.checkpoints.items(): if e == seqNo: + # TODO CheckpointState/Checkpoint is not a namedtuple anymore + # 1. check if updateNamedTuple works for the new message type + # 2. choose another name state = updateNamedTuple(state, isStable=True) self.checkpoints[s, e] = state break @@ -1340,13 +1534,14 @@ def markCheckPointStable(self, seqNo): for k in previousCheckpoints: logger.debug("{} removing previous checkpoint {}".format(self, k)) self.checkpoints.pop(k) - self.gc(seqNo) + self._gc(seqNo) logger.debug("{} marked stable checkpoint {}".format(self, (s, e))) self.processStashedMsgsForNewWaterMarks() def checkIfCheckpointStable(self, key: Tuple[int, int]): ckState = self.checkpoints[key] - if len(ckState.receivedDigests) == 2 * self.f: + # TODO: what if len(ckState.receivedDigests) > 2 * f? 
+ if len(ckState.receivedDigests) == self.quorums.checkpoint.value: self.markCheckPointStable(ckState.seqNo) return True else: @@ -1355,22 +1550,70 @@ def checkIfCheckpointStable(self, key: Tuple[int, int]): return False def stashCheckpoint(self, ck: Checkpoint, sender: str): + logger.debug('{} stashing {} from {}'.format(self, ck, sender)) seqNoStart, seqNoEnd = ck.seqNoStart, ck.seqNoEnd - if (seqNoStart, seqNoEnd) not in self.stashedRecvdCheckpoints: - self.stashedRecvdCheckpoints[seqNoStart, seqNoEnd] = {} - self.stashedRecvdCheckpoints[seqNoStart, seqNoEnd][sender] = ck + if ck.viewNo not in self.stashedRecvdCheckpoints: + self.stashedRecvdCheckpoints[ck.viewNo] = {} + stashed_for_view = self.stashedRecvdCheckpoints[ck.viewNo] + if (seqNoStart, seqNoEnd) not in stashed_for_view: + stashed_for_view[seqNoStart, seqNoEnd] = {} + stashed_for_view[seqNoStart, seqNoEnd][sender] = ck + + def _clear_prev_view_pre_prepares(self): + to_remove = [] + for idx, (pp, _, _) in enumerate(self.prePreparesPendingFinReqs): + if pp.viewNo < self.viewNo: + to_remove.insert(0, idx) + for idx in to_remove: + self.prePreparesPendingFinReqs.pop(idx) + + for (v, p) in list(self.prePreparesPendingPrevPP.keys()): + if v < self.viewNo: + self.prePreparesPendingPrevPP.pop((v, p)) + + def _clear_prev_view_stashed_checkpoints(self): + for view_no in list(self.stashedRecvdCheckpoints.keys()): + if view_no < self.viewNo: + logger.debug('{} found stashed checkpoints for view {} which ' + 'is less than the current view {}, so ignoring it' + .format(self, view_no, self.viewNo)) + self.stashedRecvdCheckpoints.pop(view_no) + + def stashed_checkpoints_with_quorum(self): + quorum = self.quorums.checkpoint + return sum(quorum.is_reached(len(senders)) + for senders in self.stashedRecvdCheckpoints.get(self.viewNo, {}).values()) def processStashedCheckpoints(self, key): - i = 0 - if key in self.stashedRecvdCheckpoints: - for sender, ck in self.stashedRecvdCheckpoints[key].items(): - 
self.processCheckpoint(ck, sender) - i += 1 - logger.debug('{} processed {} stashed checkpoints for {}'. - format(self, i, key)) - return i + self._clear_prev_view_stashed_checkpoints() + + if key not in self.stashedRecvdCheckpoints.get(self.viewNo, {}): + logger.debug("{} have no stashed checkpoints for {}") + return 0 + + stashed = self.stashedRecvdCheckpoints[self.viewNo][key] + total_processed = 0 + senders_of_completed_checkpoints = [] + + for sender, checkpoint in stashed.items(): + if self.processCheckpoint(checkpoint, sender): + senders_of_completed_checkpoints.append(sender) + total_processed += 1 + + for sender in senders_of_completed_checkpoints: + # unstash checkpoint + del stashed[sender] + if len(stashed) == 0: + del self.stashedRecvdCheckpoints[self.viewNo][key] - def gc(self, tillSeqNo): + restashed_num = total_processed - len(senders_of_completed_checkpoints) + logger.debug('{} processed {} stashed checkpoints for {}, ' + '{} of them were stashed again' + .format(self, total_processed, key, restashed_num)) + + return total_processed + + def _gc(self, tillSeqNo): logger.debug("{} cleaning up till {}".format(self, tillSeqNo)) tpcKeys = set() reqKeys = set() @@ -1390,12 +1633,18 @@ def gc(self, tillSeqNo): logger.debug("{} found {} request keys to clean". 
format(self, len(reqKeys))) + to_clean_up = ( + self.sentPrePrepares, + self.prePrepares, + self.prepares, + self.commits, + self.batches, + self.requested_pre_prepares, + self.pre_prepares_stashed_for_incorrect_time, + ) for k in tpcKeys: - self.sentPrePrepares.pop(k, None) - self.prePrepares.pop(k, None) - self.prepares.pop(k, None) - self.commits.pop(k, None) - self.batches.pop(k[1], None) + for coll in to_clean_up: + coll.pop(k, None) for k in reqKeys: self.requests[k].forwardedTo -= 1 @@ -1404,6 +1653,23 @@ def gc(self, tillSeqNo): format(self, len(reqKeys))) self.requests.pop(k) + self.compact_ordered() + + def _gc_before_new_view(self): + # Trigger GC for all batches of old view + # Clear any checkpoints, since they are valid only in a view + self._gc(self.last_ordered_3pc[1]) + self.checkpoints.clear() + self._clear_prev_view_stashed_checkpoints() + self._clear_prev_view_pre_prepares() + + def _reset_watermarks_before_new_view(self): + # Reset any previous view watermarks since for view change to + # successfully complete, the node must have reached the same state + # as other nodes + self.h = 0 + self._lastPrePrepareSeqNo = self.h + def stashOutsideWatermarks(self, item: Union[ReqDigest, Tuple]): self.stashingWhileOutsideWaterMarks.append(item) @@ -1426,28 +1692,19 @@ def processStashedMsgsForNewWaterMarks(self): format(self, item)) itemsToConsume -= 1 - @staticmethod - def peekitem(d, i): - # Adding it since its not present in version supported by - # Ubuntu repositories. 
- key = d._list[i] - return key, d[key] - @property def firstCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: - return self.peekitem(self.checkpoints, 0) - # return self.checkpoints.peekitem(0) + return self.checkpoints.peekitem(0) @property def lastCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]: if not self.checkpoints: return None else: - return self.peekitem(self.checkpoints, -1) - # return self.checkpoints.peekitem(-1) + return self.checkpoints.peekitem(-1) def isPpSeqNoStable(self, ppSeqNo): """ @@ -1462,13 +1719,28 @@ def isPpSeqNoStable(self, ppSeqNo): else: return False + def has_already_ordered(self, view_no, pp_seq_no): + return compare_3PC_keys((view_no, pp_seq_no), self.last_ordered_3pc) >= 0 + def isPpSeqNoBetweenWaterMarks(self, ppSeqNo: int): return self.h < ppSeqNo <= self.H - def addToOrdered(self, viewNo: int, ppSeqNo: int): - self.ordered.add((viewNo, ppSeqNo)) - if ppSeqNo > self.lastOrderedPPSeqNo: - self.lastOrderedPPSeqNo = ppSeqNo + def addToOrdered(self, view_no: int, pp_seq_no: int): + self.ordered.add((view_no, pp_seq_no)) + self.last_ordered_3pc = (view_no, pp_seq_no) + + # This might not be called always as Pre-Prepare might be requested + # but never received and catchup might be done + self.requested_pre_prepares.pop((view_no, pp_seq_no), None) + + def compact_ordered(self): + min_allowed_view_no = self.viewNo - 1 + i = 0 + for view_no, _ in self.ordered: + if view_no >= min_allowed_view_no: + break + i += 1 + self.ordered = self.ordered[i:] def enqueue_pre_prepare(self, ppMsg: PrePrepare, sender: str, nonFinReqs: Set=None): @@ -1481,7 +1753,7 @@ def enqueue_pre_prepare(self, ppMsg: PrePrepare, sender: str, # pre-prepare and over-write the correct one? logger.debug( "Queueing pre-prepares due to unavailability of previous " - "pre-prepares. PrePrepare {} from {}".format(ppMsg, sender)) + "pre-prepares. 
{} from {}".format(ppMsg, sender)) self.prePreparesPendingPrevPP[ppMsg.viewNo, ppMsg.ppSeqNo] = (ppMsg, sender) def dequeuePrePrepares(self): @@ -1510,7 +1782,7 @@ def dequeuePrePrepares(self): r = 0 while self.prePreparesPendingPrevPP and self.__is_next_pre_prepare( - self.prePreparesPendingPrevPP.iloc[0][1]): + *self.prePreparesPendingPrevPP.iloc[0]): _, (pp, sender) = self.prePreparesPendingPrevPP.popitem(last=False) if not self.can_pp_seq_no_be_in_view(pp.viewNo, pp.ppSeqNo): self.discard(pp, "Pre-Prepare from a previous view", @@ -1520,13 +1792,17 @@ def dequeuePrePrepares(self): r += 1 return r - def enqueuePrepare(self, pMsg: Prepare, sender: str): - logger.debug("Queueing prepare due to unavailability of PRE-PREPARE. " - "Prepare {} from {}".format(pMsg, sender)) + def enqueue_prepare(self, pMsg: Prepare, sender: str): + logger.debug("{} queueing prepare due to unavailability of PRE-PREPARE. " + "Prepare {} from {}".format(self, pMsg, sender)) key = (pMsg.viewNo, pMsg.ppSeqNo) if key not in self.preparesWaitingForPrePrepare: self.preparesWaitingForPrePrepare[key] = deque() self.preparesWaitingForPrePrepare[key].append((pMsg, sender)) + if key not in self.pre_prepares_stashed_for_incorrect_time: + self._request_pre_prepare_if_possible(key) + else: + self._process_stashed_pre_prepare_for_time_if_possible(key) def dequeuePrepares(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) @@ -1555,6 +1831,10 @@ def enqueueCommit(self, request: Commit, sender: str): def dequeueCommits(self, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) if key in self.commitsWaitingForPrepare: + if not self.has_prepared(key): + logger.debug('{} has not prepared {}, will dequeue the ' + 'COMMITs later'.format(self, key)) + return i = 0 # Keys of pending prepares that will be processed below while self.commitsWaitingForPrepare[key]: @@ -1600,20 +1880,164 @@ def can_pp_seq_no_be_in_view(self, view_no, pp_seq_no): :return: """ assert view_no <= self.viewNo - return view_no == 
self.viewNo or (view_no < self.viewNo and ( - view_no in self.view_ends_at and - pp_seq_no <= self.view_ends_at[view_no][0])) + return view_no == self.viewNo or (view_no < self.viewNo and + self.last_prepared_before_view_change and + compare_3PC_keys((view_no, pp_seq_no), + self.last_prepared_before_view_change) >= 0) - @property - def threePhaseState(self): - # TODO: This method is incomplete - # Gets the current stable and unstable checkpoints and creates digest - # of unstable checkpoints - if self.checkpoints: - pass + def _request_pre_prepare_if_possible(self, three_pc_key) -> bool: + """ + Check if has an acceptable PRE_PREPARE already stashed, if not then + check count of PREPAREs, make sure >f consistent PREPAREs are found, + store the acceptable PREPARE state (digest, roots) for verification of + the received PRE-PREPARE + """ + if len(self.preparesWaitingForPrePrepare[three_pc_key]) < self.quorums.prepare.value: + logger.debug('{} not requesting a PRE-PREPARE because does not have' + ' sufficient PREPAREs for {}'.format(self, three_pc_key)) + return False + + if three_pc_key in self.requested_pre_prepares: + logger.debug('{} not requesting a PRE-PREPARE since already ' + 'requested for {}'.format(self, three_pc_key)) + return False + + if three_pc_key in self.prePreparesPendingPrevPP: + logger.debug('{} not requesting a PRE-PREPARE since already found ' + 'stashed for {}'.format(self, three_pc_key)) + return False + + digest, state_root, txn_root, prepare_senders = \ + self.get_acceptable_stashed_prepare_state(three_pc_key) + + # Choose a better data structure for `prePreparesPendingFinReqs` + pre_prepares = [pp for pp, _, _ in self.prePreparesPendingFinReqs + if (pp.viewNo, pp.ppSeqNo) == three_pc_key] + if pre_prepares: + if [pp for pp in pre_prepares if + (pp.digest, pp.stateRootHash, pp.txnRootHash) == (digest, state_root, txn_root)]: + logger.debug('{} not requesting a PRE-PREPARE since already ' + 'found stashed for {}'.format(self, three_pc_key)) + 
return False + + # TODO: Using a timer to retry would be a better thing to do + logger.debug('{} requesting PRE-PREPARE({}) from {}'. + format(self, three_pc_key, prepare_senders)) + # An optimisation can be to request PRE-PREPARE from f+1 or + # f+x (f+x<2f) nodes only rather than 2f since only 1 correct + # PRE-PREPARE is needed. + self.node.request_msg(PREPREPARE, {f.INST_ID.nm: self.instId, + f.VIEW_NO.nm: three_pc_key[0], + f.PP_SEQ_NO.nm: three_pc_key[1]}, + [self.getNodeName(s) for s in prepare_senders]) + self.requested_pre_prepares[three_pc_key] = digest, state_root, txn_root + return True + + def get_acceptable_stashed_prepare_state(self, three_pc_key): + prepares = {s: (m.digest, m.stateRootHash, m.txnRootHash) for m, s in + self.preparesWaitingForPrePrepare[three_pc_key]} + acceptable = mostCommonElement(prepares.values()) + return (*acceptable, {s for s, state in prepares.items() + if state == acceptable}) + + def process_requested_pre_prepare(self, pp: PrePrepare, sender: str): + if pp is None: + logger.debug('{} received null PRE-PREPARE from {}'. + format(self, sender)) + return + key = (pp.viewNo, pp.ppSeqNo) + logger.debug('{} received requested PRE-PREPARE({}) from {}'. 
+ format(self, key, sender)) + + if key not in self.requested_pre_prepares: + logger.debug('{} had either not requested a PRE-PREPARE or already ' + 'received a PRE-PREPARE for {}'.format(self, key)) + return + if self.has_already_ordered(*key): + logger.debug('{} has already ordered PRE-PREPARE({})'.format(self, key)) + return + if self.getPrePrepare(*key): + logger.debug( + '{} has already received PRE-PREPARE({})'.format(self, key)) + return + # There still might be stashed PRE-PREPARE but not checking that + # it is expensive, also reception of PRE-PREPAREs is idempotent + digest, state_root, txn_root = self.requested_pre_prepares[key] + if (pp.digest, pp.stateRootHash, pp.txnRootHash) == (digest, state_root, txn_root): + self.processThreePhaseMsg(pp, sender) else: - state = [] - return ThreePCState(self.instId, state) + self.discard(pp, reason='does not have expected state({} {} {})'. + format(digest, state_root, txn_root), + logMethod=logger.warning) + + def is_pre_prepare_time_correct(self, pp: PrePrepare) -> bool: + """ + Check if this PRE-PREPARE is not older than (not checking for greater + than since batches maybe sent in less than 1 second) last PRE-PREPARE + and in a sufficient range of local clock's UTC time. + :param pp: + :return: + """ + return (self.last_accepted_pre_prepare_time is None or + pp.ppTime >= self.last_accepted_pre_prepare_time) and \ + abs(pp.ppTime - self.utc_epoch) <= self.config.ACCEPTABLE_DEVIATION_PREPREPARE_SECS + + def is_pre_prepare_time_acceptable(self, pp: PrePrepare) -> bool: + """ + Returns True or False depending on the whether the time in PRE-PREPARE + is acceptable. 
Can return True if time is not acceptable but sufficient + PREPAREs are found to support the PRE-PREPARE + :param pp: + :return: + """ + correct = self.is_pre_prepare_time_correct(pp) + if not correct: + logger.error('{} found {} to have incorrect time.'.format(self, pp)) + key = (pp.viewNo, pp.ppSeqNo) + if key in self.pre_prepares_stashed_for_incorrect_time and \ + self.pre_prepares_stashed_for_incorrect_time[key][-1]: + logger.info('{} marking time as correct for {}'.format(self, pp)) + correct = True + return correct + + def _process_stashed_pre_prepare_for_time_if_possible(self, + key: Tuple[int, int]): + """ + Check if any PRE-PREPAREs that were stashed since their time was not + acceptable, can now be accepted since enough PREPAREs are received + """ + logger.debug('{} going to process stashed PRE-PREPAREs with ' + 'incorrect times'.format(self)) + q = self.quorums.f + if len(self.preparesWaitingForPrePrepare[key]) > q: + times = [pr.ppTime for (pr, _) in + self.preparesWaitingForPrePrepare[key]] + most_common_time = mostCommonElement(times) + if self.quorums.timestamp.is_reached(times.count(most_common_time)): + logger.debug('{} found sufficient PREPAREs for the ' + 'PRE-PREPARE{}'.format(self, key)) + stashed_pp = self.pre_prepares_stashed_for_incorrect_time + pp, sender, done = stashed_pp[key] + if done: + logger.debug('{} already processed PRE-PREPARE{}'.format(self, key)) + return True + # True is set since that will indicate to `is_pre_prepare_time_acceptable` + # that sufficient PREPAREs are received + stashed_pp[key] = (pp, sender, True) + self.processPrePrepare(pp, sender) + return True + return False + + # @property + # def threePhaseState(self): + # # TODO: This method is incomplete + # # Gets the current stable and unstable checkpoints and creates digest + # # of unstable checkpoints + # if self.checkpoints: + # pass + # else: + # state = [] + # return ThreePCState(self.instId, state) def process3PhaseState(self, msg: ThreePCState, sender: str): 
# TODO: This is not complete @@ -1634,29 +2058,59 @@ def send(self, msg, stat=None) -> None: self.stats.inc(stat) self.outBox.append(msg) - def revert_unordered_batches(self, ledger_id): + def revert_unordered_batches(self): + i = 0 for key in sorted(self.batches.keys(), reverse=True): - if key > self.lastOrderedPPSeqNo: - count, _, prevStateRoot = self.batches.pop(key) + if compare_3PC_keys(self.last_ordered_3pc, key) > 0: + ledger_id, count, _, prevStateRoot = self.batches.pop(key) self.revert(ledger_id, prevStateRoot, count) + i += 1 else: break + return i - def caught_up_till_pp_seq_no(self, last_caught_up_pp_seq_no): - self.addToOrdered(self.viewNo, last_caught_up_pp_seq_no) - self._remove_till_caught_up_pp_seq_no(last_caught_up_pp_seq_no) + def caught_up_till_3pc(self, last_caught_up_3PC): + self.last_ordered_3pc = last_caught_up_3PC + self._remove_till_caught_up_3pc(last_caught_up_3PC) + self._remove_ordered_from_queue(last_caught_up_3PC) - def _remove_till_caught_up_pp_seq_no(self, last_caught_up_pp_seq_no): + def _remove_till_caught_up_3pc(self, last_caught_up_3PC): outdated_pre_prepares = set() - outdated_ledger_ids = set() for key, pp in self.prePrepares.items(): - if (key[1] <= last_caught_up_pp_seq_no): - outdated_pre_prepares.add((pp.viewNo, pp.ppSeqNo)) - outdated_ledger_ids.add(pp.ledgerId) - self.prePrepares.pop(key, None) - self.ordered.add((pp.viewNo, pp.ppSeqNo)) - - for key in sorted(list(outdated_pre_prepares), key=itemgetter(1), reverse=True): - self.batches.pop(key[1], None) + if compare_3PC_keys(key, last_caught_up_3PC) > 0: + outdated_pre_prepares.add(key) + + logger.debug('{} going to remove messages for {} 3PC keys'. 
+ format(self, len(outdated_pre_prepares))) + + for key in outdated_pre_prepares: + self.batches.pop(key, None) self.sentPrePrepares.pop(key, None) - self.prepares.pop(key, None) \ No newline at end of file + self.prePrepares.pop(key, None) + self.prepares.pop(key, None) + self.commits.pop(key, None) + + def _remove_ordered_from_queue(self, last_caught_up_3PC=None): + """ + Remove any Ordered that the replica might be sending to node which is + less than or equal to `last_caught_up_3PC` if `last_caught_up_3PC` is + passed else remove all ordered, needed in catchup + """ + to_remove = [] + for i, msg in enumerate(self.outBox): + if isinstance(msg, Ordered) and (not last_caught_up_3PC or + compare_3PC_keys( + (msg.viewNo, msg.ppSeqNo), + last_caught_up_3PC) >= 0): + to_remove.append(i) + + logger.debug('{} going to remove {} Ordered messages from outbox'. + format(self, len(to_remove))) + + # Removing Ordered from queue but returning `Ordered` in order that + # they should be processed. + removed = [] + for i in reversed(to_remove): + removed.insert(0, self.outBox[i]) + del self.outBox[i] + return removed diff --git a/plenum/server/req_handler.py b/plenum/server/req_handler.py index 473471e208..a342eeb61f 100644 --- a/plenum/server/req_handler.py +++ b/plenum/server/req_handler.py @@ -1,6 +1,7 @@ -from binascii import unhexlify from typing import List +import base58 + from plenum.common.ledger import Ledger from plenum.common.request import Request from plenum.persistence.util import txnsWithSeqNo @@ -24,11 +25,11 @@ def __init__(self, ledger: Ledger, state: State): def validate(self, req: Request, config=None): """ - Validates request. Raises exception if requiest is invalid. + Validates request. Raises exception if request is invalid. 
""" pass - def apply(self, req: Request): + def apply(self, req: Request, cons_time: int): """ Applies request """ @@ -53,8 +54,7 @@ def commit(self, txnCount, stateRoot, txnRoot) -> List: (seqNoStart, seqNoEnd), committedTxns = \ self.ledger.commitTxns(txnCount) - stateRoot = unhexlify(stateRoot.encode()) - txnRoot = self.ledger.hashToStr(unhexlify(txnRoot.encode())) + stateRoot = base58.b58decode(stateRoot.encode()) # Probably the following assertion fail should trigger catchup assert self.ledger.root_hash == txnRoot, '{} {}'.format( self.ledger.root_hash, txnRoot) diff --git a/plenum/server/router.py b/plenum/server/router.py index febfdc7737..9814c3a5d3 100644 --- a/plenum/server/router.py +++ b/plenum/server/router.py @@ -1,8 +1,10 @@ from collections import deque, OrderedDict from inspect import isawaitable -from typing import Callable, Any, NamedTuple, Union +from typing import Callable, Any, NamedTuple, Union, Iterable from typing import Tuple +Route = Tuple[Union[type, NamedTuple], Callable] + class Router: """ @@ -15,7 +17,7 @@ class Router: (2) a function that handles the message """ - def __init__(self, *routes: Tuple[Union[type, NamedTuple], Callable]): + def __init__(self, *routes: Route): """ Create a new router with a list of routes @@ -25,6 +27,18 @@ def __init__(self, *routes: Tuple[Union[type, NamedTuple], Callable]): """ self.routes = OrderedDict(routes) + def add(self, route: Route): + k, v = route + self.routes[k] = v + + def extend(self, routes: Iterable[Route]): + for r in routes: + self.add(r) + + def remove(self, routes: Iterable[Route]): + for k in routes: + self.routes.pop(k, None) + def getFunc(self, o: Any) -> Callable: """ Get the next function from the list of routes that is capable of diff --git a/plenum/server/suspicion_codes.py b/plenum/server/suspicion_codes.py index d4e63dc7d0..8e866ba894 100644 --- a/plenum/server/suspicion_codes.py +++ b/plenum/server/suspicion_codes.py @@ -42,27 +42,29 @@ class Suspicions: Suspicion(16, 
"REELECTION request already received") WRONG_PPSEQ_NO = \ Suspicion(17, "Wrong PRE-PREPARE seq number") - PR_TIME_WRONG = \ - Suspicion(5, "PREPARE time does not match with PRE-PREPARE") + PPR_TIME_WRONG = \ + Suspicion(18, "PRE-PREPARE time not acceptable") CM_TIME_WRONG = \ - Suspicion(5, "COMMIT time does not match with PRE-PREPARE") + Suspicion(19, "COMMIT time does not match with PRE-PREPARE") PPR_REJECT_WRONG = \ - Suspicion(16, "Pre-Prepare message has incorrect reject") + Suspicion(20, "Pre-Prepare message has incorrect reject") PPR_STATE_WRONG = \ - Suspicion(17, "Pre-Prepare message has incorrect state trie root") + Suspicion(21, "Pre-Prepare message has incorrect state trie root") PPR_TXN_WRONG = \ - Suspicion(18, "Pre-Prepare message has incorrect transaction tree root") + Suspicion(22, "Pre-Prepare message has incorrect transaction tree root") PR_STATE_WRONG = \ - Suspicion(19, "Prepare message has incorrect state trie root") + Suspicion(23, "Prepare message has incorrect state trie root") PR_TXN_WRONG = \ - Suspicion(20, "Prepare message has incorrect transaction tree root") - PRIMARY_DEGRADED = Suspicion(21, 'Primary of master protocol instance ' + Suspicion(24, "Prepare message has incorrect transaction tree root") + PRIMARY_DEGRADED = Suspicion(25, 'Primary of master protocol instance ' 'degraded the performance') - PRIMARY_DISCONNECTED = Suspicion(22, 'Primary of master protocol instance ' + PRIMARY_DISCONNECTED = Suspicion(26, 'Primary of master protocol instance ' 'disconnected') - PRIMARY_ABOUT_TO_BE_DISCONNECTED = Suspicion(23, 'Primary of master ' + PRIMARY_ABOUT_TO_BE_DISCONNECTED = Suspicion(27, 'Primary of master ' 'protocol instance ' 'about to be disconnected') + INSTANCE_CHANGE_TIMEOUT = Suspicion(28, 'View change could not complete ' + 'in time') @classmethod def get_list(cls): diff --git a/plenum/server/view_change/__init__.py b/plenum/server/view_change/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/plenum/server/view_change/view_change_msg_filter.py b/plenum/server/view_change/view_change_msg_filter.py new file mode 100644 index 0000000000..afce0d4c60 --- /dev/null +++ b/plenum/server/view_change/view_change_msg_filter.py @@ -0,0 +1,38 @@ +from abc import abstractmethod +from typing import Optional + +from plenum.common.request import Request +from plenum.common.messages.node_messages import * +from plenum.server.msg_filter import MessageFilter + + +class ViewChangeMessageFilter(MessageFilter): + + NAME = "ViewChangeMessageFilter" + + def __init__(self, view_no): + self.__view_no = view_no + + def filter_node_to_node(self, msg) -> Optional[str]: + if self.__is_next_view_3pc_msg(msg): + return 'A message for the next view' + + return None + + def filter_client_to_node(self, req) -> Optional[str]: + if isinstance(req, Request): + return 'Can not process requests when view change is in progress' + + return None + + def __is_next_view_3pc_msg(self, msg): + msgs_3pc = [PrePrepare, + Prepare, + Commit] + + for msg_3pc in msgs_3pc: + if isinstance(msg, msg_3pc) and\ + msg.viewNo > self.__view_no: + return True + + return False diff --git a/plenum/test/README.md b/plenum/test/README.md index 3981d34d20..cffe426845 100644 --- a/plenum/test/README.md +++ b/plenum/test/README.md @@ -80,7 +80,7 @@ be able to monitor the master instance. For that purpose, each node runs a monitoring module that computes the throughput of the f +1 protocol instances. -If 2f +1 nodes observe that the +If n-f nodes observe that the ratio between the performance of the master instance and the best backup instance is lower than a given threshold, then the primary of the master instance is considered to be malicious, @@ -223,14 +223,14 @@ only to him, in order to boost the performance of the protocol instance of the malicious primary at the expense of the other protocol instances. 
-Following the reception of 2f matching +Following the reception of n-f-1 matching PREPARE messages from distinct replicas of the same protocol instance that are consistent with a PRE-PREPARE message, a replica r sends a commit message ~µr that is authenticated with a MAC authenticator (Step 5 in the figure). -After the reception of 2f + 1 matching COMMIT +After the reception of n-f matching COMMIT messages from distinct replicas of the same protocol instance, a replica gives back the ordered request to the node it is running on. @@ -259,7 +259,7 @@ monitoring mechanism works as follows. Each node keeps a counter nbreqs-i for each protocol instance i, which cor- responds to the number of requests that have been ordered by the replica of the corresponding instance (i.e. for which -2f + 1 COMMIT messages have been collected). +n-f COMMIT messages have been collected). Periodically, the node uses these counters to compute the throughput of @@ -339,7 +339,7 @@ an INSTANCE CHANGE message. It does so only if it also observes too much difference between the performance of the replicas. -Upon the reception of 2f + 1 valid and matching +Upon the reception of n-f valid and matching INSTANCE CHANGE messages, the node increments cpi and initiates a view change on every protocol instance that runs locally. diff --git a/plenum/test/__init__.py b/plenum/test/__init__.py index c324fc4e17..ba91b26b6e 100644 --- a/plenum/test/__init__.py +++ b/plenum/test/__init__.py @@ -1,8 +1,14 @@ # -*- coding: utf-8 -*- +# Setting this to True will not apply spies on any method. This will lead to +# failure of lot of tests. 
Set this to True only for benchmarking +NO_SPIES = False + + def run(): import pytest pytest.main() + if __name__ == "__main__": run() diff --git a/plenum/test/batching_3pc/catch-up/test_3pc_paused_during_catch_up.py b/plenum/test/batching_3pc/catch-up/test_3pc_paused_during_catch_up.py index dedc6022f5..f18f2491c1 100644 --- a/plenum/test/batching_3pc/catch-up/test_3pc_paused_during_catch_up.py +++ b/plenum/test/batching_3pc/catch-up/test_3pc_paused_during_catch_up.py @@ -1,4 +1,4 @@ -from plenum.common.types import Prepare +from plenum.common.messages.node_messages import Prepare from plenum.test.batching_3pc.helper import make_node_syncing, fail_on_execute_batch_on_master from plenum.test.helper import sendRandomRequests from plenum.test.test_node import getNonPrimaryReplicas diff --git a/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc.py b/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc.py index 2f0f94ed9e..eeb021de84 100644 --- a/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc.py +++ b/plenum/test/batching_3pc/catch-up/test_catchup_during_3pc.py @@ -18,6 +18,7 @@ def reset(): request.addfinalizer(reset) return tconf + def test_catchup_during_3pc(tconf, looper, txnPoolNodeSet, client, wallet1): reqs = signed_random_requests(wallet1, tconf.Max3PCBatchSize) diff --git a/plenum/test/batching_3pc/catch-up/test_state_reverted_before_catchup.py b/plenum/test/batching_3pc/catch-up/test_state_reverted_before_catchup.py index b8c1fc8a00..d23751f03a 100644 --- a/plenum/test/batching_3pc/catch-up/test_state_reverted_before_catchup.py +++ b/plenum/test/batching_3pc/catch-up/test_state_reverted_before_catchup.py @@ -58,7 +58,8 @@ def test_unordered_state_reverted_before_catchup(tconf, looper, txnPoolNodeSet, # CHECK - # check that initial uncommitted state differs from the state during 3PC but committed does not + # check that initial uncommitted state differs from the state during 3PC + # but committed does not assert committed_ledger_before 
== committed_ledger_during_3pc assert uncommitted_ledger_before != uncommitted_ledger_during_3pc assert committed_state_before == committed_state_during_3pc diff --git a/plenum/test/batching_3pc/conftest.py b/plenum/test/batching_3pc/conftest.py index 89a0d1c661..7b41e2158f 100644 --- a/plenum/test/batching_3pc/conftest.py +++ b/plenum/test/batching_3pc/conftest.py @@ -1,4 +1,6 @@ import pytest + +from plenum.test.conftest import getValueFromModule from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ client1, wallet1, client1Connected @@ -6,7 +8,7 @@ @pytest.fixture(scope="module") def tconf(tconf, request): oldSize = tconf.Max3PCBatchSize - tconf.Max3PCBatchSize = 10 + tconf.Max3PCBatchSize = getValueFromModule(request, "Max3PCBatchSize", 10) def reset(): tconf.Max3PCBatchSize = oldSize diff --git a/plenum/test/batching_3pc/helper.py b/plenum/test/batching_3pc/helper.py index 45e6517ddd..2f457558ce 100644 --- a/plenum/test/batching_3pc/helper.py +++ b/plenum/test/batching_3pc/helper.py @@ -4,7 +4,7 @@ from plenum.common.constants import DOMAIN_LEDGER_ID from plenum.common.startable import Mode from plenum.common.txn_util import reqToTxn -from plenum.common.types import ThreePhaseType +from plenum.common.messages.node_messages import * from plenum.test.helper import waitForSufficientRepliesForRequests, send_signed_requests @@ -21,7 +21,7 @@ def addRoot(root, collection): if checkLastOrderedPpSeqNo: ppSeqNos = set() for node in nodes: - ppSeqNos.add(node.replicas[0].lastOrderedPPSeqNo) + ppSeqNos.add(node.replicas[0].last_ordered_3pc) assert len(ppSeqNos) == 1 @@ -78,9 +78,10 @@ def tryOrderAndAddTxns(self, commit): ledgerInfo = ledger_manager.getLedgerInfoByType(ledger_id) ledger_manager.preCatchupClbk(ledger_id) + pp = self.getPrePrepare(commit.viewNo, commit.ppSeqNo) for req in reqs: - ledger_manager._add_txn(ledger_id, ledger, ledgerInfo, reqToTxn(req)) - ledger_manager.catchupCompleted(DOMAIN_LEDGER_ID, commit.ppSeqNo) + 
ledger_manager._add_txn(ledger_id, ledger, ledgerInfo, reqToTxn(req, pp.ppTime)) + ledger_manager.catchupCompleted(DOMAIN_LEDGER_ID, (node.viewNo, commit.ppSeqNo)) added = True @@ -88,6 +89,7 @@ def tryOrderAndAddTxns(self, commit): replica.tryOrder = types.MethodType(tryOrderAndAddTxns, replica) + def start_precatchup_before_order(replica): called = False origMethod = replica.tryOrder @@ -105,6 +107,7 @@ def tryOrderAndAddTxns(self, commit): replica.tryOrder = types.MethodType(tryOrderAndAddTxns, replica) + def make_node_syncing(replica, three_phase_type: ThreePhaseType): added = False diff --git a/plenum/test/batching_3pc/test_batch_rejection.py b/plenum/test/batching_3pc/test_batch_rejection.py index 9e63c95054..9145fc3750 100644 --- a/plenum/test/batching_3pc/test_batch_rejection.py +++ b/plenum/test/batching_3pc/test_batch_rejection.py @@ -19,7 +19,7 @@ def setup(tconf, looper, txnPoolNodeSet, client, wallet1): reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize) waitForSufficientRepliesForRequests(looper, client, requests=reqs, customTimeoutPerReq=tconf.Max3PCBatchWait) - stateRoot = pr.stateRootHash(DOMAIN_LEDGER_ID, toHex=False) + stateRoot = pr.stateRootHash(DOMAIN_LEDGER_ID, to_str=False) origMethod = pr.create3PCBatch malignedOnce = None @@ -43,7 +43,7 @@ def reverted(setup, looper): def chkStateRoot(root): for r in [pr]+otherR: - r.stateRootHash(DOMAIN_LEDGER_ID, toHex=False) == root + r.stateRootHash(DOMAIN_LEDGER_ID, to_str=False) == root looper.run(eventually(chkStateRoot, oldStateRoot)) diff --git a/plenum/test/batching_3pc/test_batching_scenarios.py b/plenum/test/batching_3pc/test_batching_scenarios.py index b318986b97..f0e4fd702f 100644 --- a/plenum/test/batching_3pc/test_batching_scenarios.py +++ b/plenum/test/batching_3pc/test_batching_scenarios.py @@ -1,5 +1,5 @@ from stp_core.common.log import getlogger -from plenum.common.types import PrePrepare +from plenum.common.messages.node_messages import PrePrepare from 
plenum.test.batching_3pc.helper import checkNodesHaveSameRoots from plenum.test.helper import sendRandomRequests, \ waitForSufficientRepliesForRequests diff --git a/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py b/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py index 320fe9051f..e442282cf5 100644 --- a/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py +++ b/plenum/test/blacklist/test_blacklist_node_on_multiple_nominations.py @@ -1,7 +1,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Nomination +from plenum.common.messages.node_messages import Nomination from plenum.test import waits whitelist = ['already got nomination', diff --git a/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py b/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py index 6af10e63b7..db2ae44244 100644 --- a/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py +++ b/plenum/test/blacklist/test_blacklist_node_on_multiple_primary_declarations.py @@ -1,7 +1,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Primary +from plenum.common.messages.node_messages import Primary from plenum.test import waits whitelist = ['got primary declaration', diff --git a/plenum/test/checkpoints/conftest.py b/plenum/test/checkpoints/conftest.py index 749d1409f8..67f6501506 100644 --- a/plenum/test/checkpoints/conftest.py +++ b/plenum/test/checkpoints/conftest.py @@ -1,5 +1,6 @@ import pytest +from plenum.test.conftest import getValueFromModule from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ client1, wallet1, client1Connected from plenum.test.batching_3pc.conftest import tconf @@ -10,7 +11,7 @@ def chkFreqPatched(tconf, request): oldChkFreq = tconf.CHK_FREQ oldLogSize = tconf.LOG_SIZE - tconf.CHK_FREQ = 2 + tconf.CHK_FREQ = getValueFromModule(request, 
"CHK_FREQ", 2) tconf.LOG_SIZE = 2*tconf.CHK_FREQ def reset(): diff --git a/plenum/test/checkpoints/helper.py b/plenum/test/checkpoints/helper.py index d9e9ddc01d..c32d0167f5 100644 --- a/plenum/test/checkpoints/helper.py +++ b/plenum/test/checkpoints/helper.py @@ -1,3 +1,6 @@ +from plenum.test.helper import assertEquality + + def chkChkpoints(nodes, total: int, stableIndex: int=None): for node in nodes: for r in node.replicas: @@ -8,3 +11,12 @@ def chkChkpoints(nodes, total: int, stableIndex: int=None): else: for state in r.checkpoints.values(): assert not state.isStable + + +def checkRequestCounts(nodes, req_count, cons_count, batches_count): + for node in nodes: + assertEquality(len(node.requests), req_count) + for r in node.replicas: + assertEquality(len(r.commits), cons_count) + assertEquality(len(r.prepares), cons_count) + assertEquality(len(r.batches), batches_count) diff --git a/plenum/test/checkpoints/test_basic_checkpointing.py b/plenum/test/checkpoints/test_basic_checkpointing.py index fad4eb4b8f..adf51e7ded 100644 --- a/plenum/test/checkpoints/test_basic_checkpointing.py +++ b/plenum/test/checkpoints/test_basic_checkpointing.py @@ -32,10 +32,13 @@ def testOldCheckpointDeleted(chkFreqPatched, looper, txnPoolNodeSet, client1, Send requests more than twice of `CHK_FREQ`, there should be one new stable checkpoint on each replica. 
The old stable checkpoint should be removed """ - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2*reqs_for_checkpoint, - 1) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + numReqs=2*reqs_for_checkpoint, + fVal=1) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + numReqs=1, + fVal=1) timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1, timeout=timeout)) diff --git a/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py b/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py index 6bfd9d0441..6401b84be1 100644 --- a/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py +++ b/plenum/test/checkpoints/test_discard_old_checkpoint_messages.py @@ -1,5 +1,5 @@ from stp_core.loop.eventually import eventually -from plenum.common.types import Checkpoint +from plenum.common.messages.node_messages import Checkpoint from plenum.test.checkpoints.helper import chkChkpoints from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ checkDiscardMsg diff --git a/plenum/test/checkpoints/test_message_outside_watermark.py b/plenum/test/checkpoints/test_message_outside_watermark.py index c2a0b87802..79a3efc5ba 100644 --- a/plenum/test/checkpoints/test_message_outside_watermark.py +++ b/plenum/test/checkpoints/test_message_outside_watermark.py @@ -1,5 +1,5 @@ from plenum.test import waits -from plenum.test.delayers import ppDelay +from plenum.test.delayers import ppDelay, pDelay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ countDiscarded from plenum.test.node_catchup.helper import checkNodeDataForEquality @@ -12,9 +12,9 @@ def testNonPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper, wallet1, client1Connected, reqs_for_logsize): """ - A node is slow in processing PRE-PREPAREs such that lot of requests 
happen - and the slow node has started getting 3 phase messages outside of it - watermarks. Check that it queues up requests outside watermarks and once it + A node is slow in processing PRE-PREPAREs and PREPAREs such that lot of + requests happen and the slow node has started getting 3 phase messages + outside of it watermarks. Check that it queues up requests outside watermarks and once it has received stable checkpoint it processes more requests. It sends other nodes 3 phase messages older than their stable checkpoint so they should discard them. @@ -26,6 +26,7 @@ def testNonPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper, slowReplica = npr[0] slowNode = slowReplica.node slowNode.nodeIbStasher.delay(ppDelay(delay, instId)) + slowNode.nodeIbStasher.delay(pDelay(delay, instId)) def discardCounts(replicas, pat): counts = {} diff --git a/plenum/test/checkpoints/test_message_outside_watermark1.py b/plenum/test/checkpoints/test_message_outside_watermark1.py index ea1b2f695d..e144b82a6b 100644 --- a/plenum/test/checkpoints/test_message_outside_watermark1.py +++ b/plenum/test/checkpoints/test_message_outside_watermark1.py @@ -1,9 +1,12 @@ import math +import pytest + +from stp_core.common.log import getlogger from stp_core.loop.eventually import eventually from plenum.test import waits -from plenum.test.delayers import ppDelay +from plenum.test.delayers import ppDelay, pDelay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica @@ -11,6 +14,22 @@ TestRunningTimeLimitSec = 300 +logger = getlogger() + + +@pytest.fixture(scope="module") +def tconf(tconf, request): + # Delaying perf check as the test sends a lot of requests with delays + old_freq = tconf.PerfCheckFreq + tconf.PerfCheckFreq = 30 + + def reset(): + tconf.PerfCheckFreq = old_freq + + request.addfinalizer(reset) + return tconf + + def testPrimaryRecvs3PhaseMessageOutsideWatermarks(tconf, chkFreqPatched, 
looper, txnPoolNodeSet, client1, wallet1, client1Connected, @@ -24,7 +43,9 @@ def testPrimaryRecvs3PhaseMessageOutsideWatermarks(tconf, chkFreqPatched, looper """ delay = 3 instId = 1 - reqsToSend = 2*reqs_for_logsize + 1 + reqs_to_send = 2*reqs_for_logsize + 1 + logger.debug('Will send {} requests'.format(reqs_to_send)) + npr = getNonPrimaryReplicas(txnPoolNodeSet, instId) pr = getPrimaryReplica(txnPoolNodeSet, instId) from plenum.server.replica import TPCStat @@ -32,15 +53,14 @@ def testPrimaryRecvs3PhaseMessageOutsideWatermarks(tconf, chkFreqPatched, looper for r in npr: r.node.nodeIbStasher.delay(ppDelay(delay, instId)) + r.node.nodeIbStasher.delay(pDelay(delay, instId)) tm_exec_1_batch = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) - batch_count = math.ceil(reqsToSend / tconf.Max3PCBatchSize) + batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize) total_timeout = (tm_exec_1_batch + delay) * batch_count def chk(): assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, - 1, override_timeout_limit=True, - total_timeout=total_timeout) - looper.run(eventually(chk, retryWait=1, timeout=3)) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqs_to_send) + looper.run(eventually(chk, retryWait=1, timeout=total_timeout)) diff --git a/plenum/test/checkpoints/test_stable_checkpoint.py b/plenum/test/checkpoints/test_stable_checkpoint.py index c919ecbf93..da399eaa5b 100644 --- a/plenum/test/checkpoints/test_stable_checkpoint.py +++ b/plenum/test/checkpoints/test_stable_checkpoint.py @@ -1,30 +1,24 @@ -from stp_core.loop.eventually import eventually from plenum.test import waits -from plenum.test.checkpoints.helper import chkChkpoints +from plenum.test.checkpoints.helper import chkChkpoints, checkRequestCounts from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies - - -def checkRequestCounts(nodes, req_count, cons_count, batches_count): - 
for node in nodes: - assert len(node.requests) == req_count - for r in node.replicas: - assert len(r.commits) == cons_count - assert len(r.prepares) == cons_count - assert len(r.batches) == batches_count +from stp_core.loop.eventually import eventually def testRequestOlderThanStableCheckpointRemoved(chkFreqPatched, looper, txnPoolNodeSet, client1, wallet1, client1Connected, reqs_for_checkpoint): + max_batch_size = chkFreqPatched.Max3PCBatchSize + chk_freq = chkFreqPatched.CHK_FREQ reqs = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, - reqs_for_checkpoint - (chkFreqPatched.Max3PCBatchSize), 1) + reqs_for_checkpoint - max_batch_size, + 1) timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, retryWait=1, timeout=timeout)) - checkRequestCounts(txnPoolNodeSet, len(reqs), chkFreqPatched.CHK_FREQ-1, 1) + checkRequestCounts(txnPoolNodeSet, len(reqs), chk_freq - 1, 1) sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, - chkFreqPatched.Max3PCBatchSize, 1) + max_batch_size, 1) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout)) diff --git a/plenum/test/checkpoints/test_view_change_after_checkpoint.py b/plenum/test/checkpoints/test_view_change_after_checkpoint.py new file mode 100644 index 0000000000..2f1b7d02e2 --- /dev/null +++ b/plenum/test/checkpoints/test_view_change_after_checkpoint.py @@ -0,0 +1,83 @@ +import pytest + +from plenum.test.checkpoints.helper import checkRequestCounts +from plenum.test.helper import send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.test_node import ensureElectionsDone +from plenum.test.view_change.helper import ensure_view_change +from stp_core.loop.eventually import eventually + +CHK_FREQ = 5 + + +@pytest.fixture(scope='function', params=['greater_than_checkpoint', + 'lesser_than_checkpoint', + 'equal_to_checkpoint']) +def 
sent_batches(request, chkFreqPatched): + # Test with number of sent batches greater than checkpoint, + # lesser than checkpoint and equal to checkpont. + if request.param == 'greater_than_checkpoint': + return CHK_FREQ + 2 + if request.param == 'lesser_than_checkpoint': + return CHK_FREQ - 2 + if request.param == 'equal_to_checkpoint': + return CHK_FREQ + + +def test_checkpoint_across_views(sent_batches, chkFreqPatched, looper, + txnPoolNodeSet, client1, wallet1, + client1Connected): + """ + Test checkpointing across views. + This test checks that checkpointing and garbage collection works correctly + no matter if view change happened before a checkpoint or after a checkpoint + """ + batch_size = 2 + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, + batch_size*sent_batches, + sent_batches) + + # Check that correct garbage collection happens + non_gced_batch_count = (sent_batches - CHK_FREQ) if sent_batches >= CHK_FREQ else sent_batches + looper.run(eventually(checkRequestCounts, txnPoolNodeSet, + batch_size*non_gced_batch_count, non_gced_batch_count, + non_gced_batch_count, + retryWait=1)) + + ensure_view_change(looper, txnPoolNodeSet) + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) + + # Check that after view change, proper clean up is done + for node in txnPoolNodeSet: + for r in node.replicas: + assert not r.checkpoints + # No stashed checkpoint for previous view + assert not [view_no for view_no in r.stashedRecvdCheckpoints if view_no < r.viewNo] + assert r._h == 0 + assert r._lastPrePrepareSeqNo == 0 + assert r.h == 0 + assert r.H == r._h + chkFreqPatched.LOG_SIZE + + checkRequestCounts(txnPoolNodeSet, 0, 0, 0) + + # Even after view change, chekpointing works + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, + batch_size*sent_batches, + sent_batches) + + looper.run(eventually(checkRequestCounts, txnPoolNodeSet, + batch_size * non_gced_batch_count, + 
non_gced_batch_count, + non_gced_batch_count, + retryWait=1)) + + # Send more batches so one more checkpoint happens. This is done so that + # when this test finishes, all requests are garbage collected and the + # next run of this test (with next param) has the calculations correct + more = CHK_FREQ - non_gced_batch_count + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, + batch_size * more, + more) + looper.run(eventually(checkRequestCounts, txnPoolNodeSet, 0, 0, 0, + retryWait=1)) diff --git a/plenum/test/cli/conftest.py b/plenum/test/cli/conftest.py index c7eabbebff..ee315a855c 100644 --- a/plenum/test/cli/conftest.py +++ b/plenum/test/cli/conftest.py @@ -7,7 +7,7 @@ from stp_core.loop.eventually import eventually from stp_core.loop.looper import Looper -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.test.cli.helper import newCLI, waitAllNodesUp, loadPlugin, \ doByCtx from stp_core.network.port_dispenser import genHa diff --git a/plenum/test/cli/helper.py b/plenum/test/cli/helper.py index 34c30d37e4..09b8f66b06 100644 --- a/plenum/test/cli/helper.py +++ b/plenum/test/cli/helper.py @@ -1,5 +1,6 @@ import ast import os +import stat import re import traceback from tempfile import gettempdir, mkdtemp @@ -339,15 +340,21 @@ def newKeyPair(cli: TestCli, alias: str=None): idrs = set(cli.activeWallet.idsToSigners.keys()) checkCmdValid(cli, cmd) assert len(cli.activeWallet.idsToSigners.keys()) == len(idrs) + 1 - pubKey = set(cli.activeWallet.idsToSigners.keys()).difference(idrs).pop() + new_identifer = set(cli.activeWallet.idsToSigners.keys()).difference(idrs).pop() expected = ['Key created in keyring Default'] if alias: + idr = cli.activeWallet.aliasesToIds.get(alias) + verkey = cli.activeWallet.getVerkey(idr) expected.append('Identifier for key is {}'. 
- format(cli.activeWallet.aliasesToIds.get(alias))) + format(idr)) + expected.append('Verification key is {}'.format(verkey)) expected.append('Alias for identifier is {}'.format(alias)) else: - expected.append('Identifier for key is {}'.format(pubKey)) - expected.append('Current identifier set to {}'.format(alias or pubKey)) + expected.append('Identifier for key is {}'.format(new_identifer)) + verkey = cli.activeWallet.getVerkey(new_identifer) + expected.append('Verification key is {}'.format(verkey)) + + expected.append('Current identifier set to {}'.format(alias or new_identifer)) # TODO: Reconsider this # Using `in` rather than `=` so as to take care of the fact that this might @@ -357,11 +364,10 @@ def newKeyPair(cli: TestCli, alias: str=None): # the public key and alias are listed cli.enterCmd("list ids") - needle = alias if alias else pubKey + needle = alias if alias else new_identifer # assert cli.lastMsg().split("\n")[0] == alias if alias else pubKey assert needle in cli.lastCmdOutput - return pubKey - + return new_identifer pluginLoadedPat = re.compile("plugin [A-Za-z0-9_]+ successfully loaded from module") @@ -580,6 +586,10 @@ def checkWalletFilePersisted(filePath): assert os.path.exists(filePath) +def checkPermissions(path, mode): + assert stat.S_IMODE(os.stat(path).st_mode) == mode + + def checkWalletRestored(cli, expectedWalletKeyName, expectedIdentifiers): @@ -621,6 +631,15 @@ def useAndAssertKeyring(do, name, expectedName=None, expectedMsgs=None): ) +def saveAndAssertKeyring(do, name, expectedName=None, expectedMsgs=None): + keyringName = expectedName or name + finalExpectedMsgs = expectedMsgs or \ + ['Active keyring "{}" saved'.format(keyringName)] + do('save keyring'.format(name), + expect=finalExpectedMsgs + ) + + def exitFromCli(do): import pytest with pytest.raises(cli.Exit): diff --git a/plenum/test/cli/test_save_wallet.py b/plenum/test/cli/test_save_wallet.py new file mode 100644 index 0000000000..4573eaf48f --- /dev/null +++ 
b/plenum/test/cli/test_save_wallet.py @@ -0,0 +1,25 @@ +import pytest + +from plenum.common.util import getWalletFilePath +from plenum.test.cli.helper import createAndAssertNewCreation, \ + checkWalletFilePersisted, checkPermissions, saveAndAssertKeyring + + +def createNewKey(do, cli, keyringName): + createAndAssertNewCreation(do, cli, keyringName) + + +def testSaveWallet(do, be, cli): + be(cli) + assert cli._activeWallet is None + createNewKey(do, cli, keyringName="Default") + saveAndAssertKeyring(do, "Default") + filePath = getWalletFilePath( + cli.getContextBasedKeyringsBaseDir(), + cli.walletFileName) + + checkPermissions(cli.getKeyringsBaseDir(), cli.config.KEYRING_DIR_MODE) + checkPermissions(cli.getContextBasedKeyringsBaseDir(), + cli.config.KEYRING_DIR_MODE) + checkWalletFilePersisted(filePath) + checkPermissions(filePath, cli.config.KEYRING_FILE_MODE) diff --git a/plenum/test/client/test_client.py b/plenum/test/client/test_client.py index f42a4b5600..444148610d 100644 --- a/plenum/test/client/test_client.py +++ b/plenum/test/client/test_client.py @@ -5,7 +5,7 @@ from plenum.common.exceptions import EmptySignature from plenum.common.exceptions import NotConnectedToAny from stp_core.common.log import getlogger -from plenum.common.constants import OP_FIELD_NAME, REPLY, REQACK, TXN_ID +from plenum.common.constants import OP_FIELD_NAME, REPLY, REQACK from plenum.common.types import f from plenum.server.node import Node from plenum.test import waits @@ -168,7 +168,7 @@ def testReplyWhenRepliesFromExactlyFPlusOneNodesAreSame(looper, client1, wallet1): """ - When only :math:`2f+1` replies from the nodes are matching, the client + When only :math:`f+1` replies from the nodes are matching, the client would accept the reply """ request = sendRandomRequest(wallet1, client1) diff --git a/plenum/test/client/test_client_authn.py b/plenum/test/client/test_client_authn.py index 7c6f42851e..3576fe43fa 100644 --- a/plenum/test/client/test_client_authn.py +++ 
b/plenum/test/client/test_client_authn.py @@ -1,12 +1,16 @@ import pytest -from plenum.common.exceptions import InvalidSignature +from plenum.common.exceptions import InvalidSignature, CouldNotAuthenticate from plenum.common.signer_simple import SimpleSigner from plenum.server.client_authn import SimpleAuthNr idr = '5G72199XZB7wREviUbQma7' +msg_str = "42 (forty-two) is the natural number that succeeds 41 and precedes 43." +class DummyAuthenticator(SimpleAuthNr): + def getVerkey(self, _): + return None @pytest.fixture(scope="module") def cli(): @@ -22,7 +26,7 @@ def sa(cli): @pytest.fixture(scope="module") def msg(): - return dict(myMsg="42 (forty-two) is the natural number that succeeds 41 and precedes 43.") + return dict(myMsg=msg_str) @pytest.fixture(scope="module") @@ -30,6 +34,18 @@ def sig(cli, msg): return cli.sign(msg) +def test_authenticate_raises_correct_exception(): + msg = dict(myMsg=msg_str) + simple_signer = SimpleSigner() + identifier = simple_signer.identifier + signature = simple_signer.sign(msg) + verkey = simple_signer.verkey + dummyAr = DummyAuthenticator() + dummyAr.addIdr(identifier, verkey) + pytest.raises(CouldNotAuthenticate, dummyAr.authenticate, msg,identifier, signature) + + + def testClientAuthentication(sa, cli, msg, sig): sa.authenticate(msg, idr, sig) diff --git a/plenum/test/client/test_client_retry.py b/plenum/test/client/test_client_retry.py index b30eed9be6..fab4e54b81 100644 --- a/plenum/test/client/test_client_retry.py +++ b/plenum/test/client/test_client_retry.py @@ -6,7 +6,7 @@ from stp_core.loop.eventually import eventually, eventuallyAll from plenum.common.request import Request -from plenum.common.types import Reply, RequestNack +from plenum.common.messages.node_messages import RequestNack, Reply from plenum.test.helper import sendRandomRequest, checkReqAck, wait_for_replies from plenum.test import waits @@ -100,7 +100,7 @@ def testClientNotRetryRequestWhenReqnackReceived(looper, nodeSet, client1, origTrans = 
alpha.transmitToClient def nackReq(self, req, frm): - self.transmitToClient(RequestNack(*req.key, reason="testing"), frm) + self.transmitToClient(RequestNack(*req.key, "testing"), frm) def onlyTransNack(msg, remoteName): if not isinstance(msg, RequestNack): @@ -187,4 +187,4 @@ def dontTransmitReply(msg, remoteName): (totalResends + withFewerRetryReq.CLIENT_MAX_RETRY_REPLY) assert req.key not in client1.expectingAcksFor assert req.key not in client1.expectingRepliesFor - alpha.processRequest = origTrans + alpha.transmitToClient = origTrans diff --git a/plenum/test/common/test_parse_ledger.py b/plenum/test/common/test_parse_ledger.py new file mode 100644 index 0000000000..49923f18bb --- /dev/null +++ b/plenum/test/common/test_parse_ledger.py @@ -0,0 +1,38 @@ +import pytest +import base58 + +from ledger.compact_merkle_tree import CompactMerkleTree +from ledger.ledger import Ledger +from plenum.common.constants import TXN_TYPE, TARGET_NYM, DATA, NAME, ALIAS, SERVICES, VALIDATOR +from plenum.common.stack_manager import TxnStackManager + +whitelist = ['substring not found'] + +@pytest.fixture(scope="module") +def tdirWithLedger(tdir): + tree = CompactMerkleTree() + ledger = Ledger(CompactMerkleTree(), dataDir=tdir) + for d in range(3): + txn = { TXN_TYPE: '0', + TARGET_NYM: base58.b58encode(b'whatever'), + DATA: { + NAME: str(d), + ALIAS: 'test' + str(d), + SERVICES: [VALIDATOR], + } + } + if d == 1: + txn[TARGET_NYM] = "invalid====" + ledger.add(txn) + return ledger + +""" +Test that invalid base58 TARGET_NYM in pool_transaction raises the proper exception (INDY-150) +""" + +def test_parse_non_base58_txn_type_field_raises_descriptive_error(tdirWithLedger,tdir): + with pytest.raises(ValueError) as excinfo: + ledger = Ledger(CompactMerkleTree(), dataDir=tdir) + _, _, nodeKeys = TxnStackManager.parseLedgerForHaAndKeys(ledger) + assert("verkey" in str(excinfo.value)) + ledger.stop() diff --git a/plenum/test/common/test_signers.py b/plenum/test/common/test_signers.py new 
file mode 100644 index 0000000000..8dd4dc2dcf --- /dev/null +++ b/plenum/test/common/test_signers.py @@ -0,0 +1,30 @@ +import base58 + +from plenum.common.signer_did import DidSigner +from plenum.common.signer_simple import SimpleSigner +from plenum.common.util import friendlyToRaw + + +def test_signer_compatable(): + seed = b'Falcon00000000000000000000000000' + s_signer = SimpleSigner(seed=seed) + did_signer = DidSigner(seed=seed) + + assert friendlyToRaw(s_signer.identifier).startswith(friendlyToRaw(did_signer.identifier)) + + if did_signer.abbreviated: + verkey = friendlyToRaw(did_signer.identifier) + verkey += friendlyToRaw(did_signer.verkey[1:]) + else: + verkey = friendlyToRaw(did_signer.verkey) + assert friendlyToRaw(s_signer.verkey) == verkey + assert friendlyToRaw(s_signer.verkey) == friendlyToRaw(did_signer.full_verkey) + + +def test_compare_identities(): + cryptonym = 'BPtrqHo3WyjmTNpVchEhWxp3qfDdssdFUNoM8kmKoEWw' + did_id = 'L5AD5g65TDQr1PPHHRoiGf' + did_verkey = 'Bf9Z1tKWpcJAvKJVhZhvVZ' + + did_to_cryptonym = base58.b58encode(base58.b58decode(did_id) + base58.b58decode(did_verkey)) + assert cryptonym == did_to_cryptonym diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index e0a1ad2daa..63c781769a 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -161,7 +161,7 @@ def getValueFromModule(request, name: str, default: Any = None): "DefaultPluginPath": { PLUGIN_BASE_DIR_PATH: testPluginBaseDirPath, PLUGIN_TYPE_STATS_CONSUMER: "stats_consumer" - }, + } } @@ -212,7 +212,9 @@ def logcapture(request, whitelist, concerningLogLevels): 'discarding message (NOMINATE|PRIMARY)', '.+ rid .+ has been removed', 'last try...', - 'has uninitialised socket' + 'has uninitialised socket', + 'to have incorrect time', + 'time not acceptable' ] wlfunc = inspect.isfunction(whitelist) @@ -322,7 +324,6 @@ def ensureView(nodeSet, looper, up): """ Ensure that all the nodes in the nodeSet are in the same view. 
""" - return waitForViewChange(looper, nodeSet) diff --git a/plenum/test/delayers.py b/plenum/test/delayers.py index 351fc1935a..da87d663e9 100644 --- a/plenum/test/delayers.py +++ b/plenum/test/delayers.py @@ -1,9 +1,9 @@ import random -from typing import Iterable +from typing import Iterable, List -from plenum.common.types import f, Propagate, PrePrepare, \ - Prepare, Commit, InstanceChange, LedgerStatus, ConsistencyProof, CatchupReq, \ - Nomination, CatchupRep, Primary, Reelection +from plenum.common.request import Request +from plenum.common.types import f +from plenum.common.messages.node_messages import * from plenum.common.constants import OP_FIELD_NAME from plenum.common.util import getCallableName from plenum.test.test_client import TestClient @@ -41,6 +41,7 @@ def inner(wrappedMsg): getattr(msg, f.INST_ID.nm) == instFilter)): return seconds + inner.__name__ = opType.__name__ return inner @@ -86,7 +87,8 @@ def ppgDelay(delay: float, sender_filter: str=None): def ppDelay(delay: float, instId: int=None, sender_filter: str=None): # Delayer of PRE-PREPARE requests from a particular instance - return delayerMsgTuple(delay, PrePrepare, instFilter=instId, senderFilter=sender_filter) + return delayerMsgTuple(delay, PrePrepare, instFilter=instId, + senderFilter=sender_filter) def pDelay(delay: float, instId: int=None, sender_filter: str=None): @@ -104,6 +106,11 @@ def icDelay(delay: float): return delayerMsgTuple(delay, InstanceChange) +def vcd_delay(delay: float): + # Delayer of VIEW_CHANGE_DONE requests + return delayerMsgTuple(delay, ViewChangeDone) + + def lsDelay(delay: float): # Delayer of LEDGER_STATUSES requests return delayerMsgTuple(delay, LedgerStatus) @@ -124,6 +131,31 @@ def cr_delay(delay: float): return delayerMsgTuple(delay, CatchupRep) +def req_delay(delay: float): + # Delayer of Request requests + return delayerMsgTuple(delay, Request) + + +def msg_req_delay(delay: float, types_to_delay: List=None): + # Delayer of MessageReq messages + def 
specific_msgs(msg): + if isinstance(msg[0], MessageReq) and (not types_to_delay or + msg[0].msg_type in types_to_delay): + return delay + + return specific_msgs + + +def msg_rep_delay(delay: float, types_to_delay: List=None): + # Delayer of MessageRep messages + def specific_msgs(msg): + if isinstance(msg[0], MessageRep) and (not types_to_delay or + msg[0].msg_type in types_to_delay): + return delay + + return specific_msgs + + def delay(what, frm, to, howlong): from plenum.test.test_node import TestNode @@ -185,3 +217,8 @@ def delay_3pc_messages(nodes, inst_id, delay=None, min_delay=None, max_delay=None): # Delay 3 phase commit message delay_messages('3pc', nodes, inst_id, delay, min_delay, max_delay) + + +def reset_delays_and_process_delayeds(nodes): + for node in nodes: + node.reset_delays_and_process_delayeds() diff --git a/plenum/test/helper.py b/plenum/test/helper.py index 58d74a87ca..90dd2d08a7 100644 --- a/plenum/test/helper.py +++ b/plenum/test/helper.py @@ -2,6 +2,7 @@ import os import random import string +import time from _signal import SIGINT from functools import partial from itertools import permutations, combinations @@ -14,11 +15,10 @@ from plenum.client.client import Client from plenum.client.wallet import Wallet -from plenum.common.constants import REPLY, REQACK, REQNACK, REJECT, OP_FIELD_NAME from plenum.common.request import Request -from plenum.common.types import Reply, f, PrePrepare from plenum.common.util import getMaxFailures, \ - checkIfMoreThanFSameItems + checkIfMoreThanFSameItems, getNoInstances, get_utc_epoch +from plenum.common.messages.node_messages import * from plenum.config import poolTransactionsFile, domainTransactionsFile from plenum.server.node import Node from plenum.test import waits @@ -27,7 +27,7 @@ getAllReturnVals from plenum.test.test_client import TestClient, genTestClient from plenum.test.test_node import TestNode, TestReplica, TestNodeSet, \ - checkNodesConnected, ensureElectionsDone, NodeRef + checkNodesConnected, 
ensureElectionsDone, NodeRef, getPrimaryReplica from psutil import Popen from stp_core.common.log import getlogger from stp_core.loop.eventually import eventuallyAll, eventually @@ -156,11 +156,57 @@ def sendReqsToNodesAndVerifySuffReplies(looper: Looper, return requests +def send_reqs_to_nodes_and_verify_all_replies(looper: Looper, + wallet: Wallet, + client: TestClient, + numReqs: int, + customTimeoutPerReq: float=None, + add_delay_to_timeout: float=0, + override_timeout_limit=False, + total_timeout=None): + requests = sendRandomRequests(wallet, client, numReqs) + nodeCount = len(client.nodeReg) + # wait till more than nodeCount replies are received (that is all nodes answered) + waitForSufficientRepliesForRequests(looper, client, + requests=requests, + fVal=nodeCount - 1, + customTimeoutPerReq=customTimeoutPerReq, + add_delay_to_timeout=add_delay_to_timeout, + override_timeout_limit=override_timeout_limit, + total_timeout=total_timeout) + return requests + + +def send_reqs_batches_and_get_suff_replies(looper: Looper, + wallet: Wallet, + client: TestClient, + num_reqs: int, num_batches=1, **kwargs): + # This method assumes that `num_reqs` <= num_batches*MaxbatchSize + if num_batches == 1: + return sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, + num_reqs, **kwargs) + else: + requests = [] + for _ in range(num_batches-1): + requests.extend(sendReqsToNodesAndVerifySuffReplies(looper, wallet, + client, + num_reqs//num_batches, + **kwargs)) + rem = num_reqs % num_batches + if rem == 0: + rem = num_reqs // num_batches + requests.extend(sendReqsToNodesAndVerifySuffReplies(looper, wallet, + client, + rem, + **kwargs)) + return requests + + # noinspection PyIncorrectDocstring def checkResponseCorrectnessFromNodes(receivedMsgs: Iterable, reqId: int, fValue: int) -> bool: """ - the client must get at least :math:`2f+1` responses + the client must get at least :math:`f+1` responses """ msgs = [(msg[f.RESULT.nm][f.REQ_ID.nm], msg[f.RESULT.nm][f.IDENTIFIER.nm]) 
for msg in getRepliesFromClientInbox(receivedMsgs, reqId)] @@ -292,6 +338,7 @@ def sendRandomRequest(wallet: Wallet, client: Client): def sendRandomRequests(wallet: Wallet, client: Client, count: int): + logger.debug('Sending {} random requests'.format(count)) return send_signed_requests(client, signed_random_requests(wallet, count)) @@ -309,10 +356,32 @@ async def msgAll(nodes: TestNodeSet): await sendMessageAndCheckDelivery(nodes, p[0], p[1]) +def sendMessage(nodes: TestNodeSet, + frm: NodeRef, + to: NodeRef, + msg: Optional[Tuple]=None): + """ + Sends message from one node to another + + :param nodes: + :param frm: sender + :param to: recepient + :param msg: optional message - by default random one generated + :return: + """ + + logger.debug("Sending msg from {} to {}".format(frm, to)) + msg = msg if msg else randomMsg() + sender = nodes.getNode(frm) + rid = sender.nodestack.getRemote(nodes.getNodeName(to)).uid + sender.nodestack.send(msg, rid) + + async def sendMessageAndCheckDelivery(nodes: TestNodeSet, frm: NodeRef, to: NodeRef, msg: Optional[Tuple] = None, + method = None, customTimeout=None): """ Sends message from one node to another and checks that it was delivered @@ -333,11 +402,45 @@ async def sendMessageAndCheckDelivery(nodes: TestNodeSet, timeout = customTimeout or waits.expectedNodeToNodeMessageDeliveryTime() - await eventually(checkMessageReceived, msg, nodes, to, + await eventually(checkMessageReceived, msg, nodes, to, method, retryWait=.1, timeout=timeout, ratchetSteps=10) +def sendMessageToAll(nodes: TestNodeSet, + frm: NodeRef, + msg: Optional[Tuple]=None): + """ + Sends message from one node to all others + + :param nodes: + :param frm: sender + :param msg: optional message - by default random one generated + :return: + """ + for node in nodes: + if node != frm: + sendMessage(nodes, frm, node, msg) + +async def sendMessageAndCheckDeliveryToAll(nodes: TestNodeSet, + frm: NodeRef, + msg: Optional[Tuple]=None, + method = None, + 
customTimeout=None): + """ + Sends message from one node to all other and checks that it was delivered + + :param nodes: + :param frm: sender + :param msg: optional message - by default random one generated + :param customTimeout: + :return: + """ + customTimeout = customTimeout or waits.expectedNodeToAllNodesMessageDeliveryTime(len(nodes)) + for node in nodes: + if node != frm: + await sendMessageAndCheckDelivery(nodes, frm, node, msg, method, customTimeout) + break def checkMessageReceived(msg, nodes, to, method: str = None): allMsgs = nodes.getAllMsgReceived(to, method) @@ -355,7 +458,7 @@ def addNodeBack(nodeSet: TestNodeSet, def checkPropagateReqCountOfNode(node: TestNode, identifier: str, reqId: int): key = identifier, reqId assert key in node.requests - assert len(node.requests[key].propagates) >= node.f + 1 + assert node.quorums.propagate.is_reached(len(node.requests[key].propagates)) def requestReturnedToNode(node: TestNode, identifier: str, reqId: int, @@ -372,6 +475,24 @@ def checkRequestReturnedToNode(node: TestNode, identifier: str, reqId: int, assert requestReturnedToNode(node, identifier, reqId, instId) +def checkRequestNotReturnedToNode(node: TestNode, identifier: str, reqId: int, + instId: int): + assert not requestReturnedToNode(node, identifier, reqId, instId) + +def check_request_is_not_returned_to_nodes(looper, nodeSet, request): + instances = range(getNoInstances(len(nodeSet))) + coros = [] + for node, inst_id in itertools.product(nodeSet, instances): + c = partial(checkRequestNotReturnedToNode, + node=node, + identifier=request.identifier, + reqId=request.reqId, + instId=inst_id + ) + coros.append(c) + timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=timeout)) + def checkPrePrepareReqSent(replica: TestReplica, req: Request): prePreparesSent = getAllArgs(replica, replica.sendPrePrepare) expectedDigest = TestReplica.batchDigest([req]) @@ -392,9 +513,9 @@ def 
checkPrepareReqSent(replica: TestReplica, identifier: str, reqId: int, paramsList = getAllArgs(replica, replica.canPrepare) rv = getAllReturnVals(replica, replica.canPrepare) - assert [(identifier, reqId)] in \ - [p["ppReq"].reqIdr and p["ppReq"].viewNo == view_no for p in paramsList] - idx = [p["ppReq"].reqIdr for p in paramsList if p["ppReq"].viewNo == view_no].index([(identifier, reqId)]) + args = [p["ppReq"].reqIdr for p in paramsList if p["ppReq"].viewNo == view_no] + assert [(identifier, reqId)] in args + idx = args.index([(identifier, reqId)]) assert rv[idx] @@ -402,7 +523,7 @@ def checkSufficientPrepareReqRecvd(replica: TestReplica, viewNo: int, ppSeqNo: int): key = (viewNo, ppSeqNo) assert key in replica.prepares - assert len(replica.prepares[key][1]) >= 2 * replica.f + assert len(replica.prepares[key][1]) >= replica.quorums.prepare.value def checkSufficientCommitReqRecvd(replicas: Iterable[TestReplica], viewNo: int, @@ -411,7 +532,7 @@ def checkSufficientCommitReqRecvd(replicas: Iterable[TestReplica], viewNo: int, key = (viewNo, ppSeqNo) assert key in replica.commits received = len(replica.commits[key][1]) - minimum = 2 * replica.f + minimum = replica.quorums.commit.value assert received > minimum @@ -569,7 +690,9 @@ def checkDiscardMsg(processors, discardedMsg, def countDiscarded(processor, reasonPat): c = 0 for entry in processor.spylog.getAll(processor.discard): - if 'reason' in entry.params and reasonPat in entry.params['reason']: + if 'reason' in entry.params and ( + (isinstance(entry.params['reason'], str) and + reasonPat in entry.params['reason']), (reasonPat in str(entry.params['reason']))): c += 1 return c @@ -627,11 +750,14 @@ def check_seqno_db_equality(db1, db2): assert {bytes(k): bytes(v) for k, v in db1._keyValueStorage.iter()} == \ {bytes(k): bytes(v) for k, v in db2._keyValueStorage.iter()} -def check_last_ordered_pp_seq_no(node1, node2): - master_replica_1 = node1.replicas[0] - master_replica_2 = node2.replicas[0] - assert 
master_replica_1.lastOrderedPPSeqNo == master_replica_2.lastOrderedPPSeqNo, \ - "{} != {}".format(master_replica_1.lastOrderedPPSeqNo, master_replica_2.lastOrderedPPSeqNo) + +def check_last_ordered_3pc(node1, node2): + master_replica_1 = node1.master_replica + master_replica_2 = node2.master_replica + assert master_replica_1.last_ordered_3pc == master_replica_2.last_ordered_3pc, \ + "{} != {}".format(master_replica_1.last_ordered_3pc, master_replica_2.last_ordered_3pc) + return master_replica_1.last_ordered_3pc + def randomText(size): return ''.join(random.choice(string.ascii_letters) for _ in range(size)) @@ -717,6 +843,51 @@ def nodeByName(nodes, name): raise Exception("Node with the name '{}' has not been found.".format(name)) +def send_pre_prepare(view_no, pp_seq_no, wallet, nodes, state_root=None, txn_root=None): + last_req_id = wallet._getIdData().lastReqId or 0 + pre_prepare = PrePrepare( + 0, + view_no, + pp_seq_no, + get_utc_epoch(), + [(wallet.defaultId, last_req_id+1)], + 0, + "random digest", + DOMAIN_LEDGER_ID, + state_root or '0'*44, + txn_root or '0'*44 + ) + primary_node = getPrimaryReplica(nodes).node + non_primary_nodes = set(nodes) - {primary_node} + + sendMessageToAll(nodes, primary_node, pre_prepare) + for non_primary_node in non_primary_nodes: + sendMessageToAll(nodes, non_primary_node, pre_prepare) + + +def send_prepare(view_no, pp_seq_no, nodes, state_root=None, txn_root=None): + prepare = Prepare( + 0, + view_no, + pp_seq_no, + get_utc_epoch(), + "random digest", + state_root or '0'*44, + txn_root or '0'*44 + ) + primary_node = getPrimaryReplica(nodes).node + sendMessageToAll(nodes, primary_node, prepare) + + +def send_commit(view_no, pp_seq_no, nodes): + commit = Commit( + 0, + view_no, + pp_seq_no) + primary_node = getPrimaryReplica(nodes).node + sendMessageToAll(nodes, primary_node, commit) + + def chk_all_funcs(looper, funcs, acceptable_fails=0, retry_wait=None, timeout=None, override_eventually_timeout=False): # TODO: Move this logic 
to eventuallyAll diff --git a/plenum/test/input_validation/constants.py b/plenum/test/input_validation/constants.py new file mode 100644 index 0000000000..10e414c47e --- /dev/null +++ b/plenum/test/input_validation/constants.py @@ -0,0 +1,15 @@ +import base58 + +VALID_BASE58 = base58.alphabet + +TEST_TARGET_NYM = 'HebGWgHmicPtzr4BTHmSmXkDNL7CngDjYVcxqT5oprMw' + +TEST_VERKEY_ABBREVIATED = '~A43KHjJmjwFX71J1b5p61N' +TEST_VERKEY_FULL = 'ED6cb1vtM8JEReLzUFnuGMZX4jt9jnEuDfwGTiAcARtD' + +TEST_IDENTIFIER_LONG = 'EJC3Gg4J2PqATnMn2b94VTn1tm7Ygr1SfttzPp8QdXJ6' +TEST_IDENTIFIER_SHORT = '5gC6mJq5MoGPwubtU8F5Qc' + +TEST_SEQ_SMALL = 0 +TEST_SEQ_ONE = 1 +TEST_SEQ_NORMAL = 2 diff --git a/plenum/test/input_validation/fields_validation/test_base58_field.py b/plenum/test/input_validation/fields_validation/test_base58_field.py index 7ed9f92861..fdd72037e2 100644 --- a/plenum/test/input_validation/fields_validation/test_base58_field.py +++ b/plenum/test/input_validation/fields_validation/test_base58_field.py @@ -1,34 +1,51 @@ import pytest -import string +import base58 from plenum.common.messages.fields import Base58Field from plenum.common.util import randomString -from plenum.test.input_validation.utils import * +from plenum.test.input_validation.utils import b58_by_len -LENGTH_LONG_MIN = 43 -LENGTH_LONG_MAX = 46 -LENGTH_SHORT_MIN = 15 -LENGTH_SHORT_MAX = 26 -valid_base58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyzzzzaaaaa' -validator = Base58Field(short=True, long=True) - - -def test_valid_base58(): - assert not validator.validate(valid_base58[:LENGTH_LONG_MIN]) - assert not validator.validate(valid_base58[:LENGTH_SHORT_MIN]) +def test_non_empty_base58(): + for byte_len in range(1, 33): + val = b58_by_len(byte_len) + assert not Base58Field().validate(val) # no decoded length constraints + assert not Base58Field( + byte_lengths=(byte_len,)).validate(val) + assert Base58Field( + byte_lengths=(byte_len - 1,)).validate(val) def test_empty_string(): - assert 
validator.validate('') - - -def test_wrong_lengths(): - assert validator.validate(valid_base58[:LENGTH_LONG_MIN - 1]) - assert validator.validate(valid_base58[:LENGTH_LONG_MAX + 1]) - assert validator.validate(valid_base58[:LENGTH_SHORT_MIN - 1]) - assert validator.validate(valid_base58[:LENGTH_SHORT_MAX + 1]) - - -def test_invalid_symbol(): - assert validator.validate(valid_base58[:LENGTH_LONG_MIN - 1] + '0') + assert not Base58Field().validate('') + assert Base58Field( + byte_lengths=(len(randomString()),)).validate('') + + +def test_multiple_constraints(): + choices = (1, 7, 18) + validator = Base58Field(byte_lengths=choices) + for byte_len in range(1, 33): + val = b58_by_len(byte_len) + if byte_len in choices: + assert not validator.validate(val) + else: + assert validator.validate(val) + + +def test_invalid_symbols(): + INVALID_CHARS = '0!@#$%^' + res = Base58Field().validate( + b58_by_len(10)[slice(0, -len(INVALID_CHARS))] + INVALID_CHARS) + assert res + assert (res == 'should not contain the following chars {}' + .format(sorted(set(INVALID_CHARS))[:10])) + + +def test_invalid_symbols_truncated_output(): + INVALID_CHARS = '\x00\xAA0!@#$%^&*()' + res = Base58Field().validate( + b58_by_len(20)[slice(0, -len(INVALID_CHARS))] + INVALID_CHARS) + assert res + assert (res == 'should not contain the following chars ' + '{} (truncated)'.format(sorted(set(INVALID_CHARS))[:10])) diff --git a/plenum/test/input_validation/fields_validation/test_bool_field.py b/plenum/test/input_validation/fields_validation/test_bool_field.py new file mode 100644 index 0000000000..1d3fb904f6 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_bool_field.py @@ -0,0 +1,19 @@ +import pytest +from plenum.common.messages.fields import BooleanField + +validator = BooleanField() + +def test_bool_true(): + assert not validator.validate(True) + + +def test_bool_false(): + assert not validator.validate(False) + + +def test_bool_none(): + assert validator.validate(None) + + +def 
test_bool_invalid(): + assert validator.validate("True") diff --git a/plenum/test/input_validation/fields_validation/test_identifier_field.py b/plenum/test/input_validation/fields_validation/test_identifier_field.py index 0e7c726850..cdeefef47b 100644 --- a/plenum/test/input_validation/fields_validation/test_identifier_field.py +++ b/plenum/test/input_validation/fields_validation/test_identifier_field.py @@ -1,29 +1,24 @@ import pytest +import base58 from plenum.common.messages.fields import IdentifierField -validator = IdentifierField() +from plenum.test.input_validation.utils import b58_by_len -valid_chars = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" +validator = IdentifierField() -MIN_LENGTH_SHORT = 15 -MAX_LENGTH_SHORT = 25 -MIN_LENGTH_LONG = 43 -MAX_LENGTH_LONG = 45 -def test_valid_identifiers(): - all_valid_length = \ - list(range(MIN_LENGTH_SHORT, MAX_LENGTH_SHORT + 1)) + \ - list(range(MIN_LENGTH_LONG, MAX_LENGTH_LONG + 1)) - for length in all_valid_length: - assert not validator.validate(valid_chars[:length]) +def test_identifiers(): + for byte_len in range(1, 33): + val = b58_by_len(byte_len) + if byte_len in (16, 32): + assert not validator.validate(val) + else: + assert validator.validate(val) def test_invalid_char(): - invalid_identifier = valid_chars[:MIN_LENGTH_SHORT - 1] + "0" - assert validator.validate(invalid_identifier) - - -def test_invalid_length(): - invalid_identifier = valid_chars[:MIN_LENGTH_SHORT - 1] - assert validator.validate(invalid_identifier) + res = validator.validate(b58_by_len(16)[:-1] + '+') + assert res + assert (res == "should not contain the following chars {}". 
+ format(sorted(set('+')))) diff --git a/plenum/test/input_validation/fields_validation/test_ledger_id_field.py b/plenum/test/input_validation/fields_validation/test_ledger_id_field.py index 431aeb862d..667ef29a2d 100644 --- a/plenum/test/input_validation/fields_validation/test_ledger_id_field.py +++ b/plenum/test/input_validation/fields_validation/test_ledger_id_field.py @@ -1,6 +1,6 @@ import pytest from plenum.common.messages.fields import LedgerIdField -from plenum.common.types import POOL_LEDGER_ID, DOMAIN_LEDGER_ID +from plenum.common.constants import POOL_LEDGER_ID, DOMAIN_LEDGER_ID validator = LedgerIdField() diff --git a/plenum/test/input_validation/fields_validation/test_limited_length_string_field.py b/plenum/test/input_validation/fields_validation/test_limited_length_string_field.py new file mode 100644 index 0000000000..b3b1d7c065 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_limited_length_string_field.py @@ -0,0 +1,22 @@ +import pytest +from plenum.common.messages.fields import LimitedLengthStringField + + +def test_incorrect_max_length(): + with pytest.raises(Exception): + LimitedLengthStringField(max_length=0) + + +def test_empty_string(): + validator = LimitedLengthStringField(max_length=1) + assert validator.validate("") + + +def test_valid_string(): + validator = LimitedLengthStringField(max_length=1) + assert not validator.validate("x") + + +def test_long_string(): + validator = LimitedLengthStringField(max_length=1) + assert validator.validate("xx") diff --git a/plenum/test/input_validation/fields_validation/test_merkle_tree_root_field.py b/plenum/test/input_validation/fields_validation/test_merkle_tree_root_field.py index 6f5b2b80c3..cf21b1015b 100644 --- a/plenum/test/input_validation/fields_validation/test_merkle_tree_root_field.py +++ b/plenum/test/input_validation/fields_validation/test_merkle_tree_root_field.py @@ -1,26 +1,24 @@ -import pytest from plenum.common.messages.fields import MerkleRootField +from 
plenum.test.input_validation.utils import b58_by_len -LENGTH_MIN = 43 -LENGTH_MAX = 45 - -valid_merkle_root = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' validator = MerkleRootField() -def test_valid_merkle_root(): - assert not validator.validate(valid_merkle_root[:LENGTH_MIN]) - assert not validator.validate(valid_merkle_root[:LENGTH_MAX]) +def test_non_empty_merkle_roots(): + for byte_len in range(1, 33): + val = b58_by_len(byte_len) + if byte_len == 32: + assert not validator.validate(val) + else: + assert validator.validate(val) def test_empty_string(): assert validator.validate('') -def test_wrong_lengths(): - assert validator.validate(valid_merkle_root[:LENGTH_MIN - 1]) - assert validator.validate(valid_merkle_root[:LENGTH_MAX + 1]) - - def test_invalid_symbol(): - assert validator.validate(valid_merkle_root[:LENGTH_MIN - 1] + '0') + res = validator.validate(b58_by_len(32)[:-1] + '0') + assert res + assert (res == "should not contain the following chars {}". + format(sorted(set('0')))) diff --git a/plenum/test/input_validation/fields_validation/test_message_field.py b/plenum/test/input_validation/fields_validation/test_message_field.py new file mode 100644 index 0000000000..dae37a6a61 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_message_field.py @@ -0,0 +1,15 @@ +import pytest +from plenum.common.messages.fields import MessageField +from plenum.common.messages.node_messages import Commit, ViewChangeDone + + +def test_correct_message(): + validator = MessageField(Commit) + message = Commit(1,2,3) + assert not validator.validate(message) + + +def test_incorrect_message(): + validator = MessageField(ViewChangeDone) + message = Commit(1,2,3) + assert validator.validate(message) diff --git a/plenum/test/input_validation/fields_validation/test_request_identifier_field.py b/plenum/test/input_validation/fields_validation/test_request_identifier_field.py index 29622ca4dd..649d51b7d0 100644 --- 
a/plenum/test/input_validation/fields_validation/test_request_identifier_field.py +++ b/plenum/test/input_validation/fields_validation/test_request_identifier_field.py @@ -1,26 +1,25 @@ import pytest from plenum.common.messages.fields import RequestIdentifierField -validator = RequestIdentifierField() +from plenum.test.input_validation.constants import \ + TEST_IDENTIFIER_SHORT, TEST_IDENTIFIER_LONG -valid_client_id_chars = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" +from plenum.test.input_validation.utils import b58_by_len -MIN_LENGTH_SHORT = 15 -MAX_LENGTH_SHORT = 25 -MIN_LENGTH_LONG = 43 -MAX_LENGTH_LONG = 45 +validator = RequestIdentifierField() # Request id consists of client identifier (base56 string 16/32 long) and # some number (for now it is current timestamp, but can be any number) -valid_request_id = (valid_client_id_chars[:MIN_LENGTH_SHORT], 11111) +valid_request_id = (TEST_IDENTIFIER_LONG, 11111) def test_valid_request_id(): - all_valid_length = \ - list(range(MIN_LENGTH_SHORT, MAX_LENGTH_SHORT + 1)) + \ - list(range(MIN_LENGTH_LONG, MAX_LENGTH_LONG + 1)) - for length in all_valid_length: - assert not validator.validate((valid_client_id_chars[:length], 11111)) + for byte_len in range(1, 33): + val = b58_by_len(byte_len) + if byte_len in (16, 32): + assert not validator.validate((val, 11111)) + else: + assert validator.validate(val) def test_invalid_order(): diff --git a/plenum/test/input_validation/fields_validation/test_serializedvalue_field.py b/plenum/test/input_validation/fields_validation/test_serializedvalue_field.py new file mode 100644 index 0000000000..deacd17e79 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_serializedvalue_field.py @@ -0,0 +1,20 @@ +import pytest +from plenum.common.messages.fields import SerializedValueField + +validator = SerializedValueField() + + +def test_non_empty_string(): + assert not validator.validate("x") + + +def test_empty_string(): + assert validator.validate("") + + 
+def test_non_empty_bytes(): + assert not validator.validate(b"hello") + + +def test_empty_bytes(): + assert validator.validate(b"") diff --git a/plenum/test/input_validation/fields_validation/test_sha256_hex_field.py b/plenum/test/input_validation/fields_validation/test_sha256_hex_field.py new file mode 100644 index 0000000000..433cc242c3 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_sha256_hex_field.py @@ -0,0 +1,50 @@ +import base64 +import hashlib +from binascii import hexlify + +import base58 + +from plenum.common.messages.fields import Sha256HexField + +validator = Sha256HexField() +msg = b'some message' + + +def test_sha256_invalid_hex_field(): + assert validator.validate('') == 'not a valid hash (needs to be in hex too)' + assert validator.validate(1) == "expected types 'str', got 'int'" + + +def test_sha256_valid_hex_field(): + assert validator.validate(hashlib.sha256(msg).hexdigest()) is None + + +def test_only_sha256_field(): + # Any other hashing algo like sha512 or md5 not allowed, only sha256 + h512 = hashlib.sha512(msg) + hex_h512 = h512.hexdigest() + assert validator.validate( + hex_h512) == 'not a valid hash (needs to be in hex too)' + hmd5 = hashlib.md5(msg) + hex_hmd5 = hmd5.hexdigest() + assert validator.validate( + hex_hmd5) == 'not a valid hash (needs to be in hex too)' + + # Only hex representation of sha256 will work + h256 = hashlib.sha256(msg) + hex_h256 = h256.hexdigest() + assert validator.validate(hex_h256) is None + + +def test_only_sha256_hex_field(): + h256 = hashlib.sha256(msg) + + # Base64 or base58 representations will not work, only hex will + b64_h256 = base64.b64encode(h256.digest()).decode() + assert validator.validate( + b64_h256) == 'not a valid hash (needs to be in hex too)' + b58_h256 = base58.b58encode(h256.digest()) + assert validator.validate( + b58_h256) == 'not a valid hash (needs to be in hex too)' + + assert validator.validate(hexlify(h256.digest()).decode()) is None diff --git 
a/plenum/test/input_validation/fields_validation/test_timestamp_field.py b/plenum/test/input_validation/fields_validation/test_timestamp_field.py index a763841f98..fd1e9156de 100644 --- a/plenum/test/input_validation/fields_validation/test_timestamp_field.py +++ b/plenum/test/input_validation/fields_validation/test_timestamp_field.py @@ -1,20 +1,16 @@ -import pytest from plenum.common.messages.fields import TimestampField -from datetime import datetime +from plenum.common.util import get_utc_epoch validator = TimestampField() -timestamp = datetime.now().timestamp() +timestamp = get_utc_epoch() def test_valid_value(): assert not validator.validate(timestamp) - # This is needed because timestamp is usually multiplied - # by 1000 to "make it compatible to JavaScript Date()" - assert not validator.validate(round(timestamp * 1000)) - def test_invalid_value(): assert validator.validate(-1) + assert validator.validate(validator._oldest_time-1) diff --git a/plenum/test/input_validation/fields_validation/test_txn_seq_no_field.py b/plenum/test/input_validation/fields_validation/test_txn_seq_no_field.py new file mode 100644 index 0000000000..b9e05b9e97 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_txn_seq_no_field.py @@ -0,0 +1,12 @@ +from plenum.common.messages.fields import TxnSeqNoField + +validator = TxnSeqNoField() + + +def test_valid_txn_seq_no(): + assert validator.validate(-1) == "cannot be smaller than 1" + assert validator.validate(0) == "cannot be smaller than 1" + assert validator.validate(2.2) == "expected types 'int', got 'float'" + assert validator.validate('') == "expected types 'int', got 'str'" + assert validator.validate(1) is None + assert validator.validate(200) is None diff --git a/plenum/test/input_validation/fields_validation/test_verkey_field.py b/plenum/test/input_validation/fields_validation/test_verkey_field.py new file mode 100644 index 0000000000..25de3f376b --- /dev/null +++ 
b/plenum/test/input_validation/fields_validation/test_verkey_field.py @@ -0,0 +1,36 @@ +import pytest + +import base58 +from plenum.common.messages.fields import VerkeyField + +from plenum.test.input_validation.utils import b58_by_len + +validator = VerkeyField() + + +def test_non_empty_verkeys(): + for byte_len in range(1, 33): + val = b58_by_len(byte_len) + as_long = validator.validate(val) + as_short = validator.validate('~' + val) + + if byte_len == 16: + assert (as_long == + 'b58 decoded value length 16 should be one of [32]') + assert not as_short + elif byte_len == 32: + assert not as_long + assert (as_short == + 'b58 decoded value length 32 should be one of [16]') + else: + assert (as_long == + 'b58 decoded value length {} should be one of [32]' + .format(byte_len)) + assert (as_short == + 'b58 decoded value length {} should be one of [16]' + .format(byte_len)) + + +def test_empty_verkey(): + res = validator.validate('') + assert res == 'b58 decoded value length 0 should be one of [32]' diff --git a/plenum/test/input_validation/fields_validation/test_version_field.py b/plenum/test/input_validation/fields_validation/test_version_field.py new file mode 100644 index 0000000000..0ffba9af22 --- /dev/null +++ b/plenum/test/input_validation/fields_validation/test_version_field.py @@ -0,0 +1,53 @@ +import pytest + +from plenum.common.messages.fields import VersionField + + +validator = VersionField(components_number=(2, 3,)) + + +def test_empty_version(): + assert validator.validate('') + + +def test_valid_version(): + assert not validator.validate('1.2.3') + assert not validator.validate('0.2.0') + assert not validator.validate('0.2') + + +def test_one_component_fails(): + assert validator.validate('123') + + +def test_a_string_component_fails(): + assert validator.validate('asdf.asdf') + + +def test_invalid_version(): + assert validator.validate('123.ads.00') + + +def test_invalid_number_of_comp(): + assert validator.validate('1.2.3.4') + + +def 
test_invalid_negative_comp(): + assert validator.validate('-1.-2.-3') + assert validator.validate('-1.2.3') + assert validator.validate('1.2.-3') + + +def test_spaces(): + assert validator.validate(' 1.2.3') + assert validator.validate('1. 2.3') + assert validator.validate('1.2. 3') + assert validator.validate('1 .2.3') + assert validator.validate('1.2 .3') + assert validator.validate('1.2.3 ') + assert validator.validate(' 1 .2.3') + assert validator.validate('1. 2 .3') + assert validator.validate('1.2. 3 ') + assert validator.validate(' -1.2.3') + assert validator.validate('1. -2 .3') + assert validator.validate('1.2.-3 ') \ No newline at end of file diff --git a/plenum/test/input_validation/message_validation/test_batch_message.py b/plenum/test/input_validation/message_validation/test_batch_message.py new file mode 100644 index 0000000000..eff71d2ac8 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_batch_message.py @@ -0,0 +1,27 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import IterableField, \ + SignatureField +from plenum.common.messages.node_messages import Batch + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("messages", IterableField), + ("signature", SignatureField) + +]) + + +def test_hash_expected_type(): + assert Batch.typename == "BATCH" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Batch.schema).keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) + + +def test_has_expected_validators(): + schema = dict(Batch.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_catchuprep_message.py b/plenum/test/input_validation/message_validation/test_catchuprep_message.py new file mode 100644 index 0000000000..51229e92ad --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_catchuprep_message.py @@ 
-0,0 +1,26 @@ +from plenum.common.messages.node_messages import CatchupRep, AnyValueField +from collections import OrderedDict +from plenum.common.messages.fields import \ + IterableField, LedgerIdField, MapField + + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("ledgerId", LedgerIdField), + ("txns", AnyValueField), + ("consProof", IterableField), +]) + + +def test_hash_expected_type(): + assert CatchupRep.typename == "CATCHUP_REP" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(CatchupRep.schema).keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) + + +def test_has_expected_validators(): + schema = dict(CatchupRep.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_catchupreq_message.py b/plenum/test/input_validation/message_validation/test_catchupreq_message.py new file mode 100644 index 0000000000..c49482a5f7 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_catchupreq_message.py @@ -0,0 +1,28 @@ +import pytest +from plenum.common.messages.node_messages import CatchupReq +from collections import OrderedDict +from plenum.common.messages.fields import \ + NonNegativeNumberField, LedgerIdField + + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("ledgerId", LedgerIdField), + ("seqNoStart", NonNegativeNumberField), + ("seqNoEnd", NonNegativeNumberField), + ("catchupTill", NonNegativeNumberField), +]) + + +def test_hash_expected_type(): + assert CatchupReq.typename == "CATCHUP_REQ" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(CatchupReq.schema).keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) + + +def test_has_expected_validators(): + schema = dict(CatchupReq.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git 
a/plenum/test/input_validation/message_validation/test_checkpoint_message.py b/plenum/test/input_validation/message_validation/test_checkpoint_message.py new file mode 100644 index 0000000000..065cce66d7 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_checkpoint_message.py @@ -0,0 +1,29 @@ +import pytest +from plenum.common.messages.node_messages import Checkpoint +from collections import OrderedDict +from plenum.common.messages.fields import \ + NonNegativeNumberField, NonEmptyStringField + + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("instId", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), + ("seqNoStart", NonNegativeNumberField), + ("seqNoEnd", NonNegativeNumberField), + ("digest", NonEmptyStringField), +]) + + +def test_hash_expected_type(): + assert Checkpoint.typename == "CHECKPOINT" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(Checkpoint.schema).keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) + + +def test_has_expected_validators(): + schema = dict(Checkpoint.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_commit_message.py b/plenum/test/input_validation/message_validation/test_commit_message.py index 4631617d85..77be34a9b7 100644 --- a/plenum/test/input_validation/message_validation/test_commit_message.py +++ b/plenum/test/input_validation/message_validation/test_commit_message.py @@ -1,5 +1,5 @@ import pytest -from plenum.common.types import Commit +from plenum.common.messages.node_messages import Commit from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField @@ -17,7 +17,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(Commit.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert 
list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_consistencyproof_message.py b/plenum/test/input_validation/message_validation/test_consistencyproof_message.py index 984327ea3e..a5d19a7638 100644 --- a/plenum/test/input_validation/message_validation/test_consistencyproof_message.py +++ b/plenum/test/input_validation/message_validation/test_consistencyproof_message.py @@ -1,5 +1,5 @@ import pytest -from plenum.common.types import ConsistencyProof +from plenum.common.messages.node_messages import ConsistencyProof from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ LedgerIdField, MerkleRootField, IterableField @@ -8,6 +8,7 @@ ("ledgerId", LedgerIdField), ("seqNoStart", NonNegativeNumberField), ("seqNoEnd", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), ("ppSeqNo", NonNegativeNumberField), ("oldMerkleRoot", MerkleRootField), ("newMerkleRoot", MerkleRootField), @@ -21,7 +22,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(ConsistencyProof.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_currentstate_message.py b/plenum/test/input_validation/message_validation/test_currentstate_message.py new file mode 100644 index 0000000000..1eca2514d9 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_currentstate_message.py @@ -0,0 +1,25 @@ +import pytest + +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, IterableField +from plenum.common.messages.node_messages import CurrentState + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("viewNo", NonNegativeNumberField), + ("primary", 
IterableField), +]) + + +def test_hash_expected_type(): + assert CurrentState.typename == "CURRENT_STATE" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(CurrentState.schema).keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) + + +def test_has_expected_validators(): + schema = dict(CurrentState.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/message_validation/test_instanceChange_message.py b/plenum/test/input_validation/message_validation/test_instanceChange_message.py index d9625456b3..0e363ecf51 100644 --- a/plenum/test/input_validation/message_validation/test_instanceChange_message.py +++ b/plenum/test/input_validation/message_validation/test_instanceChange_message.py @@ -2,7 +2,7 @@ from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField -from plenum.common.types import InstanceChange +from plenum.common.messages.node_messages import InstanceChange EXPECTED_ORDERED_FIELDS = OrderedDict([ ("viewNo", NonNegativeNumberField), @@ -16,7 +16,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(InstanceChange.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_ledgerstatus_message.py b/plenum/test/input_validation/message_validation/test_ledgerstatus_message.py index d2806fcaa4..31e7387677 100644 --- a/plenum/test/input_validation/message_validation/test_ledgerstatus_message.py +++ b/plenum/test/input_validation/message_validation/test_ledgerstatus_message.py @@ -3,11 +3,13 @@ from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ LedgerIdField, MerkleRootField -from 
plenum.common.types import LedgerStatus +from plenum.common.messages.node_messages import LedgerStatus EXPECTED_ORDERED_FIELDS = OrderedDict([ ("ledgerId", LedgerIdField), ("txnSeqNo", NonNegativeNumberField), + ("viewNo", NonNegativeNumberField), + ("ppSeqNo", NonNegativeNumberField), ("merkleRoot", MerkleRootField), ]) @@ -18,7 +20,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(LedgerStatus.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_nomination_message.py b/plenum/test/input_validation/message_validation/test_nomination_message.py index 0ca3d2e12e..41e894ff43 100644 --- a/plenum/test/input_validation/message_validation/test_nomination_message.py +++ b/plenum/test/input_validation/message_validation/test_nomination_message.py @@ -3,7 +3,7 @@ from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ NonEmptyStringField -from plenum.common.types import Nomination +from plenum.common.messages.node_messages import Nomination EXPECTED_ORDERED_FIELDS = OrderedDict([ ("name", NonEmptyStringField), @@ -19,7 +19,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(Nomination.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_ordered_message.py b/plenum/test/input_validation/message_validation/test_ordered_message.py index f62021c80e..43ed88eb33 100644 --- a/plenum/test/input_validation/message_validation/test_ordered_message.py +++ b/plenum/test/input_validation/message_validation/test_ordered_message.py @@ -2,8 +2,8 @@ from 
collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ - LedgerIdField, IterableField, TimestampField, HexField -from plenum.common.types import Ordered + LedgerIdField, IterableField, TimestampField, HexField, MerkleRootField, AnyValueField +from plenum.common.messages.node_messages import Ordered EXPECTED_ORDERED_FIELDS = OrderedDict([ ("instId", NonNegativeNumberField), @@ -12,8 +12,8 @@ ("ppSeqNo", NonNegativeNumberField), ("ppTime", TimestampField), ("ledgerId", LedgerIdField), - ("stateRootHash", HexField), - ("txnRootHash", HexField), + ("stateRootHash", MerkleRootField), + ("txnRootHash", MerkleRootField), ]) @@ -23,7 +23,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(Ordered.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_prepare_message.py b/plenum/test/input_validation/message_validation/test_prepare_message.py index 5ed19ae96d..486e84acfe 100644 --- a/plenum/test/input_validation/message_validation/test_prepare_message.py +++ b/plenum/test/input_validation/message_validation/test_prepare_message.py @@ -1,28 +1,26 @@ -import pytest - from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ - NonEmptyStringField, \ - HexField -from plenum.common.types import Prepare + NonEmptyStringField, MerkleRootField, TimestampField +from plenum.common.messages.node_messages import Prepare EXPECTED_ORDERED_FIELDS = OrderedDict([ ("instId", NonNegativeNumberField), ("viewNo", NonNegativeNumberField), ("ppSeqNo", NonNegativeNumberField), + ("ppTime", TimestampField), ("digest", NonEmptyStringField), - ("stateRootHash", HexField), - ("txnRootHash", HexField), + ("stateRootHash", MerkleRootField), + ("txnRootHash", 
MerkleRootField), ]) -def test_hash_expected_type(): +def test_has_expected_type(): assert Prepare.typename == "PREPARE" def test_has_expected_fields(): actual_field_names = OrderedDict(Prepare.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_preprepare_message.py b/plenum/test/input_validation/message_validation/test_preprepare_message.py index 6f86c3cb8b..007fa41b89 100644 --- a/plenum/test/input_validation/message_validation/test_preprepare_message.py +++ b/plenum/test/input_validation/message_validation/test_preprepare_message.py @@ -1,10 +1,8 @@ -import pytest - from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ LedgerIdField, IterableField, NonEmptyStringField, \ - TimestampField, HexField -from plenum.common.types import PrePrepare + TimestampField, MerkleRootField +from plenum.common.messages.node_messages import PrePrepare EXPECTED_ORDERED_FIELDS = OrderedDict([ ("instId", NonNegativeNumberField), @@ -15,18 +13,18 @@ ("discarded", NonNegativeNumberField), ("digest", NonEmptyStringField), ("ledgerId", LedgerIdField), - ("stateRootHash", HexField), - ("txnRootHash", HexField), + ("stateRootHash", MerkleRootField), + ("txnRootHash", MerkleRootField), ]) -def test_hash_expected_type(): +def test_has_expected_type(): assert PrePrepare.typename == "PREPREPARE" def test_has_expected_fields(): actual_field_names = OrderedDict(PrePrepare.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_primary_message.py b/plenum/test/input_validation/message_validation/test_primary_message.py index 678162bb4c..64e1aadc31 100644 
--- a/plenum/test/input_validation/message_validation/test_primary_message.py +++ b/plenum/test/input_validation/message_validation/test_primary_message.py @@ -1,9 +1,7 @@ -import pytest - from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ NonEmptyStringField -from plenum.common.types import Primary +from plenum.common.messages.node_messages import Primary EXPECTED_ORDERED_FIELDS = OrderedDict([ ("name", NonEmptyStringField), @@ -19,7 +17,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(Primary.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_propagate_message.py b/plenum/test/input_validation/message_validation/test_propagate_message.py index ebe578edd2..c5c86a1d85 100644 --- a/plenum/test/input_validation/message_validation/test_propagate_message.py +++ b/plenum/test/input_validation/message_validation/test_propagate_message.py @@ -1,8 +1,7 @@ -import pytest - from collections import OrderedDict from plenum.common.messages.fields import NonEmptyStringField -from plenum.common.types import Propagate, ClientMessageValidator +from plenum.common.messages.client_request import ClientMessageValidator +from plenum.common.messages.node_messages import Propagate EXPECTED_ORDERED_FIELDS = OrderedDict([ ("request", ClientMessageValidator), @@ -16,7 +15,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(Propagate.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_reelection_message.py 
b/plenum/test/input_validation/message_validation/test_reelection_message.py index d4b3231d3a..016fcb30cf 100644 --- a/plenum/test/input_validation/message_validation/test_reelection_message.py +++ b/plenum/test/input_validation/message_validation/test_reelection_message.py @@ -1,9 +1,7 @@ -import pytest - from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ IterableField -from plenum.common.types import Reelection +from plenum.common.messages.node_messages import Reelection EXPECTED_ORDERED_FIELDS = OrderedDict([ ("instId", NonNegativeNumberField), @@ -19,7 +17,7 @@ def test_hash_expected_type(): def test_has_expected_fields(): actual_field_names = OrderedDict(Reelection.schema).keys() - assert actual_field_names == EXPECTED_ORDERED_FIELDS.keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) def test_has_expected_validators(): diff --git a/plenum/test/input_validation/message_validation/test_threepcstate_message.py b/plenum/test/input_validation/message_validation/test_threepcstate_message.py new file mode 100644 index 0000000000..ba24fe1178 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_threepcstate_message.py @@ -0,0 +1,25 @@ +from plenum.common.messages.node_messages import ThreePCState +from collections import OrderedDict +from plenum.common.messages.fields import \ + NonNegativeNumberField, IterableField + + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("instId", NonNegativeNumberField), + ("messages", IterableField), +]) + + +def test_hash_expected_type(): + assert ThreePCState.typename == "THREE_PC_STATE" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(ThreePCState.schema).keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) + + +def test_has_expected_validators(): + schema = dict(ThreePCState.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) 
diff --git a/plenum/test/input_validation/message_validation/test_viewchangedone_messsage.py b/plenum/test/input_validation/message_validation/test_viewchangedone_messsage.py new file mode 100644 index 0000000000..7592bfae77 --- /dev/null +++ b/plenum/test/input_validation/message_validation/test_viewchangedone_messsage.py @@ -0,0 +1,25 @@ +from collections import OrderedDict +from plenum.common.messages.fields import NonNegativeNumberField, \ + IterableField, NonEmptyStringField +from plenum.common.messages.node_messages import ViewChangeDone + +EXPECTED_ORDERED_FIELDS = OrderedDict([ + ("viewNo", NonNegativeNumberField), + ("name", NonEmptyStringField), + ("ledgerInfo", IterableField) +]) + + +def test_hash_expected_type(): + assert ViewChangeDone.typename == "VIEW_CHANGE_DONE" + + +def test_has_expected_fields(): + actual_field_names = OrderedDict(ViewChangeDone.schema).keys() + assert list(actual_field_names) == list(EXPECTED_ORDERED_FIELDS.keys()) + + +def test_has_expected_validators(): + schema = dict(ViewChangeDone.schema) + for field, validator in EXPECTED_ORDERED_FIELDS.items(): + assert isinstance(schema[field], validator) diff --git a/plenum/test/input_validation/messages.py b/plenum/test/input_validation/messages.py index 849b6321ae..c66cec981a 100644 --- a/plenum/test/input_validation/messages.py +++ b/plenum/test/input_validation/messages.py @@ -1,6 +1,6 @@ -from plenum.common.request import Request -from plenum.common.types import Nomination, Reelection, Primary, Ordered, Propagate, PrePrepare, Prepare, Commit, \ - InstanceChange, Checkpoint, ThreePCState, LedgerStatus, ConsistencyProof, ConsProofRequest, CatchupReq, CatchupRep +from plenum.common.messages.node_messages import Nomination, Reelection, Primary, Ordered, \ + Propagate, PrePrepare, Prepare, Commit, InstanceChange, Checkpoint, \ + ThreePCState, LedgerStatus, ConsistencyProof, CatchupReq, CatchupRep from plenum.test.input_validation.fields import * from plenum.test.input_validation.helper 
import * @@ -150,15 +150,6 @@ ], ), - MessageDescriptor( - klass=ConsProofRequest, - fields=[ - ledger_id_field, - seq_no_start_field, - seq_no_stop_field, - ], - ), - MessageDescriptor( klass=CatchupReq, fields=[ diff --git a/plenum/test/input_validation/stub_messages.py b/plenum/test/input_validation/stub_messages.py new file mode 100644 index 0000000000..ae6cf5430e --- /dev/null +++ b/plenum/test/input_validation/stub_messages.py @@ -0,0 +1,43 @@ +from plenum.common.messages.fields import NonNegativeNumberField, NonEmptyStringField, AnyValueField, IterableField, \ + MapField, HexField, BooleanField +from plenum.common.messages.message_base import MessageBase + + +class Message1(MessageBase): + typename = 'Message1' + schema = ( + ('a', NonNegativeNumberField()), + ('b', NonEmptyStringField()), + ) + + +class Message2(MessageBase): + typename = 'Message2' + schema = ( + ('a', NonNegativeNumberField()), + ('b', AnyValueField()), + ) + + +class Message3(MessageBase): + typename = 'Message3' + schema = ( + ('a', NonNegativeNumberField()), + ('b', IterableField(BooleanField())), + ) + + +class Message4(MessageBase): + typename = 'Message4' + schema = ( + ('a', NonNegativeNumberField()), + ('b', MapField(HexField(), HexField())), + ) + + +class SomeNonMessageClass: + typename = 'SomeNonMessageClass' + schema = ( + ('a', NonNegativeNumberField()), + ('b', NonEmptyStringField()), + ) diff --git a/plenum/test/input_validation/test_client_get_txn_op.py b/plenum/test/input_validation/test_client_get_txn_op.py new file mode 100644 index 0000000000..297524971e --- /dev/null +++ b/plenum/test/input_validation/test_client_get_txn_op.py @@ -0,0 +1,33 @@ +import pytest + +from plenum.test.input_validation.constants import \ + TEST_SEQ_SMALL, TEST_SEQ_ONE, TEST_SEQ_NORMAL + +from plenum.common.messages.client_request import ClientGetTxnOperation, \ + TXN_TYPE, GET_TXN, DATA + +op_get_txn = ClientGetTxnOperation() + + +def test_small_seq_no_fails(): + with pytest.raises(TypeError) 
as ex_info: + op_get_txn.validate({ + TXN_TYPE: GET_TXN, + DATA: TEST_SEQ_SMALL + }) + ex_info.match(r'cannot be smaller than 1') + + +def test_one_seq_no_passes(): + op_get_txn.validate({ + TXN_TYPE: GET_TXN, + DATA: TEST_SEQ_ONE + }) + + +def test_one_seq_no_passes(): + op_get_txn.validate({ + TXN_TYPE: GET_TXN, + DATA: TEST_SEQ_NORMAL + }) + diff --git a/plenum/test/input_validation/test_client_node_op.py b/plenum/test/input_validation/test_client_node_op.py index b9543239eb..33c2311656 100644 --- a/plenum/test/input_validation/test_client_node_op.py +++ b/plenum/test/input_validation/test_client_node_op.py @@ -13,24 +13,24 @@ def test_only_alias_passes(): def test_empty_alias_fails(): with pytest.raises(TypeError) as ex_info: op.validate({ALIAS: ''}) - ex_info.match('validation error: empty string') + ex_info.match('validation error \[ClientNodeOperationData\]: empty string \(alias=\)') def test_missed_alias_fails(): - with pytest.raises(TypeError) as ex_info: + with pytest.raises(Exception) as ex_info: op.validate({SERVICES: []}) - ex_info.match('validation error: missed fields alias') + ex_info.match('validation error \[ClientNodeOperationData\]: missed fields - alias') def test_missed_a_ha_field_fails(): - with pytest.raises(TypeError) as ex_info: + with pytest.raises(Exception) as ex_info: op.validate({ ALIAS: 'aNode', NODE_PORT: 9700, CLIENT_IP: '8.8.8.8', CLIENT_PORT: 9701, }) - ex_info.match('validation error: missed fields node_ip') + ex_info.match('validation error \[ClientNodeOperationData\]: missed fields - node_ip') def test_update_services_passes(): diff --git a/plenum/test/input_validation/test_client_nym_op.py b/plenum/test/input_validation/test_client_nym_op.py new file mode 100644 index 0000000000..e14b51e267 --- /dev/null +++ b/plenum/test/input_validation/test_client_nym_op.py @@ -0,0 +1,53 @@ +import pytest + +from plenum.test.input_validation.constants import \ + TEST_TARGET_NYM, TEST_VERKEY_ABBREVIATED, TEST_VERKEY_FULL + +from 
plenum.test.input_validation.utils import b58_by_len + +from plenum.common.messages.client_request import ClientNYMOperation, \ + TXN_TYPE, NYM, TARGET_NYM, VERKEY + +op_nym = ClientNYMOperation() + + +def test_odd_length_verkey_fails(): + with pytest.raises(TypeError) as ex_info: + op_nym.validate({ + TXN_TYPE: NYM, + TARGET_NYM: TEST_TARGET_NYM, + VERKEY: 'F'*45 + }) + ex_info.match(r'b58 decoded value length 33 should be one of \[32\]') + + +def test_short_length_verkeys(): + assert not op_nym.validate({ + TXN_TYPE: NYM, + TARGET_NYM: TEST_TARGET_NYM, + VERKEY: TEST_VERKEY_ABBREVIATED + }) + + with pytest.raises(TypeError) as ex_info: + op_nym.validate({ + TXN_TYPE: NYM, + TARGET_NYM: TEST_TARGET_NYM, + VERKEY: b58_by_len(16) + }) + ex_info.match(r'b58 decoded value length 16 should be one of \[32\]') + + with pytest.raises(TypeError) as ex_info: + op_nym.validate({ + TXN_TYPE: NYM, + TARGET_NYM: TEST_TARGET_NYM, + VERKEY: '~' + b58_by_len(32) + }) + ex_info.match(r'b58 decoded value length 32 should be one of \[16\]') + + +def test_long_length_verkey_passes(): + assert not op_nym.validate({ + TXN_TYPE: NYM, + TARGET_NYM: TEST_TARGET_NYM, + VERKEY: TEST_VERKEY_FULL + }) diff --git a/plenum/test/input_validation/test_message_factory.py b/plenum/test/input_validation/test_message_factory.py new file mode 100644 index 0000000000..a611b51ed6 --- /dev/null +++ b/plenum/test/input_validation/test_message_factory.py @@ -0,0 +1,94 @@ +import pytest + +from plenum.common.exceptions import MissingNodeOp, InvalidNodeOp +from plenum.common.messages.fields import NonNegativeNumberField, AnyValueField, HexField, BooleanField, Base58Field +from plenum.common.messages.message_base import MessageBase +from plenum.common.messages.node_message_factory import MessageFactory, NodeMessageFactory +from plenum.test.input_validation.stub_messages import Message1, Message2, Message3, Message4 + + +@pytest.fixture +def factory(): + return 
MessageFactory('plenum.test.input_validation.stub_messages') + + +def test_message_factory_module_is_not_found_fails(): + with pytest.raises(ImportError): + MessageFactory('foo.bar') + + +def test_message_factory_missed_op_fails(factory): + msg = {'a': 0, 'b': 'bar'} + with pytest.raises(MissingNodeOp): + factory.get_instance(**msg) + + +def test_message_factory_invalid_op_fails(factory): + msg = {'op': 'unknown_op', 'a': 0, 'b': 'bar'} + with pytest.raises(InvalidNodeOp): + factory.get_instance(**msg) + + +def test_message_factory_stub_module_is_loaded(factory): + msg = {'op': 'Message1', 'a': 0, 'b': 'bar'} + assert isinstance(factory.get_instance(**msg), Message1) + + +def test_message_factory_set_non_message_class_fails(factory): + class NonMessageClass: + pass + + with pytest.raises(AssertionError): + factory.set_message_class(NonMessageClass) + + +def test_message_factory_set_message_class_can_add_message_class(factory): + class ANewMessageClass(MessageBase): + typename = 'NewMessage' + schema = ( + ('a', NonNegativeNumberField()), + ) + + factory.set_message_class(ANewMessageClass) + msg = {'op': 'NewMessage', 'a': 0} + assert isinstance(factory.get_instance(**msg), ANewMessageClass) + + +def test_node_message_factory_module_is_loaded(): + NodeMessageFactory() + + +def test_message_factory_can_replace_field(factory): + # check precondition + msg = {'op': 'Message2', 'a': 0, 'b': 'foo'} + assert isinstance(factory.get_instance(**msg), Message2) + + factory.update_schemas_by_field_type(AnyValueField, NonNegativeNumberField) + + with pytest.raises(TypeError) as exc_info: + factory.get_instance(**msg) + exc_info.match("expected types 'int', got 'str'") + + +def test_message_factory_can_replace_iterable_field(factory): + # check precondition + msg = {'op': 'Message3', 'a': 0, 'b': [True, False]} + assert isinstance(factory.get_instance(**msg), Message3) + + factory.update_schemas_by_field_type(BooleanField, Base58Field) + + with pytest.raises(TypeError) as 
exc_info: + factory.get_instance(**msg) + exc_info.match("expected types 'str', got 'bool'") + + +def test_message_factory_can_replace_map_field(factory): + # check precondition + msg = {'op': 'Message4', 'a': 0, 'b': {'123': 'abc'}} + assert isinstance(factory.get_instance(**msg), Message4) + + factory.update_schemas_by_field_type(HexField, NonNegativeNumberField) + + with pytest.raises(TypeError) as exc_info: + factory.get_instance(**msg) + exc_info.match("expected types 'int', got 'str'") diff --git a/plenum/test/input_validation/test_message_serialization.py b/plenum/test/input_validation/test_message_serialization.py new file mode 100644 index 0000000000..b958edf1cb --- /dev/null +++ b/plenum/test/input_validation/test_message_serialization.py @@ -0,0 +1,42 @@ +from plenum.common.messages.node_messages import LedgerStatus, MessageRep +from plenum.common.types import f +from stp_zmq.zstack import ZStack + + +def test_that_service_fields_not_being_serialized(): + """ + Checks that service fields of validators, like 'typename' and 'schema' ] + are excluded from serialized message + """ + + message = LedgerStatus(1,10,None,None,"AwgQhPR9cgRubttBGjRruCRMLhZFBffbejbPipj7WBBm") + serialized = ZStack.serializeMsg(message) + deserialized = ZStack.deserializeMsg(serialized) + service_fields = {'typename', 'schema', 'optional', 'nullable'} + assert service_fields - set(deserialized) == service_fields + + +def test_that_dir_returns_only_message_keys(): + message = LedgerStatus(1, 10, None, None, "AwgQhPR9cgRubttBGjRruCRMLhZFBffbejbPipj7WBBm") + assert set(dir(message)) == set(message.keys()) + +def test_serialization_of_submessages_to_dict(): + message = LedgerStatus(1, 10, None, None, "AwgQhPR9cgRubttBGjRruCRMLhZFBffbejbPipj7WBBm") + message_rep = MessageRep(**{ + f.MSG_TYPE.nm: "LEDGER_STATUS", + f.PARAMS.nm: {"ledger_id": 1}, + f.MSG.nm: message + }) + serialized_message = ZStack.serializeMsg(message).decode() + serialized_message_reply = 
ZStack.serializeMsg(message_rep).decode() + + # check that submessage (LedgerStatus) is serialized to the same dict as it were a common message + assert serialized_message in serialized_message_reply + + # check that de-serialized into the same message + deserialized_message = LedgerStatus(**ZStack.deserializeMsg(serialized_message)) + deserialized_submessage = LedgerStatus(**ZStack.deserializeMsg(serialized_message_reply)[f.MSG.nm]) + assert message == deserialized_message + assert message_rep.msg == deserialized_submessage + assert message == deserialized_submessage + diff --git a/plenum/test/input_validation/utils.py b/plenum/test/input_validation/utils.py index 4b6c7b7b00..c8b962e770 100644 --- a/plenum/test/input_validation/utils.py +++ b/plenum/test/input_validation/utils.py @@ -1,3 +1,6 @@ +import base58 + +from plenum.test.input_validation.constants import VALID_BASE58 def assert_valid(smth): @@ -5,4 +8,13 @@ def assert_valid(smth): def assert_invalid(smth): - assert smth \ No newline at end of file + assert smth + +B58_BY_BYTE_LENGTH = { + len(base58.b58decode(VALID_BASE58[:i])): + VALID_BASE58[:i] for i in range(1, len(VALID_BASE58) + 1) +} + + +def b58_by_len(byte_length): + return B58_BY_BYTE_LENGTH.get(byte_length) diff --git a/plenum/test/instances/helper.py b/plenum/test/instances/helper.py index d4dee5c0dd..9ed18ede61 100644 --- a/plenum/test/instances/helper.py +++ b/plenum/test/instances/helper.py @@ -2,8 +2,12 @@ from plenum.test.test_node import TestReplica -def recvdPrePrepare(replica: TestReplica): - return getAllArgs(replica, TestReplica.processPrePrepare) +def recvd_pre_prepares(replica: TestReplica): + return [arg['pp'] for arg in getAllArgs(replica, TestReplica.processPrePrepare)] + + +def processedPrePrepare(replica: TestReplica): + return getAllArgs(replica, TestReplica.addToPrePrepares) def processedPrePrepare(replica: TestReplica): @@ -18,5 +22,5 @@ def sentPrepare(replica: TestReplica, viewNo: int = None, ppSeqNo: int = None): (viewNo 
is None or param["pp"].viewNo == viewNo)] -def recvdPrepare(replica: TestReplica): - return getAllArgs(replica, TestReplica.processPrepare) +def recvd_prepares(replica: TestReplica): + return [arg['prepare'] for arg in getAllArgs(replica, TestReplica.processPrepare)] diff --git a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py index a5b06d7814..8e68770126 100644 --- a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py +++ b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py @@ -21,6 +21,11 @@ def limitTestRunningTime(): return 200 +@pytest.fixture(scope="function", autouse=True) +def limitTestRunningTime(): + return 200 + + # noinspection PyIncorrectDocstring def testProtocolInstanceCannotBecomeActiveWithLessThanFourServers( tdir_for_func): diff --git a/plenum/test/instances/test_msgs_from_slow_instances.py b/plenum/test/instances/test_msgs_from_slow_instances.py index f30d2aa4e6..9b78150acc 100644 --- a/plenum/test/instances/test_msgs_from_slow_instances.py +++ b/plenum/test/instances/test_msgs_from_slow_instances.py @@ -1,7 +1,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Commit +from plenum.common.messages.node_messages import Commit from plenum.server.replica import Replica from plenum.test.delayers import delayerMsgTuple from plenum.test.test_node import TestNode diff --git a/plenum/test/instances/test_multiple_commit.py b/plenum/test/instances/test_multiple_commit.py index 7d87937da2..0b14063655 100644 --- a/plenum/test/instances/test_multiple_commit.py +++ b/plenum/test/instances/test_multiple_commit.py @@ -1,10 +1,11 @@ +import types from functools import partial import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Commit -from plenum.common.util import adict 
+from plenum.common.messages.node_messages import Commit +from stp_core.common.util import adict from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions, whitelistNode from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ @@ -34,6 +35,12 @@ def setup(nodeSet, up): [node for node in nodeSet if node != faultyRep.node], Suspicions.DUPLICATE_CM_SENT.code) + # If the request is ordered then COMMIT will be rejected much earlier + for r in [primaryRep, *nonPrimaryReps]: + def do_nothing(self, commit): + pass + r.doOrder = types.MethodType(do_nothing, r) + return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps, faultyRep=faultyRep) diff --git a/plenum/test/instances/test_multiple_instance_change_msgs.py b/plenum/test/instances/test_multiple_instance_change_msgs.py index 0b5c148377..2cb648521c 100644 --- a/plenum/test/instances/test_multiple_instance_change_msgs.py +++ b/plenum/test/instances/test_multiple_instance_change_msgs.py @@ -2,7 +2,7 @@ from stp_core.loop.eventually import eventually from plenum.common.exceptions import SuspiciousNode -from plenum.common.types import InstanceChange +from plenum.common.messages.node_messages import InstanceChange from plenum.server.node import Node from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions diff --git a/plenum/test/instances/test_multiple_pre_prepare.py b/plenum/test/instances/test_multiple_pre_prepare.py index d6572cf994..07cdb27311 100644 --- a/plenum/test/instances/test_multiple_pre_prepare.py +++ b/plenum/test/instances/test_multiple_pre_prepare.py @@ -3,7 +3,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import PrePrepare +from plenum.common.messages.node_messages import PrePrepare from plenum.common.util import adict from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions diff --git 
a/plenum/test/instances/test_multiple_prepare.py b/plenum/test/instances/test_multiple_prepare.py index 7ef9080d82..f8dcfd7bfd 100644 --- a/plenum/test/instances/test_multiple_prepare.py +++ b/plenum/test/instances/test_multiple_prepare.py @@ -3,8 +3,8 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Prepare -from plenum.common.util import adict +from plenum.common.messages.node_messages import Prepare +from stp_core.common.util import adict from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions, whitelistNode from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ diff --git a/plenum/test/instances/test_pre_prepare_digest.py b/plenum/test/instances/test_pre_prepare_digest.py index 8f88f97971..6e2e85d1f9 100644 --- a/plenum/test/instances/test_pre_prepare_digest.py +++ b/plenum/test/instances/test_pre_prepare_digest.py @@ -3,8 +3,9 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import PrePrepare +from plenum.common.messages.node_messages import PrePrepare from plenum.common.util import adict +from stp_core.common.util import adict from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions from plenum.test.instances.helper import sentPrepare diff --git a/plenum/test/instances/test_prepare_digest.py b/plenum/test/instances/test_prepare_digest.py index fd56d07ed7..e13d2302e7 100644 --- a/plenum/test/instances/test_prepare_digest.py +++ b/plenum/test/instances/test_prepare_digest.py @@ -3,7 +3,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Prepare +from plenum.common.messages.node_messages import Prepare from plenum.common.util import adict from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions diff --git a/plenum/test/malicious_behaviors_node.py 
b/plenum/test/malicious_behaviors_node.py index 26228097e5..cdfe404986 100644 --- a/plenum/test/malicious_behaviors_node.py +++ b/plenum/test/malicious_behaviors_node.py @@ -5,8 +5,9 @@ import time import plenum.common.error -from plenum.common.types import Propagate, PrePrepare, Prepare, ThreePhaseMsg, \ - Commit, Reply, f +from plenum.common.types import f + +from plenum.common.messages.node_messages import * from plenum.common.request import Request, ReqDigest from plenum.common import util @@ -16,7 +17,7 @@ from plenum.test.helper import TestReplica from plenum.test.test_node import TestNode, TestReplica, getPrimaryReplica, \ getNonPrimaryReplicas -from plenum.test.delayers import ppDelay +from plenum.test.delayers import ppDelay, cDelay logger = getlogger() @@ -45,28 +46,30 @@ def delaysPrePrepareProcessing(node, delay: float=30, instId: int=None): node.nodeIbStasher.delay(ppDelay(delay=delay, instId=instId)) +def delaysCommitProcessing(node, delay: float=30, instId: int=None): + node.nodeIbStasher.delay(cDelay(delay=delay, instId=instId)) + + # Could have this method directly take a replica rather than a node and an # instance id but this looks more useful as a complete node can be malicious def sendDuplicate3PhaseMsg(node: TestNode, msgType: ThreePhaseMsg, count: int=2, instId=None): def evilSendPrePrepareRequest(self, ppReq: PrePrepare): - # tm = time.time() - # prePrepare = PrePrepare(self.instId, self.viewNo, - # self.lastPrePrepareSeqNo+1, tm, *reqDigest) logger.debug("EVIL: Sending duplicate pre-prepare message: {}". 
format(ppReq)) self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = ppReq sendDup(self, ppReq, TPCStat.PrePrepareSent, count) - def evilSendPrepare(self, request): + def evilSendPrepare(self, ppReq: PrePrepare): prepare = Prepare(self.instId, - request.viewNo, - request.ppSeqNo, - request.digest, - request.stateRootHash, - request.txnRootHash) + ppReq.viewNo, + ppReq.ppSeqNo, + ppReq.ppTime, + ppReq.digest, + ppReq.stateRootHash, + ppReq.txnRootHash) logger.debug("EVIL: Creating prepare message for request {}: {}". - format(request, prepare)) + format(ppReq, prepare)) self.addToPrepares(prepare, self.name) sendDup(self, prepare, TPCStat.PrepareSent, count) @@ -123,10 +126,6 @@ def malignInstancesOfNode(node: TestNode, malignMethod, instId: int=None): def send3PhaseMsgWithIncorrectDigest(node: TestNode, msgType: ThreePhaseMsg, instId: int=None): def evilSendPrePrepareRequest(self, ppReq: PrePrepare): - # reqDigest = ReqDigest(reqDigest.identifier, reqDigest.reqId, "random") - # tm = time.time() - # prePrepare = PrePrepare(self.instId, self.viewNo, - # self.lastPrePrepareSeqNo+1, *reqDigest, tm) logger.debug("EVIL: Creating pre-prepare message for request : {}". 
format(ppReq)) ppReq = updateNamedTuple(ppReq, digest=ppReq.digest+'random') @@ -138,6 +137,7 @@ def evilSendPrepare(self, ppReq): prepare = Prepare(self.instId, ppReq.viewNo, ppReq.ppSeqNo, + ppReq.ppTime, digest, ppReq.stateRootHash, ppReq.txnRootHash) diff --git a/plenum/test/monitoring/test_avg_latency.py b/plenum/test/monitoring/test_avg_latency.py index b8550a2b4a..1d9020eb9b 100644 --- a/plenum/test/monitoring/test_avg_latency.py +++ b/plenum/test/monitoring/test_avg_latency.py @@ -1,4 +1,3 @@ -from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from stp_core.loop.looper import Looper from plenum.server.node import Node diff --git a/plenum/test/monitoring/test_instance_change_with_Delta.py b/plenum/test/monitoring/test_instance_change_with_Delta.py index 49a8b6f1b8..ab9e6643e2 100644 --- a/plenum/test/monitoring/test_instance_change_with_Delta.py +++ b/plenum/test/monitoring/test_instance_change_with_Delta.py @@ -2,15 +2,13 @@ import pytest -from plenum.common.types import PrePrepare -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.server.node import Node from plenum.test import waits from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies from plenum.test.malicious_behaviors_node import slow_primary from plenum.test.test_node import getPrimaryReplica from plenum.test.view_change.helper import provoke_and_wait_for_view_change -from plenum.test.helper import waitForViewChange from stp_core.common.log import getlogger from stp_core.loop.eventually import eventually diff --git a/plenum/test/monitoring/test_instance_change_with_req_Lambda.py b/plenum/test/monitoring/test_instance_change_with_req_Lambda.py index aff6d4cff2..f7d221175c 100644 --- a/plenum/test/monitoring/test_instance_change_with_req_Lambda.py +++ b/plenum/test/monitoring/test_instance_change_with_req_Lambda.py @@ -1,10 +1,7 @@ -from functools import partial - import pytest -from stp_core.loop.eventually 
import eventually -from plenum.common.types import PrePrepare, f -from plenum.common.util import adict +from plenum.common.messages.node_messages import PrePrepare +from stp_core.common.util import adict from plenum.test.helper import waitForViewChange, \ sendReqsToNodesAndVerifySuffReplies from plenum.test.test_node import getPrimaryReplica @@ -28,17 +25,23 @@ @pytest.fixture('module') def setup(looper, tconf, startedNodes, up, wallet1, client1): + sendReqsToNodesAndVerifySuffReplies(looper, + wallet1, + client1, + numReqs=5) # Get the master replica of the master protocol instance P = getPrimaryReplica(startedNodes) - # Make `Delta` small enough so throughput check passes. - for node in startedNodes: - node.monitor.Delta = .001 - - # set LAMBDA not so huge like it set in the production config + # set LAMBDA smaller than the production config to make the test faster testLambda = 30 + delay_by = testLambda + 5 + for node in startedNodes: + # Make `Delta` small enough so throughput check passes. 
+ node.monitor.Delta = .001 node.monitor.Lambda = testLambda + for r in node.replicas: + r.config.ACCEPTABLE_DEVIATION_PREPREPARE_SECS += delay_by slowed_request = False @@ -48,7 +51,7 @@ def specificPrePrepare(msg): nonlocal slowed_request if isinstance(msg, PrePrepare) and slowed_request is False: slowed_request = True - return testLambda + 5 # just more that LAMBDA + return delay_by # just more that LAMBDA P.outBoxTestStasher.delay(specificPrePrepare) # TODO select or create a timeout for this case in 'waits' diff --git a/plenum/test/monitoring/test_throughput.py b/plenum/test/monitoring/test_throughput.py index 415bc8d556..93c2f7ec14 100644 --- a/plenum/test/monitoring/test_throughput.py +++ b/plenum/test/monitoring/test_throughput.py @@ -1,6 +1,5 @@ from typing import Iterable -from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.server.node import Node from plenum.test.helper import sendRandomRequest, \ diff --git a/plenum/test/msgs.py b/plenum/test/msgs.py index ca76cf0615..1514203db3 100644 --- a/plenum/test/msgs.py +++ b/plenum/test/msgs.py @@ -1,13 +1,20 @@ -from plenum.common.types import TaggedTuple +from plenum.common.messages.message_base import MessageBase +from plenum.common.messages.node_message_factory import node_message_factory +from plenum.common.messages.node_messages import NonEmptyStringField from plenum.common.util import randomString -def randomMsg() -> TaggedTuple: +def randomMsg(): return TestMsg('subject ' + randomString(), 'content ' + randomString()) -TESTMSG = "TESTMSG" -TestMsg = TaggedTuple(TESTMSG, [ - ("subject", str), - ("content", str)]) \ No newline at end of file +class TestMsg(MessageBase): + typename = "TESTMSG" + schema = ( + ("subject", NonEmptyStringField()), + ("content", NonEmptyStringField()), + ) + + +node_message_factory.set_message_class(TestMsg) diff --git a/plenum/test/node_catchup/conftest.py b/plenum/test/node_catchup/conftest.py index c5ca1f68a3..a32ebbc66d 
100644 --- a/plenum/test/node_catchup/conftest.py +++ b/plenum/test/node_catchup/conftest.py @@ -1,15 +1,18 @@ import pytest +from plenum.test.spy_helpers import getAllReturnVals from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.common.util import randomString from plenum.test.conftest import getValueFromModule from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + check_last_3pc_master from plenum.test.pool_transactions.helper import \ addNewStewardAndNode, buildPoolClientAndWallet from plenum.test.pool_transactions.conftest import stewardAndWallet1, \ - steward1, stewardWallet + steward1, stewardWallet, clientAndWallet1, client1, wallet1, \ + client1Connected from plenum.test.test_client import TestClient from plenum.test.test_node import checkNodesConnected @@ -27,7 +30,7 @@ def looper(txnPoolNodesLooper): @pytest.yield_fixture("module") -def nodeCreatedAfterSomeTxns(looper, txnPoolNodesLooper, txnPoolNodeSet, +def nodeCreatedAfterSomeTxns(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnStewardData, tconf, allPluginsPath, request): client, wallet = buildPoolClientAndWallet(poolTxnStewardData, @@ -36,7 +39,7 @@ def nodeCreatedAfterSomeTxns(looper, txnPoolNodesLooper, txnPoolNodeSet, looper.add(client) looper.run(client.ensureConnectedToNodes()) txnCount = getValueFromModule(request, "txnCount", 5) - sendReqsToNodesAndVerifySuffReplies(txnPoolNodesLooper, + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, txnCount) @@ -64,4 +67,35 @@ def nodeSetWithNodeAddedAfterSomeTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): def newNodeCaughtUp(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns): looper, newNode, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4]) + check_last_3pc_master(newNode, 
txnPoolNodeSet[:4]) + + # Check if catchup done once + catchup_done_once = True + for li in newNode.ledgerManager.ledgerRegistry.values(): + catchup_done_once = catchup_done_once and (li.num_txns_caught_up > 0) + + if not catchup_done_once: + # It might be the case that node has to do catchup again, in that case + # check the return value of `num_txns_caught_up_in_last_catchup` to be + # greater than 0 + + assert max(getAllReturnVals(newNode, + newNode.num_txns_caught_up_in_last_catchup)) > 0 return newNode + + +@pytest.yield_fixture("module") +def poolAfterSomeTxns(looper, txnPoolNodesLooper, txnPoolNodeSet, tdirWithPoolTxns, + poolTxnStewardData, tconf, allPluginsPath, request): + client, wallet = buildPoolClientAndWallet(poolTxnStewardData, + tdirWithPoolTxns, + clientClass=TestClient) + looper.run(checkNodesConnected(txnPoolNodeSet)) + looper.add(client) + looper.run(client.ensureConnectedToNodes()) + txnCount = getValueFromModule(request, "txnCount", 5) + sendReqsToNodesAndVerifySuffReplies(txnPoolNodesLooper, + wallet, + client, + txnCount) + yield looper, client, wallet diff --git a/plenum/test/node_catchup/helper.py b/plenum/test/node_catchup/helper.py index 5ae469a367..bca6ef8808 100644 --- a/plenum/test/node_catchup/helper.py +++ b/plenum/test/node_catchup/helper.py @@ -1,27 +1,30 @@ -from stp_zmq.zstack import KITZStack +import types + +from plenum.common.util import check_if_all_equal_in_list +from stp_core.common.log import getlogger from typing import Iterable from plenum.common.constants import POOL_LEDGER_ID, DOMAIN_LEDGER_ID from stp_core.loop.eventually import eventually from stp_core.types import HA from plenum.test.helper import checkLedgerEquality, checkStateEquality, \ - check_seqno_db_equality, assertEquality, check_last_ordered_pp_seq_no + check_seqno_db_equality, assertEquality, check_last_ordered_3pc from plenum.test.test_client import TestClient from plenum.test.test_node import TestNode from plenum.test import waits -from plenum.common 
import util import pytest -# TODO: This should just take an arbitrary number of nodes and check for their -# ledgers to be equal +logger = getlogger() +# TODO: This should just take an arbitrary number of nodes and check for their +# ledgers to be equal def checkNodeDataForEquality(node: TestNode, *otherNodes: Iterable[TestNode]): # Checks for node's ledgers and state's to be equal for n in otherNodes: - check_last_ordered_pp_seq_no(node, n) + check_last_ordered_3pc(node, n) check_seqno_db_equality(node.seqNoDB, n.seqNoDB) checkLedgerEquality(node.domainLedger, n.domainLedger) checkStateEquality(node.getState(DOMAIN_LEDGER_ID), n.getState(DOMAIN_LEDGER_ID)) @@ -30,8 +33,8 @@ def checkNodeDataForEquality(node: TestNode, checkStateEquality(node.getState(POOL_LEDGER_ID), n.getState(POOL_LEDGER_ID)) -def checkNodeDataForUnequality(node: TestNode, - *otherNodes: Iterable[TestNode]): +def checkNodeDataForInequality(node: TestNode, + *otherNodes: Iterable[TestNode]): # Checks for node's ledgers and state's to be unequal with pytest.raises(AssertionError): checkNodeDataForEquality(node, *otherNodes) @@ -55,10 +58,10 @@ def waitNodeDataEquality(looper, retryWait=1, timeout=timeout)) -def waitNodeDataUnequality(looper, - referenceNode: TestNode, - *otherNodes: Iterable[TestNode], - customTimeout=None): +def waitNodeDataInequality(looper, + referenceNode: TestNode, + *otherNodes: Iterable[TestNode], + customTimeout=None): """ Wait for node ledger to become equal @@ -67,12 +70,18 @@ def waitNodeDataUnequality(looper, numOfNodes = len(otherNodes) + 1 timeout = customTimeout or waits.expectedPoolGetReadyTimeout(numOfNodes) - looper.run(eventually(checkNodeDataForUnequality, + looper.run(eventually(checkNodeDataForInequality, referenceNode, *otherNodes, retryWait=1, timeout=timeout)) +def ensure_all_nodes_have_same_data(looper, nodes, custom_timeout=None): + node = next(iter(nodes)) + other_nodes = [n for n in nodes if n != node] + waitNodeDataEquality(looper, node, *other_nodes, 
customTimeout=custom_timeout) + + def ensureNewNodeConnectedClient(looper, client: TestClient, node: TestNode): stackParams = node.clientStackParams client.nodeReg[stackParams['name']] = HA('127.0.0.1', stackParams['ha'][1]) @@ -93,9 +102,51 @@ def ensureClientConnectedToNodesAndPoolLedgerSame(looper, looper.run(eventually(checkClientPoolLedgerSameAsNodes, client, *nodes, + retryWait=.5, timeout=timeout)) def check_ledger_state(node, ledger_id, ledger_state): assertEquality(node.ledgerManager.getLedgerInfoByType(ledger_id).state, - ledger_state) \ No newline at end of file + ledger_state) + + +def check_last_3pc_master(node, other_nodes): + last_3pc = [node.replicas[0].last_ordered_3pc] + for n in other_nodes: + last_3pc.append(n.replicas[0].last_ordered_3pc) + assert check_if_all_equal_in_list(last_3pc) + + +def make_a_node_catchup_twice(target_node, other_nodes, ledger_id, shorten_by): + """ + All `other_nodes` make the `node` catchup multiple times by serving + consistency proof of a ledger smaller by `shorten_by` txns + """ + nodes_to_send_proof_of_small_ledger = {n.name for n in other_nodes} + orig_methods = {} + for node in other_nodes: + orig_methods[node.name] = node.ledgerManager._buildConsistencyProof + + def patched_method(self, ledgerId, seqNoStart, seqNoEnd): + if self.owner.name in nodes_to_send_proof_of_small_ledger: + import inspect + curframe = inspect.currentframe() + calframe = inspect.getouterframes(curframe, 2) + # For domain ledger, send a proof for a small ledger to the bad node + if calframe[1][ + 3] == node.ledgerManager.getConsistencyProof.__name__ \ + and calframe[2].frame.f_locals['frm'] == target_node.name \ + and ledgerId == ledger_id: + # Pop so this node name, so proof for smaller ledger is not + # served again + nodes_to_send_proof_of_small_ledger.remove(self.owner.name) + logger.debug('{} sending a proof to {} for {} instead ' + 'of {}'.format(self.owner.name, target_node.name, + seqNoEnd - shorten_by, seqNoEnd)) + return 
orig_methods[node.name](ledgerId, seqNoStart, + seqNoEnd - shorten_by) + return orig_methods[node.name](ledgerId, seqNoStart, seqNoEnd) + + node.ledgerManager._buildConsistencyProof = types.MethodType( + patched_method, node.ledgerManager) \ No newline at end of file diff --git a/plenum/test/node_catchup/test_catchup_delayed_nodes.py b/plenum/test/node_catchup/test_catchup_delayed_nodes.py index ae588a7bb7..f74ede57a6 100644 --- a/plenum/test/node_catchup/test_catchup_delayed_nodes.py +++ b/plenum/test/node_catchup/test_catchup_delayed_nodes.py @@ -1,6 +1,5 @@ import pytest -from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.test import waits diff --git a/plenum/test/node_catchup/test_catchup_demoted.py b/plenum/test/node_catchup/test_catchup_demoted.py index fcd12c0b7d..4065a4fd70 100644 --- a/plenum/test/node_catchup/test_catchup_demoted.py +++ b/plenum/test/node_catchup/test_catchup_demoted.py @@ -1,22 +1,24 @@ from plenum.common.constants import ALIAS, SERVICES, VALIDATOR from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.conftest import whitelist from plenum.test.node_catchup.helper import waitNodeDataEquality, \ - checkNodeDataForUnequality, checkNodeDataForEquality + checkNodeDataForInequality from plenum.test.pool_transactions.helper import \ updateNodeData from stp_core.common.log import getlogger -# Logger.setLogLevel(logging.WARNING) +from plenum.test.node_catchup.conftest import whitelist + logger = getlogger() def test_catch_up_after_demoted(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns): - # 1. add a new node after sending some txns and check that catch-up is done (the new node is up to date) - looper, newNode, client, wallet, newStewardClient, newStewardWallet = nodeSetWithNodeAddedAfterSomeTxns + logger.info("1. 
add a new node after sending some txns and check that catch-up " + "is done (the new node is up to date)") + looper, newNode, client, wallet, newStewardClient, \ + newStewardWallet = nodeSetWithNodeAddedAfterSomeTxns waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4]) - # 2. turn the new node off (demote) + logger.info("2. turn the new node off (demote)") node_data = { ALIAS: newNode.name, SERVICES: [] @@ -25,11 +27,12 @@ def test_catch_up_after_demoted(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxn newStewardWallet, newNode, node_data) - # 3. send more requests, so that the new node's state is outdated + logger.info("3. send more requests, " + "so that the new node's state is outdated") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) - checkNodeDataForUnequality(newNode, *txnPoolNodeSet[:-1]) + checkNodeDataForInequality(newNode, *txnPoolNodeSet[:-1]) - # 4. turn the new node on + logger.info("4. turn the new node on") node_data = { ALIAS: newNode.name, SERVICES: [VALIDATOR] @@ -38,9 +41,11 @@ def test_catch_up_after_demoted(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxn newStewardWallet, newNode, node_data) - # 5. make sure catch-up is done (the new node is up to date again) + logger.info("5. make sure catch-up is done " + "(the new node is up to date again)") waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) - # 6. send more requests and make sure that the new node participates in processing them + logger.info("6. 
send more requests and make sure " + "that the new node participates in processing them") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10) - checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1]) + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) diff --git a/plenum/test/node_catchup/test_catchup_f_plus_one.py b/plenum/test/node_catchup/test_catchup_f_plus_one.py new file mode 100644 index 0000000000..5dbc9a9f43 --- /dev/null +++ b/plenum/test/node_catchup/test_catchup_f_plus_one.py @@ -0,0 +1,53 @@ +from stp_core.common.log import getlogger +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + waitNodeDataInequality, checkNodeDataForEquality, check_last_3pc_master +from plenum.test.pool_transactions.helper import \ + disconnect_node_and_ensure_disconnected, reconnect_node_and_ensure_connected + +# Do not remove the next import +from plenum.test.node_catchup.conftest import whitelist + +logger = getlogger() +txnCount = 5 + + +def testNodeCatchupFPlusOne(txnPoolNodeSet, poolAfterSomeTxns): + """ + Check that f+1 nodes is enough for catchup + """ + looper, client, wallet = poolAfterSomeTxns + + assert len(txnPoolNodeSet) == 4 + + node1 = txnPoolNodeSet[-1] + node0 = txnPoolNodeSet[-2] + + logger.debug("Stopping node0 with pool ledger size {}". + format(node0.poolManager.txnSeqNo)) + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node0, stopNode=False) + looper.removeProdable(node0) + + logger.debug("Sending requests") + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) + + logger.debug("Stopping node1 with pool ledger size {}". 
+ format(node1.poolManager.txnSeqNo)) + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node1, stopNode=False) + looper.removeProdable(node1) + + # Make sure new node got out of sync + waitNodeDataInequality(looper, node0, *txnPoolNodeSet[:-2]) + + # TODO: Check if the node has really stopped processing requests? + + logger.debug("Starting the stopped node0") + looper.add(node0) + reconnect_node_and_ensure_connected(looper, txnPoolNodeSet[:-1], node0) + + logger.debug("Waiting for the node0 to catch up") + waitNodeDataEquality(looper, node0, *txnPoolNodeSet[:-2]) + + logger.debug("Sending more requests") + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2) + checkNodeDataForEquality(node0, *txnPoolNodeSet[:-2]) \ No newline at end of file diff --git a/plenum/test/node_catchup/test_catchup_inlcuding_3PC.py b/plenum/test/node_catchup/test_catchup_inlcuding_3PC.py new file mode 100644 index 0000000000..c851a9c0ff --- /dev/null +++ b/plenum/test/node_catchup/test_catchup_inlcuding_3PC.py @@ -0,0 +1,69 @@ +import pytest + +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.common.util import check_if_all_equal_in_list +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import check_last_3pc_master, \ + waitNodeDataEquality +from plenum.test.pool_transactions.helper import buildPoolClientAndWallet +from plenum.test.test_client import TestClient +from stp_core.loop.eventually import eventually + + +def chk_if_equal_txn_to_3pc(nodes, count=None): + txn_to_tpc = [] + for node in nodes: + txn_to_tpc.append(node.txn_seq_range_to_3phase_key[DOMAIN_LEDGER_ID]) + assert check_if_all_equal_in_list(txn_to_tpc) + if count is not None: + assert len(txn_to_tpc[0]) == count + + +@pytest.fixture("module") +def tconf(tconf, request): + old_size = tconf.ProcessedBatchMapsToKeep + tconf.ProcessedBatchMapsToKeep = 5 + + def reset(): + tconf.ProcessedBatchMapsToKeep = old_size + + 
request.addfinalizer(reset) + return tconf + + +@pytest.fixture("module") +def pre_check(tconf, looper, txnPoolNodeSet, tdirWithPoolTxns, + poolTxnStewardData): + # TODO: Maybe this needs to be extracted in another fixture + client, wallet = buildPoolClientAndWallet(poolTxnStewardData, + tdirWithPoolTxns, + clientClass=TestClient) + looper.add(client) + looper.run(client.ensureConnectedToNodes()) + for i in range(tconf.ProcessedBatchMapsToKeep-1): + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 1) + + # All node maintain the same map from txn range to 3PC + looper.run(eventually(chk_if_equal_txn_to_3pc, txnPoolNodeSet)) + for i in range(3): + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 1) + + # All node maintain the same map from txn range to 3PC and its equal to + # `tconf.ProcessedBatchMapsToKeep` even after sending more batches than + # `tconf.ProcessedBatchMapsToKeep`, which shows the garbage cleaning in + # action + looper.run(eventually(chk_if_equal_txn_to_3pc, txnPoolNodeSet, + tconf.ProcessedBatchMapsToKeep)) + + +def test_nodes_maintain_master_txn_3PC_map(looper, txnPoolNodeSet, pre_check, + nodeCreatedAfterSomeTxns): + _, new_node, client, wallet, _, _ = nodeCreatedAfterSomeTxns + txnPoolNodeSet.append(new_node) + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4]) + # Check the new node has set same `last_3pc_ordered` for master as others + check_last_3pc_master(new_node, txnPoolNodeSet[:4]) + chk_if_equal_txn_to_3pc(txnPoolNodeSet[:4]) + # Requests still processed + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2) + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4]) diff --git a/plenum/test/node_catchup/test_catchup_scenarios.py b/plenum/test/node_catchup/test_catchup_scenarios.py index 787251c6d8..0f1cd4fa1a 100644 --- a/plenum/test/node_catchup/test_catchup_scenarios.py +++ b/plenum/test/node_catchup/test_catchup_scenarios.py @@ -30,7 +30,7 @@ def nodeStashingOrderedRequests(txnPoolNodeSet, 
nodeCreatedAfterSomeTxns): def stashing(): assert newNode.mode != Mode.participating assert len(newNode.stashedOrderedReqs) > 0 - assert len(newNode.reqsFromCatchupReplies) > 0 + # assert len(newNode.reqsFromCatchupReplies) > 0 timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(stashing, retryWait=1, timeout=timeout)) diff --git a/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py b/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py index 92adbc96fc..2f4f4992ee 100644 --- a/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py +++ b/plenum/test/node_catchup/test_catchup_while_new_request_incoming.py @@ -1,14 +1,15 @@ import types from plenum.common.constants import DOMAIN_LEDGER_ID -from plenum.common.types import CatchupReq, f +from plenum.common.types import f +from plenum.common.messages.node_messages import CatchupReq from plenum.common.util import randomString from plenum.test.delayers import cqDelay from plenum.test.helper import sendRandomRequests, \ sendReqsToNodesAndVerifySuffReplies from plenum.test.node_catchup.helper import checkNodeDataForEquality from plenum.test.pool_transactions.helper import addNewStewardAndNode -from plenum.test.test_node import checkNodesConnected, TestNode +from plenum.test.test_node import TestNode from stp_core.loop.eventually import eventually @@ -37,7 +38,7 @@ def chkAfterCall(self, req, frm): chkAfterCall, node.ledgerManager) node.nodeIbStasher.delay(cqDelay(3)) - print('Sending 10 requests') + print('Sending 5 requests') sendRandomRequests(stewardWallet, steward1, 5) looper.runFor(1) newStewardName = randomString() diff --git a/plenum/test/node_catchup/test_discard_view_no.py b/plenum/test/node_catchup/test_discard_view_no.py index 197a984824..dedacb8132 100644 --- a/plenum/test/node_catchup/test_discard_view_no.py +++ b/plenum/test/node_catchup/test_discard_view_no.py @@ -1,11 +1,8 @@ import time -from functools import partial - 
-import pytest +from plenum.common.util import get_utc_epoch from stp_core.loop.eventually import eventually -from plenum.common.types import Nomination, PrePrepare -from plenum.common.util import randomString +from plenum.common.messages.node_messages import PrePrepare from plenum.common.constants import DOMAIN_LEDGER_ID from plenum.test.helper import checkDiscardMsg from plenum.test.view_change.helper import ensure_view_change @@ -18,12 +15,13 @@ whitelist = ['found legacy entry'] # warnings + def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns, newNodeCaughtUp, tdirWithPoolTxns, tconf, allPluginsPath): """ - Node discards 3-phase and election messages from view nos that it does not + Node discards 3-phase or ViewChangeDone messages from view nos that it does not know of (view nos before it joined the pool) :return: """ @@ -34,31 +32,23 @@ def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, # at least two less than node's. Current protocol implementation # needs to hold messages from the previous view as well as # from the current view. 
- ensure_view_change(looper, txnPoolNodeSet, client, wallet) - ensure_view_change(looper, txnPoolNodeSet, client, wallet) + for i in range(2): + ensure_view_change(looper, txnPoolNodeSet) + waitNodeDataEquality(looper, nodeX, *txnPoolNodeSet[:-1]) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) - newStewardName = "testClientSteward" + randomString(3) - nodeName = "Theta" - _, _, nodeTheta = addNewStewardAndNode(looper, client, - wallet, - newStewardName, - nodeName, - tdirWithPoolTxns, tconf, - allPluginsPath) - txnPoolNodeSet.append(nodeTheta) - looper.run(checkNodesConnected(txnPoolNodeSet)) - looper.run(client.ensureConnectedToNodes()) - waitNodeDataEquality(looper, nodeTheta, *txnPoolNodeSet[:-1]) - checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) - electMsg = Nomination(nodeX.name, 0, viewNo, - nodeX.replicas[0].lastOrderedPPSeqNo) + sender = txnPoolNodeSet[0] + rid_x_node = sender.nodestack.getRemote(nodeX.name).uid + messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime() + + # 3 pc msg (PrePrepare) needs to be discarded primaryRepl = getPrimaryReplica(txnPoolNodeSet) - threePMsg = PrePrepare( + three_pc = PrePrepare( 0, viewNo, 10, - time.time(), + get_utc_epoch(), [[wallet.defaultId, wallet._getIdData().lastReqId+1]], 1, "random digest", @@ -66,14 +56,14 @@ def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, primaryRepl.stateRootHash(DOMAIN_LEDGER_ID), primaryRepl.txnRootHash(DOMAIN_LEDGER_ID), ) - ridTheta = nodeX.nodestack.getRemote(nodeTheta.name).uid - nodeX.send(electMsg, ridTheta) - - messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime() - looper.run(eventually(checkDiscardMsg, [nodeTheta, ], electMsg, - 'un-acceptable viewNo', - retryWait=1, timeout=messageTimeout)) - nodeX.send(threePMsg, ridTheta) - looper.run(eventually(checkDiscardMsg, [nodeTheta, ], threePMsg, + sender.send(three_pc, rid_x_node) + looper.run(eventually(checkDiscardMsg, [nodeX, ], three_pc, 'un-acceptable viewNo', 
retryWait=1, timeout=messageTimeout)) + + # TODO: the same check for ViewChangeDone + + + + + diff --git a/plenum/test/node_catchup/test_new_node_catchup.py b/plenum/test/node_catchup/test_new_node_catchup.py index ed0cf20a7d..e6b71c2e9e 100644 --- a/plenum/test/node_catchup/test_new_node_catchup.py +++ b/plenum/test/node_catchup/test_new_node_catchup.py @@ -3,17 +3,18 @@ import pytest from plenum.common.constants import DOMAIN_LEDGER_ID, LedgerState -from plenum.common.util import updateNamedTuple -from plenum.test.delayers import cqDelay, cr_delay -from stp_zmq.zstack import KITZStack +from plenum.test.delayers import cr_delay +from plenum.test.spy_helpers import get_count from stp_core.loop.eventually import eventually from plenum.common.types import HA from stp_core.common.log import getlogger -from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ + check_last_ordered_3pc from plenum.test.node_catchup.helper import waitNodeDataEquality, \ check_ledger_state -from plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected +from plenum.test.pool_transactions.helper import \ + disconnect_node_and_ensure_disconnected from plenum.test.test_ledger_manager import TestLedgerManager from plenum.test.test_node import checkNodesConnected, TestNode from plenum.test import waits @@ -27,7 +28,7 @@ def testNewNodeCatchup(newNodeCaughtUp): """ - A new node that joins after some transactions should eventually get + A new node that joins after some transactions are done should eventually get those transactions. TODO: Test correct statuses are exchanged TODO: Test correct consistency proofs are generated @@ -99,7 +100,8 @@ def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet, tconf, # txnPoolNodeSet[:4], retryWait=1, timeout=5)) # TODO: Check if the node has really stopped processing requests? 
logger.debug("Sending requests") - sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) + more_requests = 5 + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, more_requests) logger.debug("Starting the stopped node, {}".format(newNode)) nodeHa, nodeCHa = HA(*newNode.nodestack.ha), HA(*newNode.clientstack.ha) newNode = TestNode(newNode.name, basedirpath=tdirWithPoolTxns, config=tconf, @@ -117,7 +119,8 @@ def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet, tconf, LedgerState.syncing, retryWait=.5, timeout=5)) confused_node = txnPoolNodeSet[0] - cp = newNode.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID].catchUpTill + new_node_ledger = newNode.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID] + cp = new_node_ledger.catchUpTill start, end = cp.seqNoStart, cp.seqNoEnd cons_proof = confused_node.ledgerManager._buildConsistencyProof( DOMAIN_LEDGER_ID, start, end) @@ -147,10 +150,42 @@ def send_and_chk(ledger_state): # Not accurate timeout but a conservative one timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \ 2*delay_catchup_reply - waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4], + waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1], customTimeout=timeout) - + assert new_node_ledger.num_txns_caught_up == more_requests send_and_chk(LedgerState.synced) - # cons_proof = updateNamedTuple(cons_proof, seqNoEnd=cons_proof.seqNoStart, - # seqNoStart=cons_proof.seqNoEnd) - # send_and_chk(LedgerState.synced) + + +def testNodeCatchupAfterRestart1(newNodeCaughtUp, txnPoolNodeSet, tconf, + nodeSetWithNodeAddedAfterSomeTxns, + tdirWithPoolTxns, allPluginsPath): + """ + A node restarts but no transactions have happened while it was down. + It would then use the `LedgerStatus` to catchup + """ + looper, new_node, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns + + logger.debug("Stopping node {} with pool ledger size {}". 
+ format(new_node, new_node.poolManager.txnSeqNo)) + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, new_node) + looper.removeProdable(name=new_node.name) + + logger.debug("Starting the stopped node, {}".format(new_node)) + nodeHa, nodeCHa = HA(*new_node.nodestack.ha), HA(*new_node.clientstack.ha) + new_node = TestNode(new_node.name, basedirpath=tdirWithPoolTxns, config=tconf, + ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) + looper.add(new_node) + txnPoolNodeSet[-1] = new_node + looper.run(checkNodesConnected(txnPoolNodeSet)) + + def chk(): + for node in txnPoolNodeSet[:-1]: + check_last_ordered_3pc(new_node, node) + + looper.run(eventually(chk, retryWait=1)) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1]) + # Did not receive any consistency proofs + assert get_count(new_node.ledgerManager, + new_node.ledgerManager.processConsistencyProof) == 0 diff --git a/plenum/test/node_catchup/test_new_node_catchup2.py b/plenum/test/node_catchup/test_new_node_catchup2.py index 4d31a4db61..e6a0b5bfb0 100644 --- a/plenum/test/node_catchup/test_new_node_catchup2.py +++ b/plenum/test/node_catchup/test_new_node_catchup2.py @@ -1,18 +1,12 @@ -from itertools import chain -from time import perf_counter - import pytest -from plenum.common.constants import DOMAIN_LEDGER_ID, LedgerState -from plenum.common.util import updateNamedTuple -from plenum.test.delayers import cqDelay, cr_delay +from plenum.test import waits +from plenum.test.delayers import cqDelay +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.node_request.node_request_helper import chk_commits_prepares_recvd from plenum.test.test_node import ensureElectionsDone -from stp_zmq.zstack import KITZStack - from stp_core.common.log import getlogger -from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from 
plenum.test.node_catchup.helper import waitNodeDataEquality, \ - check_ledger_state # Do not remove the next import from plenum.test.node_catchup.conftest import whitelist @@ -45,36 +39,22 @@ def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet, nodeCreatedAfterSomeTxns txnPoolNodeSet.append(new_node) old_nodes = txnPoolNodeSet[:-1] - ensureElectionsDone(looper, txnPoolNodeSet) sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) - new_node_replica_names = {r.instId: r.name for r in new_node.replicas} - - def chk_commits_prepares_recvd(count): - counts = {} - for node in old_nodes: - for replica in node.replicas: - if replica.instId not in counts: - counts[replica.instId] = 0 - nm = new_node_replica_names[replica.instId] - for commit in replica.commits.values(): - counts[replica.instId] += int(nm in commit.voters) - for prepare in replica.prepares.values(): - counts[replica.instId] += int(nm in prepare.voters) - for c in counts.values(): - assert count == c - - chk_commits_prepares_recvd(0) + chk_commits_prepares_recvd(0, old_nodes, new_node) for node in old_nodes: - node.resetDelays() - node.force_process_delayeds() - - waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1]) + node.reset_delays_and_process_delayeds() - looper.runFor(20) + timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet)) + \ + catchup_delay + \ + waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout) + waitNodeDataEquality(looper, new_node, *old_nodes) sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2) # Commits and Prepares are received by all old nodes - chk_commits_prepares_recvd(2 * (len(old_nodes))) - waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4]) + with pytest.raises(AssertionError): + # Since nodes discard 3PC messages for already ordered requests. 
+ chk_commits_prepares_recvd(0, old_nodes, new_node) + waitNodeDataEquality(looper, new_node, *old_nodes) diff --git a/plenum/test/node_catchup/test_no_catchup_if_got_from_3pc.py b/plenum/test/node_catchup/test_no_catchup_if_got_from_3pc.py new file mode 100644 index 0000000000..2af4ff5f44 --- /dev/null +++ b/plenum/test/node_catchup/test_no_catchup_if_got_from_3pc.py @@ -0,0 +1,70 @@ +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.common.messages.node_messages import Commit, ConsistencyProof +from plenum.test.delayers import cpDelay, cDelay +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + +from plenum.test.helper import send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data, \ + waitNodeDataInequality, waitNodeDataEquality +from plenum.test.primary_selection.test_primary_selection_pool_txn import \ + ensure_pool_functional +from plenum.test.spy_helpers import getAllReturnVals +from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.view_change.helper import ensure_view_change + + +def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet, wallet1, client1, + client1Connected): + """ + A node is slow to receive COMMIT messages so after a view change it + starts catchup. But before it can start requesting txns, the COMMITs messages + are received and are ordered. The node should not request any transactions. 
+ :return: + """ + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 2 * 3, 3) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node + other_nodes = [n for n in txnPoolNodeSet if n != slow_node] + + delay_cm = 30 + delat_cp = 40 + slow_node.nodeIbStasher.delay(cDelay(delay_cm)) + # The slow node receives consistency proofs after some delay, this delay + # gives the opportunity to deliver all 3PC messages + slow_node.nodeIbStasher.delay(cpDelay(delat_cp)) + + # Count of `getCatchupReqs` which is called to construct the `CatchupReq` + # to be sent + domain_cr_count = lambda: sum(1 for entry in + slow_node.ledgerManager.spylog.getAll( + slow_node.ledgerManager.getCatchupReqs) if + entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID) + + old_count = domain_cr_count() + sent_batches = 10 + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, + 2 * sent_batches, sent_batches) + ensure_view_change(looper, nodes=txnPoolNodeSet) + + # After view change, the `slow_node` is behind + waitNodeDataInequality(looper, slow_node, *other_nodes) + + # Unstash only COMMIT messages + slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.__name__) + + looper.runFor(2) + + slow_node.nodeIbStasher.reset_delays_and_process_delayeds( + ConsistencyProof.__name__) + + waitNodeDataEquality(looper, slow_node, *other_nodes) + + # No `CatchupReq`s constructed, hence no `CatchupReq`s could have + # been sent + assert domain_cr_count() == old_count + # Some stashed ordered requests have been processed + rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs) + assert sent_batches in rv + + ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1) diff --git a/plenum/test/node_catchup/test_node_catchup_after_checkpoints.py b/plenum/test/node_catchup/test_node_catchup_after_checkpoints.py new file mode 100644 index 0000000000..73095f569a --- /dev/null +++ 
b/plenum/test/node_catchup/test_node_catchup_after_checkpoints.py @@ -0,0 +1,83 @@ +from logging import getLogger + +import pytest + +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test.delayers import cDelay, delay_3pc_messages + +from plenum.test.helper import send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import waitNodeDataInequality, waitNodeDataEquality +from plenum.test.test_node import getNonPrimaryReplicas + + +logger = getLogger() + + +def test_node_catchup_after_checkpoints(looper, txnPoolNodeSet, chk_freq_patched, + wallet1, client1, client1Connected, + slow_node_and_others): + """ + For some reason a node misses 3pc messages but eventually the node stashes + some amount checkpoints and decides to catchup. + """ + slow_node, other_nodes = slow_node_and_others + completed_catchups_before = get_number_of_completed_catchups(slow_node) + + logger.info("Step 1: Send less than required for start the catchup procedure" + "and check the slow node falls behind") + batches_num = 2 * chk_freq_patched - 1 + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, + num_reqs=batches_num, + num_batches=batches_num, + ) + waitNodeDataInequality(looper, slow_node, *other_nodes) + + logger.info("Step 2: Send remaining requests in order to trigger the catchup" + "procedure for the slow node, then check data equality") + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, + num_reqs=1 + ) + waitNodeDataEquality(looper, slow_node, *other_nodes) + # check if there was a catchup + completed_catchups_after = get_number_of_completed_catchups(slow_node) + assert completed_catchups_after == completed_catchups_before + 1 + + +@pytest.fixture +def slow_node_and_others(txnPoolNodeSet): + node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node + other = [n for n in txnPoolNodeSet if n != node] + + delay = 1000 + logger.info("Delay 3pc messages for {} on {} sec".format(node, delay)) + + node.nodeIbStasher.delay( + 
cDelay(delay_3pc_messages([node, ], inst_id=None, delay=delay)) + ) + return node, other + + +@pytest.fixture(scope="module") +def chk_freq_patched(tconf, request): + oldChkFreq = tconf.CHK_FREQ + oldLogSize = tconf.LOG_SIZE + + tconf.CHK_FREQ = 2 + tconf.LOG_SIZE = 2*tconf.CHK_FREQ + + def reset(): + tconf.CHK_FREQ = oldChkFreq + tconf.LOG_SIZE = oldLogSize + + request.addfinalizer(reset) + + return tconf.CHK_FREQ + + +def get_number_of_completed_catchups(node): + cnt = 0 + for entry in node.ledgerManager.spylog.getAll(node.ledgerManager.catchupCompleted): + if entry.params['ledgerId'] == DOMAIN_LEDGER_ID: + cnt += 1 + return cnt + diff --git a/plenum/test/node_catchup/test_node_catchup_after_disconnect.py b/plenum/test/node_catchup/test_node_catchup_after_disconnect.py index 7ec2bb8185..bb39f6294d 100644 --- a/plenum/test/node_catchup/test_node_catchup_after_disconnect.py +++ b/plenum/test/node_catchup/test_node_catchup_after_disconnect.py @@ -1,7 +1,9 @@ from stp_core.common.log import getlogger from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import waitNodeDataEquality, waitNodeDataUnequality, checkNodeDataForEquality -from plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected, reconnect_node_and_ensure_connected +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + waitNodeDataInequality, checkNodeDataForEquality +from plenum.test.pool_transactions.helper import \ + disconnect_node_and_ensure_disconnected, reconnect_node_and_ensure_connected # Do not remove the next import from plenum.test.node_catchup.conftest import whitelist @@ -29,7 +31,7 @@ def testNodeCatchupAfterDisconnect(newNodeCaughtUp, txnPoolNodeSet, logger.debug("Sending requests") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) # Make sure new node got out of sync - waitNodeDataUnequality(looper, newNode, *txnPoolNodeSet[:-1]) + waitNodeDataInequality(looper, newNode, 
*txnPoolNodeSet[:-1]) logger.debug("Starting the stopped node, {}".format(newNode)) looper.add(newNode) diff --git a/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py b/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py index 9b44681991..35d2b224f5 100644 --- a/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py +++ b/plenum/test/node_catchup/test_node_catchup_after_lost_connection.py @@ -1,7 +1,10 @@ -from stp_core.common.log import getlogger +from plenum.test.pool_transactions.helper import \ + disconnect_node_and_ensure_disconnected from plenum.test.test_node import ensure_node_disconnected +from stp_core.common.log import getlogger from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.node_catchup.helper import waitNodeDataEquality, waitNodeDataUnequality, checkNodeDataForEquality +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + waitNodeDataInequality, checkNodeDataForEquality # Do not remove the next import from plenum.test.node_catchup.conftest import whitelist @@ -14,25 +17,29 @@ def testNodeCatchupAfterLostConnection(newNodeCaughtUp, txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns): """ - A node that has poor internet connection and got unsynced after some transactions should eventually get the - transactions which happened while it was not accessible + A node that has poor internet connection and got unsynced after some + transactions should eventually get the transactions which happened while + it was not accessible :return: """ looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns - logger.debug("Stopping node {} with pool ledger size {}". - format(newNode, newNode.poolManager.txnSeqNo)) + logger.debug("Disconnecting node {}, ledger size {}". 
+ format(newNode, newNode.domainLedger.size)) + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, newNode, + stopNode=False) looper.removeProdable(newNode) # TODO: Check if the node has really stopped processing requests? logger.debug("Sending requests") sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) # Make sure new node got out of sync - waitNodeDataUnequality(looper, newNode, *txnPoolNodeSet[:-1]) + waitNodeDataInequality(looper, newNode, *txnPoolNodeSet[:-1]) - logger.debug("Ensure node {} gets disconnected".format(newNode)) + # logger.debug("Ensure node {} gets disconnected".format(newNode)) ensure_node_disconnected(looper, newNode, txnPoolNodeSet[:-1]) - logger.debug("Starting the stopped node, {}".format(newNode)) + logger.debug("Connecting the node {} back, ledger size {}". + format(newNode, newNode.domainLedger.size)) looper.add(newNode) logger.debug("Waiting for the node to catch up, {}".format(newNode)) diff --git a/plenum/test/node_catchup/test_node_catchup_and_view_change_after_start.py b/plenum/test/node_catchup/test_node_catchup_and_view_change_after_start.py new file mode 100644 index 0000000000..1e692346f3 --- /dev/null +++ b/plenum/test/node_catchup/test_node_catchup_and_view_change_after_start.py @@ -0,0 +1 @@ +# TODO: Write a test for View change timeout on node startup \ No newline at end of file diff --git a/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py b/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py index 1a007980c5..fd4d4db6b4 100644 --- a/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py +++ b/plenum/test/node_catchup/test_node_reject_invalid_txn_during_catchup.py @@ -1,14 +1,16 @@ +import pytest import types from plenum.common.ledger import Ledger from stp_core.common.log import getlogger from plenum.common.constants import TXN_TYPE, DOMAIN_LEDGER_ID -from plenum.common.types import CatchupReq, f, CatchupRep -from plenum.test.helper 
import sendRandomRequests +from plenum.common.messages.node_messages import CatchupReq, CatchupRep +from plenum.common.types import f from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.test_node import checkNodesConnected, getNonPrimaryReplicas from plenum.test import waits + # Do not remove the next import from plenum.test.node_catchup.conftest import whitelist @@ -18,7 +20,8 @@ txnCount = 10 -def testNodeRejectingInvalidTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): +def testNodeRejectingInvalidTxns(conf, txnPoolNodeSet, patched_node, + nodeCreatedAfterSomeTxns): """ A newly joined node is catching up and sends catchup requests to other nodes but one of the nodes replies with incorrect transactions. The newly @@ -27,49 +30,68 @@ def testNodeRejectingInvalidTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): requests the missing transactions. """ looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns + bad_node = patched_node - # So nodes wont tell the clients about the newly joined node so they - # dont send any request to the newly joined node - for node in txnPoolNodeSet: - node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node) + do_not_tell_clients_about_newly_joined_node(txnPoolNodeSet) + + logger.debug('Catchup request processor of {} patched'.format(bad_node)) - def sendIncorrectTxns(self, req, frm): - ledgerId = getattr(req, f.LEDGER_ID.nm) - if ledgerId == DOMAIN_LEDGER_ID: - logger.info("{} being malicious and sending incorrect transactions" - " for catchup request {} from {}". 
- format(self, req, frm)) - start, end = getattr(req, f.SEQ_NO_START.nm), \ - getattr(req, f.SEQ_NO_END.nm) - ledger = self.getLedgerForMsg(req) - txns = ledger.getAllTxn(start, end) - for seqNo in txns.keys(): - # Since the type of random request is `buy` - if txns[seqNo].get(TXN_TYPE) == "buy": - txns[seqNo][TXN_TYPE] = "randomtype" - consProof = [Ledger.hashToStr(p) for p in - ledger.tree.consistency_proof(end, ledger.size)] - self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_ID.nm), txns, - consProof), to=frm) - else: - self.processCatchupReq(req, frm) - - # One of the node sends incorrect txns in catchup reply. - npr = getNonPrimaryReplicas(txnPoolNodeSet, 0) - badReplica = npr[0] - badNode = badReplica.node - badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType( - sendIncorrectTxns, badNode.ledgerManager) - logger.debug( - 'Catchup request processor of {} patched'.format(badNode)) - - sendRandomRequests(wallet, client, 10) looper.run(checkNodesConnected(txnPoolNodeSet)) - # Since one of the nodes will send a bad catchup reply, this node will - # request transactions from another node, hence large timeout. - # Dont reduce it. 
+ # catchup #1 -> CatchupTransactionsTimeout -> catchup #2 + catchup_timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet) + 1) + timeout = 2 * catchup_timeout + conf.CatchupTransactionsTimeout + + # have to skip seqno_db check because the txns are not executed + # on the new node waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1], - customTimeout=45) + customTimeout=timeout) + + assert newNode.isNodeBlacklisted(bad_node.name) + + +@pytest.fixture +def patched_node(txnPoolNodeSet): + node = get_any_non_primary_node(txnPoolNodeSet) + node_will_send_incorrect_catchup(node) + return node + + +def get_any_non_primary_node(nodes): + npr = getNonPrimaryReplicas(nodes, 0) + return npr[0].node + + +def do_not_tell_clients_about_newly_joined_node(nodes): + for node in nodes: + node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node) + + +def node_will_send_incorrect_catchup(node): + node.nodeMsgRouter.routes[CatchupReq] = types.MethodType( + _sendIncorrectTxns, + node.ledgerManager + ) + - assert newNode.isNodeBlacklisted(badNode.name) +def _sendIncorrectTxns(self, req, frm): + ledgerId = getattr(req, f.LEDGER_ID.nm) + if ledgerId == DOMAIN_LEDGER_ID: + logger.info("{} being malicious and sending incorrect transactions" + " for catchup request {} from {}". 
+ format(self, req, frm)) + start, end = getattr(req, f.SEQ_NO_START.nm), \ + getattr(req, f.SEQ_NO_END.nm) + ledger = self.getLedgerForMsg(req) + txns = {} + for seqNo, txn in ledger.getAllTxn(start, end): + # Since the type of random request is `buy` + if txn.get(TXN_TYPE) == "buy": + txn[TXN_TYPE] = "randomtype" + txns[seqNo] = txn + consProof = [Ledger.hashToStr(p) for p in + ledger.tree.consistency_proof(end, ledger.size)] + self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_ID.nm), txns, + consProof), to=frm) + else: + self.processCatchupReq(req, frm) diff --git a/plenum/test/node_catchup/test_node_request_consistency_proof.py b/plenum/test/node_catchup/test_node_request_consistency_proof.py index 40206261c0..b721e7da01 100644 --- a/plenum/test/node_catchup/test_node_request_consistency_proof.py +++ b/plenum/test/node_catchup/test_node_request_consistency_proof.py @@ -1,26 +1,28 @@ import types -from random import randint - -import pytest +from plenum.common.constants import DOMAIN_LEDGER_ID, CONSISTENCY_PROOF from plenum.common.ledger import Ledger -from stp_core.loop.eventually import eventually +from plenum.test.node_request.message_request.helper import \ + count_msg_reqs_of_type from stp_core.common.log import getlogger -from plenum.common.types import LedgerStatus +from plenum.common.messages.node_messages import LedgerStatus from plenum.test.helper import sendRandomRequests from plenum.test.node_catchup.helper import waitNodeDataEquality -from plenum.test.test_ledger_manager import TestLedgerManager from plenum.test.test_node import checkNodesConnected -from plenum.test import waits -# Do not remove the next import +# Do not remove the next imports from plenum.test.node_catchup.conftest import whitelist +from plenum.test.batching_3pc.conftest import tconf logger = getlogger() +# So that `three_phase_key_for_txn_seq_no` always works, it makes the test +# easy as the requesting node selects a random size for the ledger +Max3PCBatchSize = 1 -def 
testNodeRequestingConsProof(txnPoolNodeSet, nodeCreatedAfterSomeTxns): +def testNodeRequestingConsProof(tconf, txnPoolNodeSet, + nodeCreatedAfterSomeTxns): """ All of the 4 old nodes delay the processing of LEDGER_STATUS from the newly joined node while they are processing requests which results in them sending @@ -38,25 +40,28 @@ def testNodeRequestingConsProof(txnPoolNodeSet, nodeCreatedAfterSomeTxns): txnPoolNodeSet.append(newNode) # The new node sends different ledger statuses to every node so it # does not get enough similar consistency proofs - sentSizes = set() + next_size = 0 + origMethod = newNode.build_ledger_status + def build_broken_ledger_status(self, ledger_id): + nonlocal next_size + if ledger_id != DOMAIN_LEDGER_ID: + return origMethod(ledger_id) - def sendDLStatus(self, name): size = self.primaryStorage.size - newSize = randint(1, size) - while newSize in sentSizes: - newSize = randint(1, size) - print("new size {}".format(newSize)) - newRootHash = Ledger.hashToStr( - self.domainLedger.tree.merkle_tree_hash(0, newSize)) - ledgerStatus = LedgerStatus(1, newSize, - newRootHash) + next_size = next_size + 1 if next_size < size else 1 + print("new size {}".format(next_size)) + newRootHash = Ledger.hashToStr( + self.domainLedger.tree.merkle_tree_hash(0, next_size)) + three_pc_key = self.three_phase_key_for_txn_seq_no(ledger_id, + next_size) + v, p = three_pc_key if three_pc_key else None, None + ledgerStatus = LedgerStatus(1, next_size, v, p, newRootHash) print("dl status {}".format(ledgerStatus)) - rid = self.nodestack.getRemote(name).uid - self.send(ledgerStatus, rid) - sentSizes.add(newSize) + return ledgerStatus + - newNode.sendDomainLedgerStatus = types.MethodType(sendDLStatus, newNode) + newNode.build_ledger_status = types.MethodType(build_broken_ledger_status, newNode) logger.debug( 'Domain Ledger status sender of {} patched'.format(newNode)) @@ -70,7 +75,6 @@ def sendDLStatus(self, name): waitNodeDataEquality(looper, newNode, 
*txnPoolNodeSet[:-1], customTimeout=75) - # Other nodes should have received a `ConsProofRequest` and processed it. + # Other nodes should have received a request for `CONSISTENCY_PROOF` and processed it. for node in txnPoolNodeSet[:-1]: - assert node.ledgerManager.spylog.count( - TestLedgerManager.processConsistencyProofReq.__name__) > 0 + assert count_msg_reqs_of_type(node, CONSISTENCY_PROOF) > 0, node diff --git a/plenum/test/node_catchup/test_node_request_missing_transactions.py b/plenum/test/node_catchup/test_node_request_missing_transactions.py index 8994115e3b..14e4220405 100644 --- a/plenum/test/node_catchup/test_node_request_missing_transactions.py +++ b/plenum/test/node_catchup/test_node_request_missing_transactions.py @@ -1,11 +1,10 @@ -import time import types import pytest -from stp_core.loop.eventually import eventually +from plenum.common.constants import DOMAIN_LEDGER_ID from stp_core.common.log import getlogger -from plenum.common.types import CatchupReq +from plenum.common.messages.node_messages import CatchupReq from plenum.test.helper import sendRandomRequests from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.test_node import checkNodesConnected, getNonPrimaryReplicas @@ -17,12 +16,14 @@ logger = getlogger() +TestRunningTimeLimitSec = 150 + @pytest.fixture(scope="module") -def catchupTimeoutReduced(conf, tdir, request): +def reduced_catchup_timeout_conf(conf, tdir, request): defaultCatchupTransactionsTimeout = conf.CatchupTransactionsTimeout conf.baseDir = tdir - conf.CatchupTransactionsTimeout = 1 + conf.CatchupTransactionsTimeout = 10 def reset(): conf.CatchupTransactionsTimeout = defaultCatchupTransactionsTimeout @@ -31,7 +32,8 @@ def reset(): return conf -def testNodeRequestingTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): +def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet, + nodeCreatedAfterSomeTxns): """ A newly joined node is catching up and sends catchup requests to other nodes but 
one of the nodes does not reply and the newly joined node cannot @@ -39,6 +41,10 @@ def testNodeRequestingTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns): transactions. """ looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns + new_node_ledger = newNode.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID] + old_size = len(new_node_ledger.ledger) + old_size_others = txnPoolNodeSet[0].ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID].ledger.size + # So nodes wont tell the clients about the newly joined node so they # dont send any request to the newly joined node for node in txnPoolNodeSet: @@ -56,11 +62,18 @@ def ignoreCatchupReq(self, req, frm): badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType( ignoreCatchupReq, badNode.ledgerManager) - sendRandomRequests(wallet, client, 10) + more_requests = 10 + sendRandomRequests(wallet, client, more_requests) looper.run(checkNodesConnected(txnPoolNodeSet)) # Since one of the nodes does not reply, this new node will experience a # timeout and retry catchup requests, hence a long test timeout. - # Dont reduce it. 
+ timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \ + reduced_catchup_timeout_conf.CatchupTransactionsTimeout waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1], - customTimeout=100) + customTimeout=timeout) + new_size = len(new_node_ledger.ledger) + + # The new node ledger might catchup some transactions from the batch of `more_request` transactions + assert old_size_others - old_size <= new_node_ledger.num_txns_caught_up <= new_size - old_size + sendRandomRequests(wallet, client, 2) diff --git a/plenum/test/node_catchup/test_revert_during_catchup.py b/plenum/test/node_catchup/test_revert_during_catchup.py new file mode 100644 index 0000000000..63216b2695 --- /dev/null +++ b/plenum/test/node_catchup/test_revert_during_catchup.py @@ -0,0 +1,122 @@ +from itertools import combinations + +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test import waits +from plenum.test.delayers import cDelay, cqDelay +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ + check_last_ordered_3pc +from plenum.test.node_catchup.helper import waitNodeDataInequality, \ + make_a_node_catchup_twice, ensure_all_nodes_have_same_data +from plenum.test.spy_helpers import getAllReturnVals +from plenum.test.test_node import getNonPrimaryReplicas, \ + checkProtocolInstanceSetup +from plenum.test.view_change.helper import ensure_view_change +from stp_core.loop.eventually import eventually + +# Do not remove the next imports +from plenum.test.batching_3pc.conftest import tconf + +Max3PCBatchSize = 2 + + +def test_slow_node_reverts_unordered_state_during_catchup(looper, + txnPoolNodeSet, + client1, + wallet1, + client1Connected): + """ + Delay COMMITs to a node such that when it needs to catchup, it needs to + revert some unordered state. Also till this time the node should have + receive all COMMITs such that it will apply some of the COMMITs ( + for which it has not received txns from catchup). 
+ For this delay COMMITs by long, do catchup for a little older than the state received in + LedgerStatus, once catchup completes, reset delays and try to process + delayed COMMITs, some COMMITs will be rejected but some will be processed + since catchup was done for older ledger. + """ + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + 3 * Max3PCBatchSize) + nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0) + slow_node = nprs[-1].node + other_nodes = [n for n in txnPoolNodeSet if n != slow_node] + slow_master_replica = slow_node.master_replica + + commit_delay = 150 + catchup_req_delay = 15 + + # Delay COMMITs to one node + slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0)) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + 6 * Max3PCBatchSize) + waitNodeDataInequality(looper, slow_node, *other_nodes) + + # Make the slow node receive txns for a smaller ledger so it still finds + # the need to catchup + delay_batches = 2 + make_a_node_catchup_twice(slow_node, other_nodes, DOMAIN_LEDGER_ID, + delay_batches * Max3PCBatchSize) + + def is_catchup_needed_count(): + return len(getAllReturnVals(slow_node, slow_node.is_catchup_needed, + compare_val_to=True)) + old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp) + old_cn_count = is_catchup_needed_count() + + # Other nodes are slow to respond to CatchupReq, so that `slow_node` + # gets a chance to order COMMITs + for n in other_nodes: + n.nodeIbStasher.delay(cqDelay(catchup_req_delay)) + + ensure_view_change(looper, txnPoolNodeSet) + + # Check last ordered of `other_nodes` is same + for n1, n2 in combinations(other_nodes, 2): + lst_3pc = check_last_ordered_3pc(n1, n2) + + def chk1(): + # `slow_node` has prepared all 3PC messages which + # `other_nodes` have ordered + assert slow_master_replica.last_prepared_before_view_change == lst_3pc + + looper.run(eventually(chk1, retryWait=1)) + + old_pc_count = slow_master_replica.spylog.count( + 
slow_master_replica.can_process_since_view_change_in_progress) + + # Repair the network so COMMITs are delayed and processed + slow_node.resetDelays() + slow_node.force_process_delayeds() + + def chk2(): + # COMMITs are processed for prepared messages + assert slow_master_replica.spylog.count( + slow_master_replica.can_process_since_view_change_in_progress) > old_pc_count + + looper.run(eventually(chk2, retryWait=1, timeout=5)) + + def chk3(): + # Some COMMITs were ordered but stashed and they were processed + rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs) + assert rv[0] == delay_batches + + looper.run(eventually(chk3, retryWait=1, timeout=catchup_req_delay+5)) + + def chk4(): + # Catchup was done once + assert slow_node.spylog.count(slow_node.allLedgersCaughtUp) > old_lcu_count + + looper.run(eventually(chk4, retryWait=1, + timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))) + + def chk5(): + # Once catchup was done, need of other catchup was not found + assert is_catchup_needed_count() == old_cn_count + + looper.run(eventually(chk5, retryWait=1, timeout=5)) + + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) + ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + 2 * Max3PCBatchSize) + ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) diff --git a/plenum/test/node_request/message_request/__init__.py b/plenum/test/node_request/message_request/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/node_request/message_request/conftest.py b/plenum/test/node_request/message_request/conftest.py new file mode 100644 index 0000000000..50641b8098 --- /dev/null +++ b/plenum/test/node_request/message_request/conftest.py @@ -0,0 +1,30 @@ +import pytest + +from plenum.common.util import check_if_all_equal_in_list +from plenum.test.helper import send_reqs_to_nodes_and_verify_all_replies +from 
plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected + + +@pytest.fixture(scope="module") +def teardown(request, looper, txnPoolNodeSet, client1, wallet1): + + def tear(): + # Repair any broken network + for node in txnPoolNodeSet: + node.reset_delays_and_process_delayeds() + # Give a little time to process any delayed messages + looper.runFor(3) + + # Check each node has same data + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + + # Check each node has ordered all requests (no catchup) + assert check_if_all_equal_in_list([n.master_replica.ordered + for n in txnPoolNodeSet]) + + # Check the network is functional since all nodes reply + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 5) + + request.addfinalizer(tear) diff --git a/plenum/test/node_request/message_request/helper.py b/plenum/test/node_request/message_request/helper.py new file mode 100644 index 0000000000..929ff10f94 --- /dev/null +++ b/plenum/test/node_request/message_request/helper.py @@ -0,0 +1,20 @@ +from plenum.test.test_node import get_master_primary_node, getNonPrimaryReplicas + + +def count_msg_reqs_of_type(node, typ): + return sum([1 for entry in node.spylog.getAll(node.process_message_req) + if entry.params['msg'].msg_type == typ]) + + +def count_msg_reps_of_type(node, typ): + return sum([1 for entry in node.spylog.getAll(node.process_message_rep) + if entry.params['msg'].msg_type == typ]) + + +def split_nodes(nodes): + primary_node = get_master_primary_node(nodes) + slow_node = getNonPrimaryReplicas(nodes, 0)[-1].node + other_nodes = [n for n in nodes if n != slow_node] + other_non_primary_nodes = [n for n in nodes if n not in + (slow_node, primary_node)] + return slow_node, other_nodes, primary_node, other_non_primary_nodes diff --git a/plenum/test/node_request/message_request/test_node_requests_missing_preprepare.py 
b/plenum/test/node_request/message_request/test_node_requests_missing_preprepare.py new file mode 100644 index 0000000000..f56a7304e8 --- /dev/null +++ b/plenum/test/node_request/message_request/test_node_requests_missing_preprepare.py @@ -0,0 +1,158 @@ +import types + +import pytest + +from plenum.common.constants import PREPREPARE +from plenum.common.messages.node_messages import MessageReq, MessageRep +from plenum.common.types import f +from plenum.common.util import check_if_all_equal_in_list, updateNamedTuple +from plenum.test.delayers import ppDelay +from plenum.test.helper import send_reqs_batches_and_get_suff_replies, \ + countDiscarded +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.node_request.message_request.helper import split_nodes +from plenum.test.spy_helpers import get_count +from plenum.test.test_node import getNonPrimaryReplicas, get_master_primary_node + + +whitelist = ['does not have expected state'] + + +def test_node_requests_missing_preprepare(looper, txnPoolNodeSet, client1, + wallet1, client1Connected, teardown): + """ + A node has bad network with primary and thus loses PRE-PREPARE, + it requests PRE-PREPARE from non-primaries once it has sufficient PREPAREs + """ + slow_node, other_nodes, _, _ = split_nodes(txnPoolNodeSet) + + # Delay PRE-PREPAREs by large amount simulating loss + slow_node.nodeIbStasher.delay(ppDelay(300, 0)) + old_count_pp = get_count(slow_node.master_replica, + slow_node.master_replica.processPrePrepare) + old_count_mrq = {n.name: get_count(n, n.process_message_req) + for n in other_nodes} + old_count_mrp = get_count(slow_node, slow_node.process_message_rep) + + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 15, 5) + + waitNodeDataEquality(looper, slow_node, *other_nodes) + + assert not slow_node.master_replica.requested_pre_prepares + + # `slow_node` processed PRE-PREPARE + assert get_count(slow_node.master_replica, + slow_node.master_replica.processPrePrepare) > 
old_count_pp + + # `slow_node` did receive `MessageRep` + assert get_count(slow_node, slow_node.process_message_rep) > old_count_mrp + + # More than `f` nodes received `MessageReq` + recv_reqs = set() + for n in other_nodes: + if get_count(n, n.process_message_req) > old_count_mrq[n.name]: + recv_reqs.add(n.name) + + assert len(recv_reqs) > slow_node.f + + # All nodes including the `slow_node` ordered the same requests + assert check_if_all_equal_in_list([n.master_replica.ordered + for n in txnPoolNodeSet]) + + +@pytest.fixture(scope='module', params=['do_not_send', 'send_bad']) +def malicious_setup(request, txnPoolNodeSet): + primary_node = get_master_primary_node(txnPoolNodeSet) + slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node + other_nodes = [n for n in txnPoolNodeSet if n != slow_node] + bad_node = [n for n in other_nodes if n != primary_node][0] + good_non_primary_node = [n for n in other_nodes if n != slow_node + and n != bad_node and n != primary_node][0] + + if request.param == 'do_not_send': + orig_method = bad_node.nodeMsgRouter.routes[MessageReq] + + def do_not_send(self, msg, frm): + if msg.msg_type == PREPREPARE: + return + else: + return orig_method(msg, frm) + + bad_node.nodeMsgRouter.routes[MessageReq] = types.MethodType(do_not_send, + bad_node) + return primary_node, bad_node, good_non_primary_node, slow_node, \ + other_nodes, do_not_send, orig_method + + if request.param == 'send_bad': + orig_method = bad_node.nodeMsgRouter.routes[MessageReq] + + def send_bad(self, msg, frm): + if msg.msg_type == PREPREPARE: + resp = self.replicas[msg.params['instId']].getPrePrepare( + msg.params['viewNo'], msg.params['ppSeqNo']) + resp = updateNamedTuple(resp, digest='11908ffq') + self.sendToNodes(MessageRep(**{ + f.MSG_TYPE.nm: msg.msg_type, + f.PARAMS.nm: msg.params, + f.MSG.nm: resp + }), names=[frm, ]) + else: + return orig_method(msg, frm) + + bad_node.nodeMsgRouter.routes[MessageReq] = types.MethodType(send_bad, + bad_node) + return 
primary_node, bad_node, good_non_primary_node, slow_node, \ + other_nodes, send_bad, orig_method + + +def test_node_requests_missing_preprepare_malicious(looper, txnPoolNodeSet, + client1, wallet1, + client1Connected, + malicious_setup, teardown): + """ + A node has bad network with primary and thus loses PRE-PREPARE, + it requests PRE-PREPARE from non-primaries once it has sufficient PREPAREs + but one of the non-primary does not send the PRE-PREPARE + """ + # primary_node = get_master_primary_node(txnPoolNodeSet) + # slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node + # other_nodes = [n for n in txnPoolNodeSet if n != slow_node] + # bad_node = [n for n in other_nodes if n != primary_node][0] + # good_non_primary_node = [n for n in other_nodes if n != slow_node + # and n != bad_node and n != primary_node][0] + primary_node, bad_node, good_non_primary_node, slow_node, other_nodes, \ + bad_method, orig_method = malicious_setup + + slow_node.nodeIbStasher.delay(ppDelay(300, 0)) + + def get_reply_count_frm(node): + return sum([1 for entry in slow_node.spylog.getAll( + slow_node.process_message_rep) + if entry.params['msg'].msg_type == PREPREPARE and + entry.params['frm'] == node.name]) + + old_reply_count_from_bad_node = get_reply_count_frm(bad_node) + old_reply_count_from_good_node = get_reply_count_frm(good_non_primary_node) + old_discarded = countDiscarded(slow_node.master_replica, 'does not have ' + 'expected state') + + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 10, 2) + + waitNodeDataEquality(looper, slow_node, *other_nodes) + + assert check_if_all_equal_in_list([n.master_replica.ordered + for n in txnPoolNodeSet]) + + assert not slow_node.master_replica.requested_pre_prepares + + if bad_method.__name__ == 'do_not_send': + assert get_reply_count_frm(bad_node) == old_reply_count_from_bad_node + else: + assert countDiscarded(slow_node.master_replica, + 'does not have expected state') > old_discarded + + assert 
get_reply_count_frm(good_non_primary_node) > \ + old_reply_count_from_good_node + + slow_node.reset_delays_and_process_delayeds() + bad_node.nodeMsgRouter.routes[MessageReq] = orig_method diff --git a/plenum/test/node_request/message_request/test_preprepare_request.py b/plenum/test/node_request/message_request/test_preprepare_request.py new file mode 100644 index 0000000000..3a04ba9624 --- /dev/null +++ b/plenum/test/node_request/message_request/test_preprepare_request.py @@ -0,0 +1,107 @@ +from plenum.common.constants import PROPAGATE +from plenum.common.messages.node_messages import Prepare +from plenum.test.delayers import ppDelay, pDelay, ppgDelay, msg_rep_delay +from plenum.test.helper import send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import checkNodeDataForInequality, \ + waitNodeDataEquality +from plenum.test.node_request.message_request.helper import split_nodes +from plenum.test.spy_helpers import getAllReturnVals, get_count +from stp_core.loop.eventually import eventually + + +def count_requested_preprepare_resp(node): + # Returns the number of times PRE-PREPARE was requested + sr = node.master_replica + return len(getAllReturnVals(sr, sr._request_pre_prepare_if_possible, + compare_val_to=True)) + + +def count_requested_preprepare_req(node): + # Returns the number of times an attempt was made to request PRE-PREPARE + sr = node.master_replica + return get_count(sr, sr._request_pre_prepare_if_possible) + + +def test_node_request_preprepare(looper, txnPoolNodeSet, client1, + wallet1, client1Connected, teardown): + """ + Node requests PRE-PREPARE only once after getting PREPAREs. 
+ """ + slow_node, other_nodes, primary_node, \ + other_primary_nodes = split_nodes(txnPoolNodeSet) + # Drop PrePrepares and Prepares + slow_node.nodeIbStasher.delay(ppDelay(300, 0)) + slow_node.nodeIbStasher.delay(pDelay(300, 0)) + + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 10, 5) + slow_node.nodeIbStasher.drop_delayeds() + slow_node.nodeIbStasher.resetDelays() + + old_count_req = count_requested_preprepare_req(slow_node) + old_count_resp = count_requested_preprepare_resp(slow_node) + + def chk(increase=True): + # Method is called + assert count_requested_preprepare_req(slow_node) > old_count_req + # Requesting Preprepare + assert count_requested_preprepare_resp(slow_node) - old_count_resp == (1 if increase else 0) + + for pp in primary_node.master_replica.sentPrePrepares.values(): + for rep in [n.master_replica for n in other_primary_nodes]: + prepare = Prepare(rep.instId, + pp.viewNo, + pp.ppSeqNo, + pp.ppTime, + pp.digest, + pp.stateRootHash, + pp.txnRootHash + ) + rep.send(prepare) + + looper.run(eventually(chk, True, retryWait=1)) + + old_count_resp = count_requested_preprepare_resp(slow_node) + + prepare = Prepare(rep.instId, + pp.viewNo, + pp.ppSeqNo, + pp.ppTime, + pp.digest, + pp.stateRootHash, + pp.txnRootHash + ) + rep.send(prepare) + + looper.run(eventually(chk, False, retryWait=1)) + + old_count_req = count_requested_preprepare_req(slow_node) + + old_count_resp = count_requested_preprepare_resp(slow_node) + + +def test_no_preprepare_requested(looper, txnPoolNodeSet, client1, + wallet1, client1Connected, teardown): + """ + Node missing Propagates hence request not finalised, hence stashes + PRE-PREPARE but does not request PRE-PREPARE on receiving PREPARE + """ + slow_node, other_nodes, _, _ = split_nodes(txnPoolNodeSet) + slow_node.nodeIbStasher.delay(ppgDelay(20)) + slow_node.nodeIbStasher.delay(msg_rep_delay(20, [PROPAGATE, ])) + + old_count_resp = count_requested_preprepare_resp(slow_node) + 
send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 4, 2) + + # The slow node is behind + checkNodeDataForInequality(slow_node, *other_nodes) + + # PRE-PREPARE were not requested + assert count_requested_preprepare_resp(slow_node) == old_count_resp + + slow_node.nodeIbStasher.reset_delays_and_process_delayeds() + + # The slow node has processed all requests + waitNodeDataEquality(looper, slow_node, *other_nodes) + + # PRE-PREPARE were not requested + assert count_requested_preprepare_resp(slow_node) == old_count_resp diff --git a/plenum/test/node_request/message_request/test_requested_preprepare_handling.py b/plenum/test/node_request/message_request/test_requested_preprepare_handling.py new file mode 100644 index 0000000000..eb035da446 --- /dev/null +++ b/plenum/test/node_request/message_request/test_requested_preprepare_handling.py @@ -0,0 +1,66 @@ +import types + +from plenum.common.constants import PREPREPARE +from plenum.common.messages.node_messages import MessageRep +from plenum.common.types import f +from plenum.test.delayers import ppDelay +from plenum.test.helper import send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.node_request.message_request.helper import split_nodes +from plenum.test.spy_helpers import get_count +from stp_core.loop.eventually import eventually + + +def test_handle_delayed_preprepares(looper, txnPoolNodeSet, client1, + wallet1, client1Connected, teardown): + """ + Make a node send PREPREPARE again after the slow node has ordered + """ + slow_node, other_nodes, primary_node, other_non_primary_nodes = \ + split_nodes(txnPoolNodeSet) + # This node will send PRE-PREPARE again + confused_node = other_non_primary_nodes[0] + orig_method = confused_node._serve_preprepare_request + + last_pp = None + + def patched_method(self, msg): + nonlocal last_pp + last_pp = orig_method(msg) + return last_pp + + confused_node.req_handlers[PREPREPARE] = 
types.MethodType(patched_method, + confused_node) + + # Delay PRE-PREPAREs by large amount simulating loss + slow_node.nodeIbStasher.delay(ppDelay(300, 0)) + + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 10, 5) + waitNodeDataEquality(looper, slow_node, *other_nodes) + + slow_master_replica = slow_node.master_replica + count_pr_req = get_count(slow_master_replica, + slow_master_replica.process_requested_pre_prepare) + + count_pr_tpc = get_count(slow_master_replica, + slow_master_replica.processThreePhaseMsg) + + confused_node.sendToNodes(MessageRep(**{ + f.MSG_TYPE.nm: PREPREPARE, + f.PARAMS.nm: { + f.INST_ID.nm: last_pp.instId, + f.VIEW_NO.nm: last_pp.viewNo, + f.PP_SEQ_NO.nm: last_pp.ppSeqNo + }, + f.MSG.nm: last_pp + }), names=[slow_node.name, ]) + + def chk(): + # `process_requested_pre_prepare` is called but + # `processThreePhaseMsg` is not called + assert get_count(slow_master_replica, + slow_master_replica.process_requested_pre_prepare) > count_pr_req + assert get_count(slow_master_replica, + slow_master_replica.processThreePhaseMsg) == count_pr_tpc + + looper.run(eventually(chk, retryWait=1)) diff --git a/plenum/test/node_request/message_request/test_valid_message_request.py b/plenum/test/node_request/message_request/test_valid_message_request.py new file mode 100644 index 0000000000..9992066934 --- /dev/null +++ b/plenum/test/node_request/message_request/test_valid_message_request.py @@ -0,0 +1,155 @@ +import pytest +import time + +from plenum.common.constants import LEDGER_STATUS, CONSISTENCY_PROOF, \ + PREPREPARE, PROPAGATE +from plenum.common.messages.node_messages import MessageReq, ChooseField, \ + AnyMapField, MessageRep, AnyField, LedgerStatus, ConsistencyProof, \ + PrePrepare, Propagate +from plenum.common.types import f +from plenum.common.util import get_utc_epoch +from plenum.test.helper import countDiscarded +from stp_core.loop.eventually import eventually + + +invalid_type_discard_log = "unknown value 'invalid_type'" 
+invalid_req_discard_log = "cannot serve request" +invalid_rep_discard_log = "cannot process requested message response" + + +whitelist = [invalid_type_discard_log, ] + + +patched_schema = ( + (f.MSG_TYPE.nm, ChooseField(values={'invalid_type', LEDGER_STATUS, + CONSISTENCY_PROOF, PREPREPARE, + PROPAGATE})), + (f.PARAMS.nm, AnyMapField()) +) + + +def patched_MessageReq(): + class PMessageReq(MessageReq): + schema = patched_schema + return PMessageReq + + +def patched_MessageRep(): + class PMessageRep(MessageRep): + schema = ( + *patched_schema, + (f.MSG.nm, AnyField()) + ) + return PMessageRep + + +discard_counts = {} + +pre_prepare_msg = PrePrepare( + 0, + 1, + 3, + get_utc_epoch(), + [['4AdS22kC7xzb4bcqg9JATuCfAMNcQYcZa1u5eWzs6cSJ', 1499707723017300]], + 1, + 'f99937241d4c891c08e92a3cc25966607315ca66b51827b170d492962d58a9be', + 1, + 'CZecK1m7VYjSNCC7pGHj938DSW2tfbqoJp1bMJEtFqvG', + '7WrAMboPTcMaQCU1raoj28vnhu2bPMMd2Lr9tEcsXeCJ', +) + +propagate_msg = Propagate(**{'request': + {'identifier': '5rArie7XKukPCaEwq5XGQJnM9Fc5aZE3M9HAPVfMU2xC', + 'signature': 'ZbZG68WiaK67eU3CsgpVi85jpgCztW9Yqe7D5ezDUfWbKdiPPVbWq4Tb5m4Ur3jcR5wJ8zmBUZXZudjvMN63Aa9', + 'operation': {'amount': 62, 'type': 'buy'}, + 'reqId': 1499782864169193}, + 'senderClient': '+DG1:vO9#de6?R?>:3RwdAXSdefgLLfxSoN4WMEe'}) + +bad_msgs = [ + (LEDGER_STATUS, {'p1': 'v1', 'p2': 'v2'}, LedgerStatus( + 1, 20, 1, 2, '77wuDUSr4FtAJzJbSqSW7bBw8bKAbra8ABSAjR72Nipq')), + (LEDGER_STATUS, {f.LEDGER_ID.nm: 100}, LedgerStatus( + 1, 20, 1, 2, '77wuDUSr4FtAJzJbSqSW7bBw8bKAbra8ABSAjR72Nipq')), + (CONSISTENCY_PROOF, {f.LEDGER_ID.nm: 1, f.SEQ_NO_START.nm: 10}, + ConsistencyProof(1, 2, 20, 1, 3, + 'BvmagFYpXAYNTuNW8Qssk9tMhEEPucLqL55YuwngUvMw', + 'Dce684wcwhV2wNZCuYTzdW9Kr13ZXFgiuAuAGibFZc4v', + ['58qasGZ9y3TB1pMz7ARKjJeccEbvbx6FT6g3NFnjYsTS'])), + (PREPREPARE, {f.INST_ID.nm: 1, f.VIEW_NO.nm: 0, f.SEQ_NO_START.nm: 10}, + pre_prepare_msg), + (PREPREPARE, {f.INST_ID.nm: -1, f.VIEW_NO.nm: 1, f.PP_SEQ_NO.nm: 10}, + pre_prepare_msg), 
+ (PROPAGATE, {f.IDENTIFIER.nm: 'aa', f.REQ_ID.nm: 'fr'}, propagate_msg), + (PROPAGATE, {f.IDENTIFIER.nm: '4AdS22kC7xzb4bcqg9JATuCfAMNcQYcZa1u5eWzs6cSJ'}, propagate_msg), + (PROPAGATE, {f.REQ_ID.nm: 1499707723017300}, propagate_msg), + ] + + +def fill_counters(nodes, log_message): + global discard_counts + discard_counts[log_message] = {n.name: countDiscarded(n, log_message) + for n in nodes} + + +def chk(nodes, log_message): + global discard_counts + for n in nodes: + assert countDiscarded(n, log_message) > discard_counts[log_message][n.name] + + +@pytest.fixture(scope='module') +def nodes(txnPoolNodeSet): + bad_node = txnPoolNodeSet[-1] + other_nodes = [n for n in txnPoolNodeSet if n != bad_node] + return bad_node, other_nodes + + +def test_node_reject_invalid_req_resp_type(looper, nodes): + """ + Node does not accept invalid `MessageReq`, with an unacceptable type. Also + it does not accept invalid `MessageRep` + """ + global discard_counts + bad_node, other_nodes = nodes + fill_counters(other_nodes, invalid_type_discard_log) + bad_msg = patched_MessageReq()('invalid_type', {'p1': 'v1', 'p2': 'v2'}) + bad_node.send(bad_msg) + + looper.run(eventually(chk, other_nodes, invalid_type_discard_log, retryWait=1)) + + fill_counters(other_nodes, invalid_type_discard_log) + + bad_msg = patched_MessageRep()('invalid_type', {'p1': 'v1', 'p2': 'v2'}, + {'some_message': 'message'}) + bad_node.send(bad_msg) + looper.run(eventually(chk, other_nodes, invalid_type_discard_log, retryWait=1)) + + +def test_node_reject_invalid_req_params(looper, nodes): + """ + Node does not accept invalid `MessageReq`, with missing params. 
+ Also it does not accept invalid `MessageRep` + """ + global discard_counts, bad_msgs + bad_node, other_nodes = nodes + + for bad_msg in bad_msgs: + fill_counters(other_nodes, invalid_req_discard_log) + bad_node.send(patched_MessageReq()(*bad_msg[:2])) + looper.run(eventually(chk, other_nodes, invalid_req_discard_log, + retryWait=1)) + + +def test_node_reject_invalid_resp_params(looper, nodes): + """ + Node does not accept invalid `MessageReq`, with missing params. + Also it does not accept invalid `MessageRep` + """ + global discard_counts, bad_msgs + bad_node, other_nodes = nodes + + for bad_msg in bad_msgs: + fill_counters(other_nodes, invalid_rep_discard_log) + bad_node.send(patched_MessageRep()(*bad_msg)) + looper.run(eventually(chk, other_nodes, invalid_rep_discard_log, + retryWait=1)) \ No newline at end of file diff --git a/plenum/test/node_request/node_request_helper.py b/plenum/test/node_request/node_request_helper.py index bffdd3b369..6a88cd5fee 100644 --- a/plenum/test/node_request/node_request_helper.py +++ b/plenum/test/node_request/node_request_helper.py @@ -1,12 +1,12 @@ import time from functools import partial -from stp_core.loop.eventually import eventuallyAll -from plenum.common.types import PrePrepare, OPERATION, f -from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.common.messages.node_messages import PrePrepare from plenum.common.types import OPERATION, f -from plenum.common.util import getMaxFailures +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.common.util import getMaxFailures, get_utc_epoch from plenum.server.node import Node +from plenum.server.quorums import Quorums from plenum.server.replica import Replica from plenum.test import waits from plenum.test.helper import chk_all_funcs @@ -15,6 +15,7 @@ getAllReplicas, getPrimaryReplica +# This code is unclear, refactor def checkPropagated(looper, nodeSet, request, faultyNodes=0): nodesSize = len(list(nodeSet.nodes)) @@ -80,7 +81,7 @@ def 
nonPrimarySeesCorrectNumberOfPREPREPAREs(): instId, primary.viewNo, primary.lastPrePrepareSeqNo, - time.time(), + get_utc_epoch(), [[propagated1.identifier, propagated1.reqId]], 1, Replica.batchDigest([propagated1,]), @@ -176,7 +177,7 @@ def nonPrimaryReceivesCorrectNumberOfPREPREPAREs(): def checkPrepared(looper, nodeSet, preprepared1, instIds, faultyNodes=0, timeout=30): nodeCount = len(list(nodeSet.nodes)) - f = getMaxFailures(nodeCount) + quorums = Quorums(nodeCount) def g(instId): allReplicas = getAllReplicas(nodeSet, instId) @@ -196,11 +197,11 @@ def allReplicasSeeCorrectNumberOfPREPAREs(): """ 1. no of PREPARE received by replicas must be n - 1; n = num of nodes without fault, and greater than or equal to - 2f with faults. + n-f-1 with faults. """ passes = 0 numOfMsgsWithZFN = nodeCount - 1 - numOfMsgsWithFaults = 2 * f + numOfMsgsWithFaults = quorums.prepare.value for replica in allReplicas: key = primary.viewNo, primary.lastPrePrepareSeqNo @@ -218,7 +219,7 @@ def primaryReceivesCorrectNumberOfPREPAREs(): """ num of PREPARE seen by primary replica is n - 1; n = num of nodes without fault, and greater than or equal to - 2f with faults. + n-f-1 with faults. """ actualMsgs = len([param for param in getAllArgs(primary, @@ -231,7 +232,7 @@ def primaryReceivesCorrectNumberOfPREPAREs(): param['sender'] != primary.name]) numOfMsgsWithZFN = nodeCount - 1 - numOfMsgsWithFaults = 2 * f - 1 + numOfMsgsWithFaults = quorums.prepare.value assert msgCountOK(nodeCount, faultyNodes, @@ -243,11 +244,11 @@ def primaryReceivesCorrectNumberOfPREPAREs(): def nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs(): """ num of PREPARE seen by Non primary replica is n - 2 without - faults and 2f - 1 with faults. + faults and n-f-2 with faults. 
""" passes = 0 numOfMsgsWithZFN = nodeCount - 2 - numOfMsgsWithFaults = (2 * f) - 1 + numOfMsgsWithFaults = quorums.prepare.value - 1 for npr in nonPrimaryReplicas: actualMsgs = len([param for param in @@ -285,23 +286,21 @@ def nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs(): def checkCommitted(looper, nodeSet, prepared1, instIds, faultyNodes=0): timeout = waits.expectedCommittedTime(len(nodeSet)) nodeCount = len((list(nodeSet))) - f = getMaxFailures(nodeCount) + quorums = Quorums(nodeCount) def g(instId): allReplicas = getAllReplicas(nodeSet, instId) primaryReplica = getPrimaryReplica(nodeSet, instId) - # Question: Why 2 checks are being made, one with the data structure - # and then the spylog - def replicasSeesCorrectNumOfCOMMITs(): + def replicas_gets_correct_num_of_COMMITs(): """ num of commit messages must be = n when zero fault; n = num of nodes and greater than or equal to - 2f + 1 with faults. + n-f with faults. """ passes = 0 - numOfMsgsWithZFN = nodeCount - numOfMsgsWithFault = (2 * f) + 1 + numOfMsgsWithZFN = quorums.commit.value + numOfMsgsWithFault = quorums.commit.value key = (primaryReplica.viewNo, primaryReplica.lastPrePrepareSeqNo) for r in allReplicas: @@ -315,37 +314,10 @@ def replicasSeesCorrectNumOfCOMMITs(): numOfMsgsWithZFN, numOfMsgsWithFault)) - assert passes >= len(allReplicas) - faultyNodes - - def replicasReceivesCorrectNumberOfCOMMITs(): - """ - num of commit messages seen by replica must be equal to n - 1; - when zero fault and greater than or equal to - 2f+1 with faults. 
- """ - passes = 0 - numOfMsgsWithZFN = nodeCount - 1 - numOfMsgsWithFault = 2 * f + assert passes >= min(len(allReplicas) - faultyNodes, + numOfMsgsWithZFN) - for r in allReplicas: - args = getAllArgs(r, r.processCommit) - actualMsgsReceived = len(args) - - passes += int(msgCountOK(nodeCount, - faultyNodes, - actualMsgsReceived, - numOfMsgsWithZFN, - numOfMsgsWithFault)) - - for arg in args: - assert arg['commit'].viewNo == primaryReplica.viewNo and \ - arg['commit'].ppSeqNo == primaryReplica.lastPrePrepareSeqNo - assert r.name != arg['sender'] - - assert passes >= len(allReplicas) - faultyNodes - - replicasReceivesCorrectNumberOfCOMMITs() - replicasSeesCorrectNumOfCOMMITs() + replicas_gets_correct_num_of_COMMITs() funcs = [partial(g, instId) for instId in instIds] # TODO Select or create the timeout from 'waits'. Don't use constant. @@ -367,3 +339,19 @@ def msgCountOK(nodesSize, # Less than or equal to `numOfSufficientMsgs` since the faults may # not reduce the number of correct messages return actualMessagesReceived <= numOfSufficientMsgs + + +def chk_commits_prepares_recvd(count, receivers, sender): + counts = {} + sender_replica_names = {r.instId: r.name for r in sender.replicas} + for node in receivers: + for replica in node.replicas: + if replica.instId not in counts: + counts[replica.instId] = 0 + nm = sender_replica_names[replica.instId] + for commit in replica.commits.values(): + counts[replica.instId] += int(nm in commit.voters) + for prepare in replica.prepares.values(): + counts[replica.instId] += int(nm in prepare.voters) + for c in counts.values(): + assert count == c, "expected {}, but have {}".format(count, c) diff --git a/plenum/test/node_request/test_already_processed_request.py b/plenum/test/node_request/test_already_processed_request.py index ae6de19476..6f6266bb25 100644 --- a/plenum/test/node_request/test_already_processed_request.py +++ b/plenum/test/node_request/test_already_processed_request.py @@ -5,8 +5,8 @@ from plenum.test.spy_helpers 
import getAllReturnVals -def test_all_replicas_hold_request_keys(looper, txnPoolNodeSet, client1, - wallet1, client1Connected): +def test_already_processed_requests(looper, txnPoolNodeSet, client1, + wallet1, client1Connected): """ Client re-sending request and checking that nodes picked the reply from ledger and did not process the request again diff --git a/plenum/test/node_request/test_commit/test_commits_recvd_first.py b/plenum/test/node_request/test_commit/test_commits_recvd_first.py new file mode 100644 index 0000000000..04e991891f --- /dev/null +++ b/plenum/test/node_request/test_commit/test_commits_recvd_first.py @@ -0,0 +1,33 @@ +from plenum.common.util import check_if_all_equal_in_list +from plenum.test.delayers import ppDelay, pDelay +from plenum.test.helper import send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.test_node import getNonPrimaryReplicas + + +def test_commits_recvd_first(looper, txnPoolNodeSet, client1, + wallet1, client1Connected): + slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1] + other_nodes = [n for n in txnPoolNodeSet if n != slow_node] + delay = 50 + slow_node.nodeIbStasher.delay(ppDelay(delay, 0)) + slow_node.nodeIbStasher.delay(pDelay(delay, 0)) + + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 20, 4) + + assert not slow_node.master_replica.prePrepares + assert not slow_node.master_replica.prepares + assert not slow_node.master_replica.commits + assert len(slow_node.master_replica.commitsWaitingForPrepare) > 0 + + slow_node.reset_delays_and_process_delayeds() + waitNodeDataEquality(looper, slow_node, *other_nodes) + assert check_if_all_equal_in_list([n.master_replica.ordered + for n in txnPoolNodeSet]) + + assert slow_node.master_replica.prePrepares + assert slow_node.master_replica.prepares + 
assert slow_node.master_replica.commits + assert not slow_node.master_replica.commitsWaitingForPrepare diff --git a/plenum/test/node_request/test_commit/test_commits_without_prepares.py b/plenum/test/node_request/test_commit/test_commits_without_prepares.py new file mode 100644 index 0000000000..a3b9bb9698 --- /dev/null +++ b/plenum/test/node_request/test_commit/test_commits_without_prepares.py @@ -0,0 +1,29 @@ +from plenum.test.delayers import pDelay +from plenum.test.helper import send_reqs_batches_and_get_suff_replies, \ + send_reqs_to_nodes_and_verify_all_replies +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.test_node import get_master_primary_node + + +def test_primary_receives_delayed_prepares(looper, txnPoolNodeSet, + client1, wallet1, + client1Connected): + """ + Primary gets all PREPAREs after COMMITs + """ + delay = 50 + primary_node = get_master_primary_node(txnPoolNodeSet) + other_nodes = [n for n in txnPoolNodeSet if n != primary_node] + primary_node.nodeIbStasher.delay(pDelay(delay, 0)) + + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 10) + + for node in other_nodes: + assert node.master_replica.prePrepares + assert node.master_replica.prepares + assert node.master_replica.commits + + assert primary_node.master_replica.sentPrePrepares + assert not primary_node.master_replica.prepares + assert primary_node.master_replica.commits diff --git a/plenum/test/node_request/test_commit/test_num_commit_with_2_of_6_faulty.py b/plenum/test/node_request/test_commit/test_num_commit_with_2_of_6_faulty.py new file mode 100644 index 0000000000..cdbb25f428 --- /dev/null +++ b/plenum/test/node_request/test_commit/test_num_commit_with_2_of_6_faulty.py @@ -0,0 +1,33 @@ +from functools import partial + +import pytest +from plenum.common.util import getNoInstances + +from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ + delaysPrePrepareProcessing, 
changesRequest +from plenum.test.node_request.node_request_helper import checkCommitted + +nodeCount = 6 +f = 1 +faultyNodes = f + 1 + +whitelist = ['cannot process incoming PREPARE'] + + +@pytest.fixture(scope="module") +def evilNodes(startedNodes): + # Delay processing of PRE-PREPARE messages for 90 + # seconds since the timeout for checking sufficient commits is 60 seconds + for node in startedNodes.nodes_by_rank[-faultyNodes:]: + makeNodeFaulty(node, changesRequest, partial(delaysPrePrepareProcessing, + delay=90)) + +def test_num_of_commit_msg_with_2_of_6_faulty(evilNodes, looper, + nodeSet, prepared1, noRetryReq): + with pytest.raises(AssertionError): + # To raise an error pass less than the actual number of faults + checkCommitted(looper, + nodeSet, + prepared1, + range(getNoInstances(len(nodeSet))), + f) diff --git a/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py b/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py index e0eba05d8a..d9969d9786 100644 --- a/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_commit/test_num_of_commit_with_f_plus_one_faults.py @@ -2,7 +2,8 @@ import pytest -from plenum.common.util import getNoInstances, adict +from plenum.common.util import getNoInstances +from stp_core.common.util import adict from plenum.test.node_request.node_request_helper import checkCommitted from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing, \ @@ -18,13 +19,15 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - B = startedNodes.Beta - G = startedNodes.Gamma + # Making nodes faulty such that no primary is chosen + A = startedNodes.Eta + B = startedNodes.Gamma + G = startedNodes.Zeta for node in A, B, G: makeNodeFaulty(node, changesRequest, partial(delaysPrePrepareProcessing, delay=90)) - node.delaySelfNomination(10) + # Delaying nomination to 
avoid becoming primary + # node.delaySelfNomination(10) return adict(faulties=(A, B, G)) diff --git a/plenum/test/node_request/test_commit/test_num_of_sufficient_commit.py b/plenum/test/node_request/test_commit/test_num_of_sufficient_commit.py index 914e45ecd7..ff5c3ff188 100644 --- a/plenum/test/node_request/test_commit/test_num_of_sufficient_commit.py +++ b/plenum/test/node_request/test_commit/test_num_of_sufficient_commit.py @@ -1,7 +1,7 @@ from functools import partial import pytest -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing @@ -13,14 +13,16 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - B = startedNodes.Beta + # Making nodes faulty such that no primary is chosen + A = startedNodes.Gamma + B = startedNodes.Zeta # Delay processing of PRE-PREPARE messages by Alpha and Beta for 90 # seconds since the timeout for checking sufficient commits is 60 seconds makeNodeFaulty(A, partial(delaysPrePrepareProcessing, delay=90)) makeNodeFaulty(B, partial(delaysPrePrepareProcessing, delay=90)) - A.delaySelfNomination(10) - B.delaySelfNomination(10) + # Delaying nomination to avoid becoming primary + # A.delaySelfNomination(10) + # B.delaySelfNomination(10) return adict(faulties=(A, B)) diff --git a/plenum/test/node_request/test_different_ledger_request_interleave.py b/plenum/test/node_request/test_different_ledger_request_interleave.py new file mode 100644 index 0000000000..33c8189ffb --- /dev/null +++ b/plenum/test/node_request/test_different_ledger_request_interleave.py @@ -0,0 +1,70 @@ +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ + sendRandomRequests, waitForSufficientRepliesForRequests +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.pool_transactions.helper import addNewSteward, sendAddNewNode +from 
plenum.test.primary_selection.test_primary_selection_pool_txn import \ + ensure_pool_functional +from plenum.test.test_node import checkProtocolInstanceSetup +from plenum.test.view_change.helper import ensure_view_change + + +from plenum.test.conftest import tdirWithPoolTxns +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper, nodeThetaAdded, \ + stewardAndWallet1, steward1, stewardWallet +from plenum.test.primary_selection.conftest import one_node_added +from plenum.test.batching_3pc.conftest import tconf + + +def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet, + client1, wallet1, one_node_added, + client1Connected, + tdirWithPoolTxns, steward1, + stewardWallet, allPluginsPath): + """ + Send pool and domain ledger requests such that they interleave, and do + view change in between and verify the pool is functional + """ + new_node = one_node_added + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + + # Send domain ledger requests but don't wait for replies + requests = sendRandomRequests(wallet1, client1, 2) + # Add another node by sending pool ledger request + _, _, new_theta = nodeThetaAdded(looper, txnPoolNodeSet, tdirWithPoolTxns, + tconf, steward1, stewardWallet, + allPluginsPath, name='new_theta') + + # Send more domain ledger requests but don't wait for replies + requests.extend(sendRandomRequests(wallet1, client1, 3)) + + # Do view change without waiting for replies + ensure_view_change(looper, nodes=txnPoolNodeSet) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) + + # Make sure all requests are completed + waitForSufficientRepliesForRequests(looper, client1, + requests=requests) + + ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1) + + new_steward, new_steward_wallet = addNewSteward(looper, tdirWithPoolTxns, + steward1, stewardWallet, + 'another_ste') + + # Send 
another pool ledger request (NODE) but don't wait for completion of + # request + next_node_name = 'next_node' + r = sendAddNewNode(next_node_name, new_steward, new_steward_wallet) + node_req = r[0] + + # Send more domain ledger requests but don't wait for replies + requests = [node_req, *sendRandomRequests(new_steward_wallet, new_steward, 5)] + + # Make sure all requests are completed + waitForSufficientRepliesForRequests(looper, new_steward, + requests=requests) + + # Make sure pool is functional + ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1) diff --git a/plenum/test/node_request/test_discard_3pc_for_ordered.py b/plenum/test/node_request/test_discard_3pc_for_ordered.py new file mode 100644 index 0000000000..ae78fa3623 --- /dev/null +++ b/plenum/test/node_request/test_discard_3pc_for_ordered.py @@ -0,0 +1,63 @@ +from plenum.test.delayers import delay_3pc_messages +from plenum.test.helper import countDiscarded, \ + send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.node_request.node_request_helper import \ + chk_commits_prepares_recvd +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.test_node import getNonPrimaryReplicas +from stp_core.loop.eventually import eventually + + +def test_discard_3PC_messages_for_already_ordered(looper, txnPoolNodeSet, + client1, wallet1, + client1Connected): + """ + Nodes discard any 3PC messages for already ordered 3PC keys + (view_no, pp_seq_no). 
Delay all 3PC messages to a node so it cannot respond + to them unless the other nodes order them, now when the slow node will get + them it will respond but other nodes will not process them and discard them + """ + slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1] + other_nodes = [n for n in txnPoolNodeSet if n != slow_node] + delay = 20 + delay_3pc_messages([slow_node], 0, delay) + delay_3pc_messages([slow_node], 1, delay) + + sent_batches = 3 + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, + 2 * sent_batches, sent_batches) + + def chk(node, inst_id, p_count, c_count): + # A node will still record PREPRAREs even if more than n-f-1, till the + # request is not ordered + assert len(node.replicas[inst_id].prepares) >= p_count + assert len(node.replicas[inst_id].commits) == c_count + + def count_discarded(inst_id, count): + for node in other_nodes: + assert countDiscarded(node.replicas[inst_id], + 'already ordered 3 phase message') == count + + # `slow_node` did not receive any PREPAREs or COMMITs + chk(slow_node, 0, 0, 0) + + # `other_nodes` have not discarded any 3PC message + count_discarded(0, 0) + + # `other_nodes` have not recorded any PREPAREs or COMMITs from `slow_node` + chk_commits_prepares_recvd(0, other_nodes, slow_node) + + slow_node.reset_delays_and_process_delayeds() + waitNodeDataEquality(looper, slow_node, *other_nodes) + + # `slow_node` did receive correct number of PREPAREs and COMMITs + looper.run(eventually(chk, slow_node, 0, sent_batches-1, sent_batches, + retryWait=1)) + + # `other_nodes` have not recorded any PREPAREs or COMMITs from `slow_node` + chk_commits_prepares_recvd(0, other_nodes, slow_node) + + # `other_nodes` have discarded PREPAREs and COMMITs all batches + count_discarded(0, 2*sent_batches) diff --git a/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py b/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py index 
8d5426c981..97ee05ab67 100644 --- a/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py +++ b/plenum/test/node_request/test_order/test_ordering_when_pre_prepare_not_received.py @@ -3,7 +3,7 @@ from stp_core.loop.eventually import eventually from plenum.test import waits -from plenum.test.delayers import ppDelay +from plenum.test.delayers import ppDelay, pDelay from plenum.test.helper import sendRandomRequest from plenum.test.test_node import getNonPrimaryReplicas @@ -11,8 +11,8 @@ def testOrderingWhenPrePrepareNotReceived(looper, nodeSet, up, client1, wallet1): """ - Send commits and prepares but delay pre-prepare such that enough prepares - and commits are received, now the request should not be ordered until + Send commits but delay pre-prepare and prepares such that enough + commits are received, now the request should not be ordered until pre-prepare is received and ordering should just happen once, """ delay = 10 @@ -20,30 +20,39 @@ def testOrderingWhenPrePrepareNotReceived(looper, nodeSet, up, client1, slowRep = nonPrimReps[0] slowNode = slowRep.node slowNode.nodeIbStasher.delay(ppDelay(delay, 0)) + slowNode.nodeIbStasher.delay(pDelay(delay, 0)) - stash = [] - origMethod = slowRep.processPrePrepare + stash_pp = [] + stash_p = [] + orig_pp_method = slowRep.processPrePrepare + orig_p_method = slowRep.processPrepare - def patched(self, msg, sender): - stash.append((msg, sender)) + def patched_pp(self, msg, sender): + stash_pp.append((msg, sender)) - patchedMethod = types.MethodType(patched, slowRep) - slowRep.processPrePrepare = patchedMethod + def patched_p(self, msg, sender): + stash_p.append((msg, sender)) + + slowRep.processPrePrepare = types.MethodType(patched_pp, slowRep) + slowRep.processPrepare = types.MethodType(patched_p, slowRep) def chk1(): assert len(slowRep.commitsWaitingForPrepare) > 0 sendRandomRequest(wallet1, client1) timeout = waits.expectedPrePrepareTime(len(nodeSet)) + delay - looper.run(eventually(chk1, 
timeout=timeout)) + looper.run(eventually(chk1, retryWait=1, timeout=timeout)) + + for m, s in stash_pp: + orig_pp_method(m, s) - for m, s in stash: - origMethod(m, s) + for m, s in stash_p: + orig_p_method(m, s) def chk2(): assert len(slowRep.commitsWaitingForPrepare) == 0 assert slowRep.spylog.count(slowRep.doOrder.__name__) == 1 - timeout = waits.expectedOrderingTime(len(nonPrimReps) + 1) + delay - looper.run(eventually(chk2, timeout=timeout)) + timeout = waits.expectedOrderingTime(len(nonPrimReps) + 1) + 2*delay + looper.run(eventually(chk2, retryWait=1, timeout=timeout)) diff --git a/plenum/test/node_request/test_order/test_request_ordering_2.py b/plenum/test/node_request/test_order/test_request_ordering_2.py index 306b4048a2..424de9aaa1 100644 --- a/plenum/test/node_request/test_order/test_request_ordering_2.py +++ b/plenum/test/node_request/test_order/test_request_ordering_2.py @@ -1,6 +1,6 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger -from plenum.common.types import Commit, PrePrepare +from plenum.common.messages.node_messages import PrePrepare, Commit from plenum.test.helper import sendRandomRequests, \ waitForSufficientRepliesForRequests, checkLedgerEquality, checkAllLedgersEqual from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica diff --git a/plenum/test/node_request/test_pre_prepare/test_ignore_pre_prepare_pp_seq_no_less_than_expected.py b/plenum/test/node_request/test_pre_prepare/test_ignore_pre_prepare_pp_seq_no_less_than_expected.py index e309821b6c..0628d6daaf 100644 --- a/plenum/test/node_request/test_pre_prepare/test_ignore_pre_prepare_pp_seq_no_less_than_expected.py +++ b/plenum/test/node_request/test_pre_prepare/test_ignore_pre_prepare_pp_seq_no_less_than_expected.py @@ -1,6 +1,6 @@ import pytest -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.test.helper import sendRandomRequests, waitForSufficientRepliesForRequests from 
plenum.test.test_node import getNonPrimaryReplicas @@ -17,7 +17,7 @@ def test_ignore_pre_prepare_pp_seq_no_less_than_expected(looper, """ replica = getNonPrimaryReplicas(nodeSet, instId=0)[0] - replica.lastOrderedPPSeqNo = 10 + replica.last_ordered_3pc = (replica.viewNo, 10) requests = sendRandomRequests(wallet1, client1, 1) waitForSufficientRepliesForRequests(looper, client1, diff --git a/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py b/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py index 1fc53f94be..ac1b3b062c 100644 --- a/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py +++ b/plenum/test/node_request/test_pre_prepare/test_non_primary_sends_a_pre_prepare.py @@ -6,13 +6,13 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.common.request import ReqDigest -from plenum.common.types import PrePrepare, f +from plenum.common.types import f from plenum.common.constants import DOMAIN_LEDGER_ID from plenum.common.util import compareNamedTuple from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions from plenum.test import waits -from plenum.test.instances.helper import recvdPrePrepare +from plenum.test.instances.helper import recvd_pre_prepares from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica logger = getlogger() @@ -42,7 +42,7 @@ def testNonPrimarySendsAPrePrepare(looper, nodeSet, setup, propagated1): remainingNpr = nonPrimaryReplicas[1:] def sendPrePrepareFromNonPrimary(): - firstNpr.requestQueues[DOMAIN_LEDGER_ID].add(propagated1) + firstNpr.requestQueues[DOMAIN_LEDGER_ID].add(propagated1.key) ppReq = firstNpr.create3PCBatch(DOMAIN_LEDGER_ID) firstNpr.sendPrePrepare(ppReq) return ppReq @@ -51,9 +51,9 @@ def sendPrePrepareFromNonPrimary(): def chk(): for r in remainingNpr: - recvdPps = recvdPrePrepare(r) + recvdPps = recvd_pre_prepares(r) assert 
len(recvdPps) == 1 - assert compareNamedTuple(recvdPps[0]['pp'], ppr, + assert compareNamedTuple(recvdPps[0], ppr, f.DIGEST.nm, f.STATE_ROOT.nm, f.TXN_ROOT.nm) nodeSuspicions = len(getNodeSuspicions( diff --git a/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py b/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py index e070a3b546..2a293b1ae5 100644 --- a/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_f_plus_one_faults.py @@ -6,7 +6,8 @@ from plenum.test import waits from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing, changesRequest -from plenum.common.util import adict, getNoInstances +from plenum.common.util import getNoInstances +from stp_core.common.util import adict from stp_core.common.log import getlogger from plenum.test.node_request.node_request_helper import checkPrePrepared @@ -26,14 +27,16 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - B = startedNodes.Beta - G = startedNodes.Gamma + # Making nodes faulty such that no primary is chosen + A = startedNodes.Eta + B = startedNodes.Gamma + G = startedNodes.Zeta for node in A, B, G: makeNodeFaulty(node, changesRequest, partial(delaysPrePrepareProcessing, delay=delayPrePrepareSec)) - node.delaySelfNomination(10) + # Delaying nomination to avoid becoming primary + # node.delaySelfNomination(10) return adict(faulties=(A, B, G)) diff --git a/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_one_fault.py b/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_one_fault.py index f8bd38585c..906bc68dca 100644 --- a/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_one_fault.py +++ b/plenum/test/node_request/test_pre_prepare/test_num_of_pre_prepare_with_one_fault.py @@ -1,7 
+1,7 @@ from functools import partial import pytest -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing @@ -13,10 +13,10 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha + A = startedNodes.Gamma makeNodeFaulty(A, partial(delaysPrePrepareProcessing, delay=60)) - A.delaySelfNomination(10) + # A.delaySelfNomination(10) return adict(faulties=A) diff --git a/plenum/test/node_request/test_pre_prepare/test_num_of_sufficient_preprepare.py b/plenum/test/node_request/test_pre_prepare/test_num_of_sufficient_preprepare.py index 36ecd54b56..7abd00b856 100644 --- a/plenum/test/node_request/test_pre_prepare/test_num_of_sufficient_preprepare.py +++ b/plenum/test/node_request/test_pre_prepare/test_num_of_sufficient_preprepare.py @@ -1,7 +1,7 @@ from functools import partial import pytest -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing @@ -13,12 +13,12 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - B = startedNodes.Beta + A = startedNodes.Gamma + B = startedNodes.Zeta for node in A, B: makeNodeFaulty(node, partial(delaysPrePrepareProcessing, delay=60)) - node.delaySelfNomination(10) + # node.delaySelfNomination(10) return adict(faulties=(A, B)) diff --git a/plenum/test/node_request/test_pre_prepare/test_primary_sends_preprepare_of_high_num.py b/plenum/test/node_request/test_pre_prepare/test_primary_sends_preprepare_of_high_num.py index a132f30501..56796f5552 100644 --- a/plenum/test/node_request/test_pre_prepare/test_primary_sends_preprepare_of_high_num.py +++ b/plenum/test/node_request/test_pre_prepare/test_primary_sends_preprepare_of_high_num.py @@ -2,9 +2,10 @@ import pytest +from plenum.common.util import get_utc_epoch from stp_core.loop.eventually import 
eventually from plenum.common.request import ReqDigest -from plenum.common.types import PrePrepare +from plenum.common.messages.node_messages import PrePrepare from plenum.server.replica import TPCStat from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions @@ -44,7 +45,7 @@ def checkPreprepare(replica, viewNo, ppSeqNo, req, numOfPrePrepares): primary.viewNo, primary.lastPrePrepareSeqNo + 2, *newReqDigest, - time.time()) + get_utc_epoch()) primary.send(incorrectPrePrepareReq, TPCStat.PrePrepareSent) timeout = waits.expectedPrePrepareTime(len(nodeSet)) diff --git a/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py b/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py index ae5b467409..ee166e87e9 100644 --- a/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py +++ b/plenum/test/node_request/test_prepare/test_num_of_prepare_with_f_plus_one_faults.py @@ -1,7 +1,8 @@ from functools import partial import pytest -from plenum.common.util import adict, getNoInstances +from plenum.common.util import getNoInstances +from stp_core.common.util import adict from plenum.test import waits from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ @@ -19,15 +20,17 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - B = startedNodes.Beta + # Making nodes faulty such that no primary is chosen + E = startedNodes.Eta G = startedNodes.Gamma - for node in A, B, G: + Z = startedNodes.Zeta + for node in E, G, Z: makeNodeFaulty(node, - changesRequest, - partial(delaysPrePrepareProcessing, delay=delayPrePrepareSec)) - node.delaySelfNomination(10) - return adict(faulties=(A, B, G)) + changesRequest, partial(delaysPrePrepareProcessing, + delay=delayPrePrepareSec)) + # Delaying nomination to avoid becoming primary + # node.delaySelfNomination(10) + return adict(faulties=(E, G, Z)) 
@pytest.fixture(scope="module") diff --git a/plenum/test/node_request/test_prepare/test_num_of_prepare_with_one_fault.py b/plenum/test/node_request/test_prepare/test_num_of_prepare_with_one_fault.py index 1a5af358e1..8d3f2507c7 100644 --- a/plenum/test/node_request/test_prepare/test_num_of_prepare_with_one_fault.py +++ b/plenum/test/node_request/test_prepare/test_num_of_prepare_with_one_fault.py @@ -4,7 +4,7 @@ from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing -from plenum.common.util import adict +from stp_core.common.util import adict nodeCount = 4 faultyNodes = 1 @@ -13,11 +13,13 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - A.delaySelfNomination(10) - makeNodeFaulty(A, + # Making nodes faulty such that no primary is chosen + G = startedNodes.Gamma + # Delaying nomination to avoid becoming primary + # G.delaySelfNomination(10) + makeNodeFaulty(G, partial(delaysPrePrepareProcessing, delay=60)) - return adict(faulty=A) + return adict(faulty=G) @pytest.fixture(scope="module") diff --git a/plenum/test/node_request/test_prepare/test_num_of_sufficient_prepare.py b/plenum/test/node_request/test_prepare/test_num_of_sufficient_prepare.py index a87ab85b42..f1c0a15272 100644 --- a/plenum/test/node_request/test_prepare/test_num_of_sufficient_prepare.py +++ b/plenum/test/node_request/test_prepare/test_num_of_sufficient_prepare.py @@ -4,7 +4,7 @@ import pytest from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ delaysPrePrepareProcessing -from plenum.common.util import adict +from stp_core.common.util import adict from stp_core.common.log import getlogger from plenum.test.test_node import TestNodeSet @@ -18,13 +18,14 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - B = startedNodes.Beta - for node in A, B: + G = startedNodes.Gamma + Z = startedNodes.Zeta + for node in G, Z: makeNodeFaulty(node, partial(delaysPrePrepareProcessing, delay=60)) 
- node.delaySelfNomination(10) - return adict(faulties=(A, B)) + # Delaying nomination to avoid becoming primary + # node.delaySelfNomination(10) + return adict(faulties=(G, Z)) @pytest.fixture(scope="module") diff --git a/plenum/test/node_request/test_prepare/test_num_prepare_with_2_of_6_faulty.py b/plenum/test/node_request/test_prepare/test_num_prepare_with_2_of_6_faulty.py new file mode 100644 index 0000000000..48c9d8e42b --- /dev/null +++ b/plenum/test/node_request/test_prepare/test_num_prepare_with_2_of_6_faulty.py @@ -0,0 +1,33 @@ +from functools import partial + +import pytest +from plenum.common.util import getNoInstances + +from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ + delaysPrePrepareProcessing, changesRequest +from plenum.test.node_request.node_request_helper import checkPrepared + +nodeCount = 6 +f = 1 +faultyNodes = f + 1 + +whitelist = ['cannot process incoming PREPARE'] + + +@pytest.fixture(scope="module") +def evilNodes(startedNodes): + # Delay processing of PRE-PREPARE messages for 90 + # seconds since the timeout for checking sufficient commits is 60 seconds + for node in startedNodes.nodes_by_rank[-faultyNodes:]: + makeNodeFaulty(node, changesRequest, partial(delaysPrePrepareProcessing, + delay=90)) + +def test_num_of_prepare_2_of_6_faulty(evilNodes, looper, + nodeSet, preprepared1, noRetryReq): + with pytest.raises(AssertionError): + # To raise an error pass less than the actual number of faults + checkPrepared(looper, + nodeSet, + preprepared1, + range(getNoInstances(len(nodeSet))), + f) diff --git a/plenum/test/node_request/test_propagate/test_node_lacks_finalised_requests.py b/plenum/test/node_request/test_propagate/test_node_lacks_finalised_requests.py new file mode 100644 index 0000000000..668e5408f3 --- /dev/null +++ b/plenum/test/node_request/test_propagate/test_node_lacks_finalised_requests.py @@ -0,0 +1,65 @@ +import pytest +from plenum.test.delayers import ppgDelay, req_delay +from plenum.test.helper import 
send_reqs_to_nodes_and_verify_all_replies +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.primary_selection.test_primary_selection_pool_txn import \ + ensure_pool_functional +from plenum.test.spy_helpers import get_count, getAllReturnVals +from plenum.test.test_node import getNonPrimaryReplicas + + +@pytest.fixture(scope='function', params=['client_requests', + 'no_client_requests']) +def setup(request, txnPoolNodeSet): + # Test once when client request is received and once when not received + + # Choosing a faulty node which is primary in neither instance, this helps + # in that the same PROPAGATEs are not requested again by the node + faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[1].node + if request.param == 'client_requests': + # Long delay in PROPAGATEs + faulty_node.nodeIbStasher.delay(ppgDelay(90)) + return faulty_node, True + if request.param == 'no_client_requests': + # Long delay in PROPAGATEs + faulty_node.nodeIbStasher.delay(ppgDelay(90)) + # Long delay in Client Requests + faulty_node.clientIbStasher.delay(req_delay(90)) + return faulty_node, False + + +def test_node_request_propagates(looper, setup, txnPoolNodeSet, client1, + wallet1, client1Connected, request): + """ + One of the nodes lacks sufficient propagates + """ + faulty_node, recv_client_requests = setup + + old_count_recv_ppg = get_count(faulty_node, faulty_node.processPropagate) + old_count_recv_req = get_count(faulty_node, faulty_node.processRequest) + old_count_request_propagates = get_count(faulty_node, faulty_node.request_propagates) + + sent_reqs = 5 + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, sent_reqs) + + assert get_count(faulty_node, faulty_node.processPropagate) > old_count_recv_ppg + if recv_client_requests: + assert get_count(faulty_node, faulty_node.processRequest) > old_count_recv_req + else: + assert get_count(faulty_node, faulty_node.processRequest) == 
old_count_recv_req + + # Attempt to request PROPAGATEs was made twice, since the faulty node has 2 replicas + assert get_count(faulty_node, faulty_node.request_propagates) - old_count_request_propagates == 2 + + requested_propagate_counts = getAllReturnVals(faulty_node, + faulty_node.request_propagates) + + # The last attempt to request PROPAGATEs was not successful + assert requested_propagate_counts[0] == 0 + # The first attempt to request PROPAGATEs was successful as PROPAGATEs + # were requested for all nodes + assert requested_propagate_counts[1] == sent_reqs + + faulty_node.nodeIbStasher.reset_delays_and_process_delayeds() + ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1, 4) diff --git a/plenum/test/node_request/test_propagate/test_num_of_propagate_with_f_plus_one_faulty_nodes.py b/plenum/test/node_request/test_propagate/test_num_of_propagate_with_f_plus_one_faulty_nodes.py index c3b14f446c..08af537b1e 100644 --- a/plenum/test/node_request/test_propagate/test_num_of_propagate_with_f_plus_one_faulty_nodes.py +++ b/plenum/test/node_request/test_propagate/test_num_of_propagate_with_f_plus_one_faulty_nodes.py @@ -1,5 +1,5 @@ import pytest -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.test.malicious_behaviors_node import makeNodeFaulty, changesRequest @@ -19,13 +19,14 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - B = startedNodes.Beta - G = startedNodes.Gamma - for node in A, B, G: + E = startedNodes.Eta + G = startedNodes.Gamma + Z = startedNodes.Zeta + for node in E, G, Z: makeNodeFaulty(node, changesRequest) - node.delaySelfNomination(10) - return adict(faulties=(A, B, G)) + # Delaying nomination to avoid becoming primary + # node.delaySelfNomination(10) + return adict(faulties=(E, G, Z)) @pytest.fixture(scope="module") diff --git a/plenum/test/node_request/test_propagate/test_num_of_propagate_with_one_fault.py 
b/plenum/test/node_request/test_propagate/test_num_of_propagate_with_one_fault.py index 6adea6a264..25b41fa20d 100644 --- a/plenum/test/node_request/test_propagate/test_num_of_propagate_with_one_fault.py +++ b/plenum/test/node_request/test_propagate/test_num_of_propagate_with_one_fault.py @@ -17,9 +17,9 @@ # behavior and should be chose randomly later. @pytest.fixture(scope="module") -def evilAlpha(nodeSet): +def evil_node(nodeSet): makeNodeFaulty(nodeSet.Alpha, changesRequest) -def testNumOfPropagateWithOneFault(evilAlpha, propagated1): +def testNumOfPropagateWithOneFault(evil_node, propagated1): pass diff --git a/plenum/test/node_request/test_propagate/test_num_of_sufficient_propagate.py b/plenum/test/node_request/test_propagate/test_num_of_sufficient_propagate.py index c4763bd604..eff46f81a1 100644 --- a/plenum/test/node_request/test_propagate/test_num_of_sufficient_propagate.py +++ b/plenum/test/node_request/test_propagate/test_num_of_sufficient_propagate.py @@ -1,5 +1,5 @@ import pytest -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.test.malicious_behaviors_node import makeNodeFaulty, changesRequest @@ -41,12 +41,13 @@ @pytest.fixture(scope="module") def setup(startedNodes): - A = startedNodes.Alpha - B = startedNodes.Beta - for node in A, B: + # Making nodes faulty such that no primary is chosen + G = startedNodes.Gamma + Z = startedNodes.Zeta + for node in G, Z: makeNodeFaulty(node, changesRequest) - node.delaySelfNomination(10) - return adict(faulties=(A, B)) + # node.delaySelfNomination(10) + return adict(faulties=(G, Z)) @pytest.fixture(scope="module") diff --git a/plenum/test/node_request/test_quorum_disconnected.py b/plenum/test/node_request/test_quorum_disconnected.py new file mode 100644 index 0000000000..7dde1f840e --- /dev/null +++ b/plenum/test/node_request/test_quorum_disconnected.py @@ -0,0 +1,38 @@ +import pytest + +from plenum.test.batching_3pc.helper import send_and_check +from 
plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected +from stp_core.common.util import adict +from plenum.test.helper import signed_random_requests, \ + check_request_is_not_returned_to_nodes + +nodeCount = 6 +# f + 1 faults, i.e, num of faults greater than system can tolerate +faultyNodes = 2 + +whitelist = ['InvalidSignature'] + + +def stop_nodes(looper, nodeSet): + faulties = nodeSet.nodes_by_rank[-faultyNodes:] + for node in faulties: + for r in node.replicas: + assert not r.isPrimary + disconnect_node_and_ensure_disconnected(looper, nodeSet, node, stopNode=False) + looper.removeProdable(node) + return adict(faulties=faulties) + + +def test_6_nodes_pool_cannot_reach_quorum_with_2_disconnected(nodeSet, looper, + client1, wallet1): + ''' + Check that we can not reach consensus when more than f nodes are disconnected: + disconnect 2 of 6 nodes + ''' + stop_nodes(looper, nodeSet) + reqs = signed_random_requests(wallet1, 1) + with pytest.raises(AssertionError): + send_and_check(reqs, looper, nodeSet, client1) + check_request_is_not_returned_to_nodes(looper, nodeSet, reqs[0]) + + diff --git a/plenum/test/node_request/test_quorum_faulty.py b/plenum/test/node_request/test_quorum_faulty.py new file mode 100644 index 0000000000..89b7b2b8c2 --- /dev/null +++ b/plenum/test/node_request/test_quorum_faulty.py @@ -0,0 +1,53 @@ +from functools import partial +from itertools import product + +import pytest + +from plenum.common.util import getNoInstances +from plenum.test.batching_3pc.helper import send_and_check +from stp_core.common.util import adict +from plenum.test import waits +from plenum.test.helper import checkRequestReturnedToNode, checkRequestNotReturnedToNode, signed_random_requests, \ + check_request_is_not_returned_to_nodes +from plenum.test.node_request.node_request_helper import checkCommitted +from plenum.test.malicious_behaviors_node import makeNodeFaulty, \ + delaysPrePrepareProcessing, \ + changesRequest, 
delaysCommitProcessing +from stp_core.loop.eventually import eventually, eventuallyAll + +nodeCount = 6 +# f + 1 faults, i.e, num of faults greater than system can tolerate +faultyNodes = 2 + +whitelist = ['InvalidSignature'] + + +@pytest.fixture(scope="module") +def setup(startedNodes): + # A = startedNodes.Alpha + # B = startedNodes.Beta + A, B = startedNodes.nodes_by_rank[-2:] + for node in A, B: + makeNodeFaulty(node, changesRequest, + partial(delaysPrePrepareProcessing, delay=90)) + # node.delaySelfNomination(10) + return adict(faulties=(A, B)) + + +@pytest.fixture(scope="module") +def afterElection(setup, up): + for n in setup.faulties: + for r in n.replicas: + assert not r.isPrimary + + +def test_6_nodes_pool_cannot_reach_quorum_with_2_faulty(afterElection, looper, + nodeSet, prepared1, + wallet1, client1): + reqs = signed_random_requests(wallet1, 1) + with pytest.raises(AssertionError): + send_and_check(reqs, looper, nodeSet, client1) + check_request_is_not_returned_to_nodes(looper, nodeSet, reqs[0]) + + + diff --git a/plenum/test/node_request/test_request_forwarding.py b/plenum/test/node_request/test_request_forwarding.py index 4bf2ba254a..9cbd209ca4 100644 --- a/plenum/test/node_request/test_request_forwarding.py +++ b/plenum/test/node_request/test_request_forwarding.py @@ -1,5 +1,3 @@ -import pytest - from plenum.common.constants import DOMAIN_LEDGER_ID from plenum.test import waits from plenum.test.delayers import nom_delay, delay_3pc_messages @@ -12,7 +10,7 @@ from plenum.test.view_change.helper import ensure_view_change from stp_core.loop.eventually import eventually -@pytest.mark.skip(reason="INDY-147") + def test_all_replicas_hold_request_keys(looper, txnPoolNodeSet, client1, wallet1, client1Connected, tconf): """ @@ -48,7 +46,7 @@ def chk(count): for node in txnPoolNodeSet: node.nodeIbStasher.delay(nom_delay(delay)) - ensure_view_change(looper, txnPoolNodeSet, client1, wallet1) + ensure_view_change(looper, txnPoolNodeSet) reqs = 
sendRandomRequests(wallet1, client1, 2 * tconf.Max3PCBatchSize) looper.run(eventually(chk, 2 * tconf.Max3PCBatchSize)) diff --git a/plenum/test/node_request/test_timestamp/__init__.py b/plenum/test/node_request/test_timestamp/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/node_request/test_timestamp/conftest.py b/plenum/test/node_request/test_timestamp/conftest.py new file mode 100644 index 0000000000..38dac4e336 --- /dev/null +++ b/plenum/test/node_request/test_timestamp/conftest.py @@ -0,0 +1,3 @@ +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper, nodeThetaAdded, \ + stewardAndWallet1, steward1, stewardWallet diff --git a/plenum/test/node_request/test_timestamp/helper.py b/plenum/test/node_request/test_timestamp/helper.py new file mode 100644 index 0000000000..b2eab82700 --- /dev/null +++ b/plenum/test/node_request/test_timestamp/helper.py @@ -0,0 +1,29 @@ +import types + +from plenum.common.util import get_utc_epoch +from plenum.server.suspicion_codes import Suspicions +from plenum.test.helper import getNodeSuspicions + + +def get_timestamp_suspicion_count(node): + return len(getNodeSuspicions(node, Suspicions.PPR_TIME_WRONG.code)) + + +def make_clock_faulty(node, clock_slow_by_sec=None, ppr_always_wrong=True): + if clock_slow_by_sec is None: + clock_slow_by_sec = node.config.ACCEPTABLE_DEVIATION_PREPREPARE_SECS + 5 + + def utc_epoch(self) -> int: + return get_utc_epoch() - clock_slow_by_sec + + # slow_utc_epoch = types.MethodType(utc_epoch, node) + # setattr(node, 'utc_epoch', property(slow_utc_epoch)) + node.utc_epoch = types.MethodType(utc_epoch, node) + + if ppr_always_wrong: + def ppr_time_is_wrong(self, pp): + return False + + for repl in node.replicas: + repl.is_pre_prepare_time_correct = types.MethodType(ppr_time_is_wrong, + repl) diff --git a/plenum/test/node_request/test_timestamp/test_3pc_timestamp.py 
b/plenum/test/node_request/test_timestamp/test_3pc_timestamp.py new file mode 100644 index 0000000000..7edf731bea --- /dev/null +++ b/plenum/test/node_request/test_timestamp/test_3pc_timestamp.py @@ -0,0 +1,67 @@ +import types +from collections import defaultdict + +from plenum.common.constants import DOMAIN_LEDGER_ID, TXN_TIME +from plenum.test.helper import send_reqs_to_nodes_and_verify_all_replies +from plenum.test.instances.helper import recvd_prepares +from plenum.test.node_request.test_timestamp.helper import \ + get_timestamp_suspicion_count, make_clock_faulty +from plenum.test.spy_helpers import getAllReturnVals +from plenum.test.test_node import getNonPrimaryReplicas + + +def test_replicas_prepare_time(looper, txnPoolNodeSet, client1, + wallet1, client1Connected): + # Check that each replica's PREPARE time is same as the PRE-PREPARE time + sent_batches = 5 + for i in range(sent_batches): + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2) + looper.runFor(1) + + for node in txnPoolNodeSet: + for r in node.replicas: + rec_prps = defaultdict(list) + for p in recvd_prepares(r): + rec_prps[(p.viewNo, p.ppSeqNo)].append(p) + pp_coll = r.sentPrePrepares if r.isPrimary else r.prePrepares + for key, pp in pp_coll.items(): + for p in rec_prps[key]: + assert pp.ppTime == p.ppTime + + # `last_accepted_pre_prepare_time` is the time of the last PRE-PREPARE + assert r.last_accepted_pre_prepare_time == pp_coll.peekitem(-1)[1].ppTime + + # The ledger should store time for each txn and it should be same + # as the time for that PRE-PREPARE + if r.isMaster: + for iv in node.txn_seq_range_to_3phase_key[DOMAIN_LEDGER_ID]: + three_pc_key = iv.data + for seq_no in range(iv.begin, iv.end): + assert node.domainLedger.getBySeqNo(seq_no)[TXN_TIME] == pp_coll[three_pc_key].ppTime + + +def test_non_primary_accepts_pre_prepare_time(looper, txnPoolNodeSet, client1, + wallet1, client1Connected): + """ + One of the non-primary has an in-correct clock so it thinks 
PRE-PREPARE + has incorrect time + """ + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2) + # The replica having the bad clock + confused_npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1] + + make_clock_faulty(confused_npr.node) + + old_acceptable_rvs = getAllReturnVals(confused_npr, + confused_npr.is_pre_prepare_time_acceptable) + old_susp_count = get_timestamp_suspicion_count(confused_npr.node) + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2) + + assert get_timestamp_suspicion_count(confused_npr.node) > old_susp_count + + new_acceptable_rvs = getAllReturnVals(confused_npr, + confused_npr.is_pre_prepare_time_acceptable) + + # `is_pre_prepare_time_acceptable` first returned False then returned True + assert [True, False, *old_acceptable_rvs] == new_acceptable_rvs + diff --git a/plenum/test/node_request/test_timestamp/test_clock_disruption.py b/plenum/test/node_request/test_timestamp/test_clock_disruption.py new file mode 100644 index 0000000000..d772d2aef1 --- /dev/null +++ b/plenum/test/node_request/test_timestamp/test_clock_disruption.py @@ -0,0 +1,70 @@ +import types +from random import randint + +import pytest + +from plenum.common.util import get_utc_epoch +from stp_core.loop.eventually import eventually + +from plenum.test.helper import send_reqs_to_nodes_and_verify_all_replies, \ + sendRandomRequests +from plenum.test.node_request.test_timestamp.helper import make_clock_faulty, \ + get_timestamp_suspicion_count + +Max3PCBatchSize = 4 + +from plenum.test.batching_3pc.conftest import tconf + +# lot of requests will be sent and multiple view changes are done +TestRunningTimeLimitSec = 200 + + +@pytest.mark.skip(reason='Pending implementation') +def test_nodes_with_bad_clock(tconf, looper, txnPoolNodeSet, client1, + wallet1, client1Connected): + """ + All nodes have bad clocks but they eventaully get repaired, an example of + nodes being cut off from NTP server for some time or NTP sync disabled + then without node 
restart NTP sync enabled + """ + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, + Max3PCBatchSize * 3) + + ledger_sizes = {node.name: node.domainLedger.size for node in + txnPoolNodeSet} + susp_counts = {node.name: get_timestamp_suspicion_count(node) for node in + txnPoolNodeSet} + for node in txnPoolNodeSet: + make_clock_faulty(node, + clock_slow_by_sec=node.config.ACCEPTABLE_DEVIATION_PREPREPARE_SECS+randint(5, 15), + ppr_always_wrong=False) + + for _ in range(5): + sendRandomRequests(wallet1, client1, 2) + looper.runFor(.2) + + # Let some time pass + looper.runFor(3) + + def chk(): + for node in txnPoolNodeSet: + # Each node raises suspicion + assert get_timestamp_suspicion_count(node) > susp_counts[node.name] + # Ledger does not change + assert node.domainLedger.size == ledger_sizes[node.name] + + looper.run(eventually(chk, retryWait=1)) + + # Fix clocks + for node in txnPoolNodeSet: + def utc_epoch(self) -> int: + return get_utc_epoch() + + node.utc_epoch = types.MethodType(utc_epoch, node) + + # Let some more time pass + looper.runFor(3) + + # All nodes reply + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, + Max3PCBatchSize * 2) diff --git a/plenum/test/node_request/test_timestamp/test_timestamp_new_node.py b/plenum/test/node_request/test_timestamp/test_timestamp_new_node.py new file mode 100644 index 0000000000..7a3b76ecd2 --- /dev/null +++ b/plenum/test/node_request/test_timestamp/test_timestamp_new_node.py @@ -0,0 +1,48 @@ +from plenum.server.suspicion_codes import Suspicions +from plenum.test.helper import getNodeSuspicions, \ + sendReqsToNodesAndVerifySuffReplies, \ + send_reqs_to_nodes_and_verify_all_replies +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.node_request.test_timestamp.helper import \ + get_timestamp_suspicion_count +from plenum.test.test_node import ensureElectionsDone +from plenum.test.view_change.helper import ensure_view_change + +txnCount = 20 
+Max3PCBatchSize = 4 + +from plenum.test.node_catchup.conftest import nodeCreatedAfterSomeTxns, \ + nodeSetWithNodeAddedAfterSomeTxns +from plenum.test.batching_3pc.conftest import tconf + +# lot of requests will be sent +TestRunningTimeLimitSec = 200 + + +def test_new_node_accepts_timestamp(tconf, looper, txnPoolNodeSet, + nodeSetWithNodeAddedAfterSomeTxns, client1, + wallet1, client1Connected): + """ + A new node joins the pool and is able to function properly without + """ + _, new_node, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns + old_susp_count = get_timestamp_suspicion_count(new_node) + # Don't wait for node to catchup, start sending requests + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 10) + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1]) + + # No suspicions were raised by new_node + assert get_timestamp_suspicion_count(new_node) == old_susp_count + + # All nodes should reply + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, Max3PCBatchSize*3) + # No suspicions were raised by new_node + assert get_timestamp_suspicion_count(new_node) == old_susp_count + + suspicions = {node.name: get_timestamp_suspicion_count(node) for node in txnPoolNodeSet} + ensure_view_change(looper, txnPoolNodeSet) + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, Max3PCBatchSize*3) + for node in txnPoolNodeSet: + assert suspicions[node.name] == get_timestamp_suspicion_count(node) diff --git a/plenum/test/node_request/test_timestamp/test_timestamp_post_view_change.py b/plenum/test/node_request/test_timestamp/test_timestamp_post_view_change.py new file mode 100644 index 0000000000..700b979f39 --- /dev/null +++ b/plenum/test/node_request/test_timestamp/test_timestamp_post_view_change.py @@ -0,0 +1,73 @@ +from stp_core.loop.eventually import eventually + +from plenum.test.helper import send_reqs_to_nodes_and_verify_all_replies, \ + sendRandomRequests, 
checkViewNoForNodes +from plenum.test.node_request.test_timestamp.helper import make_clock_faulty, \ + get_timestamp_suspicion_count +from plenum.test.test_node import ensureElectionsDone, getNonPrimaryReplicas +from plenum.test.view_change.helper import ensure_view_change + +Max3PCBatchSize = 4 + +from plenum.test.batching_3pc.conftest import tconf + +# lot of requests will be sent and multiple view changes are done +TestRunningTimeLimitSec = 200 + + +def test_new_primary_has_wrong_clock(tconf, looper, txnPoolNodeSet, client1, + wallet1, client1Connected): + """ + One of non-primary has a bad clock, it raises suspicions but orders + requests after getting PREPAREs. Then a view change happens this + non-primary with the bad clock becomes the new primary but is not able to + get any of it's PRE-PREPAREs ordered. Eventually another view change + happens and a new primary is elected the pool is functional again + :return: + """ + # The node having the bad clock, this node will be primary after view change + faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node + make_clock_faulty(faulty_node) + + assert not faulty_node.master_replica.isPrimary + # faulty_node replies too + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, + Max3PCBatchSize * 3) + + ledger_sizes = {node.name: node.domainLedger.size for node in txnPoolNodeSet} + susp_counts = {node.name: get_timestamp_suspicion_count(node) for node in txnPoolNodeSet} + ensure_view_change(looper, txnPoolNodeSet) + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + + # After view change, faulty_node is primary + assert faulty_node.master_replica.isPrimary + + old_view_no = txnPoolNodeSet[0].viewNo + + # Requests are sent + for _ in range(5): + sendRandomRequests(wallet1, client1, 2) + looper.runFor(.2) + + def chk(): + for node in [n for n in txnPoolNodeSet if n != faulty_node]: + # Each non faulty node raises suspicion + assert get_timestamp_suspicion_count(node) > 
susp_counts[node.name] + # Ledger does not change + assert node.domainLedger.size == ledger_sizes[node.name] + + assert faulty_node.domainLedger.size == ledger_sizes[faulty_node.name] + + looper.run(eventually(chk, retryWait=1)) + + # Eventually another view change happens + looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1, + retryWait=1, timeout=2*tconf.PerfCheckFreq)) + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + + # After view change, faulty_node is no more the primary + assert not faulty_node.master_replica.isPrimary + + # All nodes reply + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, + Max3PCBatchSize * 2) diff --git a/plenum/test/pool_transactions/conftest.py b/plenum/test/pool_transactions/conftest.py index 3dd0c565e3..63c3a5f71d 100644 --- a/plenum/test/pool_transactions/conftest.py +++ b/plenum/test/pool_transactions/conftest.py @@ -50,10 +50,10 @@ def stewardWallet(stewardAndWallet1): @pytest.fixture("module") def nodeThetaAdded(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, - stewardWallet, allPluginsPath, testNodeClass, - testClientClass): + stewardWallet, allPluginsPath, testNodeClass=None, + testClientClass=None, name=None): newStewardName = "testClientSteward" + randomString(3) - newNodeName = "Theta" + newNodeName = name or "Theta" newSteward, newStewardWallet, newNode = addNewStewardAndNode(looper, steward1, stewardWallet, diff --git a/plenum/test/pool_transactions/get_txn_request.py b/plenum/test/pool_transactions/get_txn_request.py new file mode 100644 index 0000000000..4cfe33f20f --- /dev/null +++ b/plenum/test/pool_transactions/get_txn_request.py @@ -0,0 +1,75 @@ +from plenum.common.messages.node_messages import * +from random import randint +from plenum.test.pool_transactions.helper import sendAddNewClient +from stp_core.loop.eventually import eventually +from plenum.test import waits +from plenum.test.helper import checkSufficientRepliesReceived +from 
plenum.common.util import getMaxFailures +import json +from plenum.common.request import Request +from plenum.common.util import getTimeBasedId + +c_delay = 10 +fValue = getMaxFailures(4) + + +def testSendGetTxnReqForExistsSeqNo(looper, steward1, stewardWallet): + op = { + TXN_TYPE: GET_TXN, + DATA: 1 + } + req = Request(identifier=stewardWallet.defaultId, operation=op, reqId=getTimeBasedId()) + steward1.submitReqs(req) + + timeout = waits.expectedTransactionExecutionTime(len(steward1.inBox)) + c_delay + get_txn_response = looper.run( + eventually(checkSufficientRepliesReceived, steward1.inBox, + req.reqId, fValue, + retryWait=1, timeout=timeout)) + + assert get_txn_response[DATA] + + +def testSendGetTxnReqForNotExistsSeqNo(looper, steward1, stewardWallet): + op = { + TXN_TYPE: GET_TXN, + DATA: randint(100, 1000) + } + req = Request(identifier=stewardWallet.defaultId, operation=op, reqId=getTimeBasedId()) + steward1.submitReqs(req) + + timeout = waits.expectedTransactionExecutionTime(len(steward1.inBox)) + c_delay + get_txn_response = looper.run( + eventually(checkSufficientRepliesReceived, steward1.inBox, + req.reqId, fValue, + retryWait=1, timeout=timeout)) + + assert not get_txn_response[DATA] + + +def testSendGetTxnReqSameAsExpected(looper, steward1, stewardWallet): + req, wallet = sendAddNewClient(STEWARD, "name", steward1, stewardWallet) + + timeout = waits.expectedTransactionExecutionTime(len(steward1.inBox)) + c_delay + nym_response = looper.run( + eventually(checkSufficientRepliesReceived, steward1.inBox, + req.reqId, fValue, + retryWait=1, timeout=timeout)) + + op = { + TXN_TYPE: GET_TXN, + DATA: nym_response['seqNo'] + } + req = Request(identifier=stewardWallet.defaultId, operation=op, reqId=getTimeBasedId()) + steward1.submitReqs(req) + + get_txn_response = looper.run( + eventually(checkSufficientRepliesReceived, steward1.inBox, + req.reqId, fValue, + retryWait=1, timeout=timeout)) + get_txn_response = json.loads(get_txn_response[DATA]) + + del 
nym_response['txnTime'] + del get_txn_response['txnTime'] + + assert nym_response == get_txn_response diff --git a/plenum/test/pool_transactions/helper.py b/plenum/test/pool_transactions/helper.py index fa71aa29ac..fdcca566e8 100644 --- a/plenum/test/pool_transactions/helper.py +++ b/plenum/test/pool_transactions/helper.py @@ -1,3 +1,4 @@ +from plenum.test.node_catchup.helper import waitNodeDataEquality from stp_core.types import HA from typing import Iterable, Union @@ -78,16 +79,32 @@ def sendAddNewNode(newNodeName, stewardClient, stewardWallet, def addNewNode(looper, stewardClient, stewardWallet, newNodeName, tdir, tconf, allPluginsPath=None, autoStart=True, nodeClass=TestNode, transformOpFunc=None): + nodeClass = nodeClass or TestNode req, nodeIp, nodePort, clientIp, clientPort, sigseed \ - = sendAddNewNode(newNodeName, stewardClient, stewardWallet, transformOpFunc) + = sendAddNewNode(newNodeName, stewardClient, stewardWallet, + transformOpFunc) waitForSufficientRepliesForRequests(looper, stewardClient, requests=[req], fVal=1) - initNodeKeysForBothStacks(newNodeName, tdir, sigseed, override=True) - node = nodeClass(newNodeName, basedirpath=tdir, config=tconf, - ha=(nodeIp, nodePort), cliha=(clientIp, clientPort), - pluginPaths=allPluginsPath) - if autoStart: + # initNodeKeysForBothStacks(newNodeName, tdir, sigseed, override=True) + # node = nodeClass(newNodeName, basedirpath=tdir, config=tconf, + # ha=(nodeIp, nodePort), cliha=(clientIp, clientPort), + # pluginPaths=allPluginsPath) + # if autoStart: + # looper.add(node) + # return node + return start_newly_added_node(looper, newNodeName, tdir, sigseed, + (nodeIp, nodePort), (clientIp, clientPort), + tconf, autoStart, allPluginsPath, nodeClass) + + +def start_newly_added_node(looper, node_name, tdir, sigseed, node_ha, client_ha, + conf, auto_start, plugin_path, nodeClass): + initNodeKeysForBothStacks(node_name, tdir, sigseed, override=True) + node = nodeClass(node_name, basedirpath=tdir, config=conf, + 
ha=node_ha, cliha=client_ha, + pluginPaths=plugin_path) + if auto_start: looper.add(node) return node @@ -95,6 +112,7 @@ def addNewNode(looper, stewardClient, stewardWallet, newNodeName, tdir, tconf, def addNewSteward(looper, tdir, creatorClient, creatorWallet, stewardName, clientClass=TestClient): + clientClass = clientClass or TestClient newStewardWallet = addNewClient(STEWARD, looper, creatorClient, creatorWallet, stewardName) newSteward = clientClass(name=stewardName, @@ -215,6 +233,7 @@ def suspendNode(looper, stewardClient, stewardWallet, nodeNym, nodeName): waitForSufficientRepliesForRequests(looper, stewardClient, requests=[req], fVal=1) + def cancelNodeSuspension(looper, stewardClient, stewardWallet, nodeNym, nodeName): op = { @@ -291,3 +310,26 @@ def reconnect_node_and_ensure_connected(looper, poolNodes, reconnectPoolNode(poolNodes, connect, looper) looper.run(checkNodesConnected(poolNodes, customTimeout=timeout)) + + +def add_2_nodes(looper, existing_nodes, steward, steward_wallet, + tdir_with_pool_txns, conf, all_plugins_path, names=None): + assert names is None or (isinstance(names, list) and len(names) == 2) + names = names or ("Zeta", "Eta") + new_nodes = [] + for node_name in names: + new_steward_name = "testClientSteward"+randomString(3) + new_steward, new_steward_wallet, new_node = addNewStewardAndNode(looper, + steward, + steward_wallet, + new_steward_name, + node_name, + tdir_with_pool_txns, + conf, + all_plugins_path) + existing_nodes.append(new_node) + looper.run(checkNodesConnected(existing_nodes)) + waitNodeDataEquality(looper, new_node, *existing_nodes[:-1]) + new_nodes.append(new_node) + + return new_nodes diff --git a/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py b/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py index a3590d9513..b3e78ddfca 100644 --- a/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py +++ 
b/plenum/test/pool_transactions/test_change_ha_persists_post_nodes_restart.py @@ -1,5 +1,4 @@ from plenum.common.constants import ALIAS, NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT -from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.test.node_catchup.helper import waitNodeDataEquality, \ ensureClientConnectedToNodesAndPoolLedgerSame diff --git a/plenum/test/pool_transactions/test_node_key_changed.py b/plenum/test/pool_transactions/test_node_key_changed.py index d407a839ff..4577c9c65d 100644 --- a/plenum/test/pool_transactions/test_node_key_changed.py +++ b/plenum/test/pool_transactions/test_node_key_changed.py @@ -1,5 +1,10 @@ +import pytest import base58 -from plenum.common.keygen_utils import initNodeKeysForBothStacks +import types + +from plenum.common import stack_manager +from plenum.common.keygen_utils import initNodeKeysForBothStacks, \ + initRemoteKeys from plenum.common.signer_simple import SimpleSigner from plenum.common.util import randomString from plenum.test.node_catchup.helper import waitNodeDataEquality, \ @@ -51,3 +56,44 @@ def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, *txnPoolNodeSet) + +def testNodeInitRemoteKeysErrorsNotSuppressed(looper, txnPoolNodeSet, + nodeThetaAdded, monkeypatch): + + TEST_EXCEPTION_MESSAGE = 'Failed to create some cert files' + + newSteward, newStewardWallet, newNode = nodeThetaAdded + + newNode.stop() + looper.removeProdable(name=newNode.name) + nodeHa, nodeCHa = HA(*newNode.nodestack.ha), HA(*newNode.clientstack.ha) + sigseed = randomString(32).encode() + verkey = base58.b58encode(SimpleSigner(seed=sigseed).naclSigner.verraw) + + def initRemoteKeysMock(name, *args, **kwargs): + if name in [node.name for node in txnPoolNodeSet]: + raise OSError(TEST_EXCEPTION_MESSAGE) + else: + return initRemoteKeys(name, *args, **kwargs) + + def wrap(node): + oldMethod = node.poolManager.stackKeysChanged 
+ + def stackKeysChanged(self, *args, **kwargs): + with pytest.raises(OSError, + message="exception was suppressed") as excinfo: + oldMethod(*args, **kwargs) + excinfo.match(r'{}'.format(TEST_EXCEPTION_MESSAGE)) + return 0 + + node.poolManager.stackKeysChanged = types.MethodType(stackKeysChanged, + node.poolManager) + + for node in txnPoolNodeSet: + wrap(node) + + monkeypatch.setattr(stack_manager, 'initRemoteKeys', initRemoteKeysMock) + + changeNodeKeys(looper, newSteward, newStewardWallet, newNode, verkey) + + monkeypatch.undo() diff --git a/plenum/test/pool_transactions/test_nodes_data_changed.py b/plenum/test/pool_transactions/test_nodes_data_changed.py index 0b49840b1a..47a0efdae9 100644 --- a/plenum/test/pool_transactions/test_nodes_data_changed.py +++ b/plenum/test/pool_transactions/test_nodes_data_changed.py @@ -3,8 +3,8 @@ from plenum.test.helper import waitRejectWithReason from plenum.test.node_catchup.helper import waitNodeDataEquality, \ ensureClientConnectedToNodesAndPoolLedgerSame -from plenum.test.pool_transactions.helper import addNewStewardAndNode, sendUpdateNode, \ - updateNodeDataAndReconnect +from plenum.test.pool_transactions.helper import addNewStewardAndNode, \ + sendUpdateNode, updateNodeDataAndReconnect from plenum.test.test_node import checkNodesConnected from stp_core.common.log import getlogger @@ -24,7 +24,6 @@ # reaches it - def testNodePortCannotBeChangedByAnotherSteward(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, stewardWallet, @@ -102,7 +101,8 @@ def testNodePortChanged(looper, txnPoolNodeSet, tdirWithPoolTxns, def testAddInactiveNodeThenActivate(looper, txnPoolNodeSet, tdirWithPoolTxns, - tconf, steward1, stewardWallet, allPluginsPath): + tconf, steward1, stewardWallet, + allPluginsPath): newStewardName = "testClientSteward" + randomString(3) newNodeName = "Kappa" @@ -114,8 +114,7 @@ def del_services(op): del op[DATA][SERVICES] addNewStewardAndNode(looper, steward1, stewardWallet, newStewardName, newNodeName, - 
tdirWithPoolTxns, tconf, - allPluginsPath, + tdirWithPoolTxns, tconf, allPluginsPath, transformNodeOpFunc=del_services) looper.run(checkNodesConnected(txnPoolNodeSet)) diff --git a/plenum/test/pool_transactions/test_nodes_with_pool_txns.py b/plenum/test/pool_transactions/test_nodes_with_pool_txns.py index 95f97d1d53..f5a1f76f1f 100644 --- a/plenum/test/pool_transactions/test_nodes_with_pool_txns.py +++ b/plenum/test/pool_transactions/test_nodes_with_pool_txns.py @@ -11,7 +11,8 @@ waitReqNackFromPoolWithReason from plenum.test.node_catchup.helper import waitNodeDataEquality, \ ensureClientConnectedToNodesAndPoolLedgerSame -from plenum.test.pool_transactions.helper import addNewClient, addNewStewardAndNode, sendAddNewNode +from plenum.test.pool_transactions.helper import addNewClient, \ + addNewStewardAndNode, sendAddNewNode, add_2_nodes from plenum.test.test_node import checkNodesConnected, \ checkProtocolInstanceSetup @@ -31,6 +32,111 @@ # initialised a connection for a new node by the time the new node's message # reaches it +def testStewardCannotAddMoreThanOneNode(looper, txnPoolNodeSet, steward1, + stewardWallet, tdirWithPoolTxns, tconf, + allPluginsPath): + newNodeName = "Epsilon" + sendAddNewNode(newNodeName, steward1, stewardWallet) + + for node in txnPoolNodeSet: + waitRejectWithReason(looper, steward1, + 'already has a node', + node.clientstack.name) + + +def testNonStewardCannotAddNode(looper, txnPoolNodeSet, client1, + wallet1, client1Connected, tdirWithPoolTxns, + tconf, allPluginsPath): + newNodeName = "Epsilon" + sendAddNewNode(newNodeName, client1, wallet1) + for node in txnPoolNodeSet: + waitRejectWithReason(looper, client1, 'is not a steward so cannot add a ' + 'new node', node.clientstack.name) + +def testClientConnectsToNewNode(looper, txnPoolNodeSet, tdirWithPoolTxns, + tconf, steward1, stewardWallet, allPluginsPath): + """ + A client should be able to connect to a newly added node + """ + newStewardName = "testClientSteward" + randomString(3) + 
newNodeName = "Epsilon" + oldNodeReg = copy(steward1.nodeReg) + newSteward, newStewardWallet, newNode = addNewStewardAndNode(looper, + steward1, stewardWallet, + newStewardName, newNodeName, + tdirWithPoolTxns, tconf, + allPluginsPath) + txnPoolNodeSet.append(newNode) + looper.run(checkNodesConnected(txnPoolNodeSet)) + logger.debug("{} connected to the pool".format(newNode)) + + def chkNodeRegRecvd(): + assert (len(steward1.nodeReg) - len(oldNodeReg)) == 1 + assert (newNode.name + CLIENT_STACK_SUFFIX) in steward1.nodeReg + + timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet)) + looper.run(eventually(chkNodeRegRecvd, retryWait=1, timeout=timeout)) + ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, + *txnPoolNodeSet) + ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, + *txnPoolNodeSet) + + +def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, + stewardWallet, allPluginsPath): + """ + Add 2 new nodes to trigger replica addition and primary election + """ + new_nodes = add_2_nodes(looper, txnPoolNodeSet, steward1, stewardWallet, + tdirWithPoolTxns, tconf, allPluginsPath) + for n in new_nodes: + logger.debug("{} connected to the pool".format(n)) + + f = getMaxFailures(len(txnPoolNodeSet)) + + def checkFValue(): + for node in txnPoolNodeSet: + assert node.f == f + assert len(node.replicas) == (f + 1) + + timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet)) + looper.run(eventually(checkFValue, retryWait=1, timeout=timeout)) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) + + +def testStewardCannotAddNodeWithOutFullFieldsSet(looper, tdir, + txnPoolNodeSet, + newAdHocSteward): + """ + The case: + Steward accidentally sends the NODE txn without full fields set. + The expected result: + Steward gets NAck response from the pool. 
+ """ + newNodeName = "Epsilon" + + newSteward, newStewardWallet = newAdHocSteward + + # case from the ticket + def _renameNodePortField(op): + op[DATA].update({NODE_PORT + ' ': op[DATA][NODE_PORT]}) + del op[DATA][NODE_PORT] + + sendAddNewNode(newNodeName, newSteward, newStewardWallet, + transformOpFunc=_renameNodePortField) + waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, + "unknown field") + + for fn in (NODE_IP, CLIENT_IP, NODE_PORT, CLIENT_PORT): + def _tnf(op): del op[DATA][fn] + + sendAddNewNode(newNodeName, newSteward, newStewardWallet, + transformOpFunc=_tnf) + # wait NAcks with exact message. it does not works for just 'is missed' + # because the 'is missed' will check only first few cases + waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, + "unknown field") + def testNodesConnect(txnPoolNodeSet): pass @@ -81,7 +187,7 @@ def _setHexVerkey(op): sendAddNewNode(newNodeName, newSteward, newStewardWallet, transformOpFunc=_setHexVerkey) waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, - 'is not a base58 string') + 'should not contain the following chars') def testStewardCannotAddNodeWithInvalidHa(looper, tdir, @@ -116,121 +222,4 @@ def _tnf(op): op[DATA].update({field: value}) # wait NAcks with exact message. it does not works for just 'is invalid' # because the 'is invalid' will check only first few cases waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, - "'{}' ('{}') is invalid".format(field, value)) - - -def testStewardCannotAddNodeWithOutFullFieldsSet(looper, tdir, - txnPoolNodeSet, - newAdHocSteward): - """ - The case: - Steward accidentally sends the NODE txn without full fields set. - The expected result: - Steward gets NAck response from the pool. 
- """ - newNodeName = "Epsilon" - - newSteward, newStewardWallet = newAdHocSteward - - # case from the ticket - def _renameNodePortField(op): - op[DATA].update({NODE_PORT + ' ': op[DATA][NODE_PORT]}) - del op[DATA][NODE_PORT] - - sendAddNewNode(newNodeName, newSteward, newStewardWallet, - transformOpFunc=_renameNodePortField) - waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, - "unknown field") - - for fn in (NODE_IP, CLIENT_IP, NODE_PORT, CLIENT_PORT): - def _tnf(op): del op[DATA][fn] - - sendAddNewNode(newNodeName, newSteward, newStewardWallet, - transformOpFunc=_tnf) - # wait NAcks with exact message. it does not works for just 'is missed' - # because the 'is missed' will check only first few cases - waitReqNackFromPoolWithReason(looper, txnPoolNodeSet, newSteward, - "unknown field") - - -def testStewardCannotAddMoreThanOneNode(looper, txnPoolNodeSet, steward1, - stewardWallet, tdirWithPoolTxns, tconf, - allPluginsPath): - newNodeName = "Epsilon" - sendAddNewNode(newNodeName, steward1, stewardWallet) - - for node in txnPoolNodeSet: - waitRejectWithReason(looper, steward1, - 'already has a node', - node.clientstack.name) - - -def testNonStewardCannotAddNode(looper, txnPoolNodeSet, client1, - wallet1, client1Connected, tdirWithPoolTxns, - tconf, allPluginsPath): - newNodeName = "Epsilon" - sendAddNewNode(newNodeName, client1, wallet1) - for node in txnPoolNodeSet: - waitRejectWithReason(looper, client1, 'is not a steward so cannot add a ' - 'new node', node.clientstack.name) - - -def testClientConnectsToNewNode(looper, txnPoolNodeSet, tdirWithPoolTxns, - tconf, steward1, stewardWallet, allPluginsPath): - """ - A client should be able to connect to a newly added node - """ - newStewardName = "testClientSteward" + randomString(3) - newNodeName = "Epsilon" - oldNodeReg = copy(steward1.nodeReg) - newSteward, newStewardWallet, newNode = addNewStewardAndNode(looper, - steward1, stewardWallet, - newStewardName, newNodeName, - tdirWithPoolTxns, tconf, - 
allPluginsPath) - txnPoolNodeSet.append(newNode) - looper.run(checkNodesConnected(txnPoolNodeSet)) - logger.debug("{} connected to the pool".format(newNode)) - - def chkNodeRegRecvd(): - assert (len(steward1.nodeReg) - len(oldNodeReg)) == 1 - assert (newNode.name + CLIENT_STACK_SUFFIX) in steward1.nodeReg - - timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet)) - looper.run(eventually(chkNodeRegRecvd, retryWait=1, timeout=timeout)) - ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1, - *txnPoolNodeSet) - ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward, - *txnPoolNodeSet) - - -def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1, - stewardWallet, allPluginsPath): - """ - Add 2 new nodes to trigger replica addition and primary election - """ - for nodeName in ("Zeta", "Eta"): - newStewardName = "testClientSteward" + randomString(3) - newSteward, newStewardWallet, newNode = addNewStewardAndNode(looper, - steward1, - stewardWallet, - newStewardName, - nodeName, - tdirWithPoolTxns, - tconf, - allPluginsPath) - txnPoolNodeSet.append(newNode) - looper.run(checkNodesConnected(txnPoolNodeSet)) - logger.debug("{} connected to the pool".format(newNode)) - waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1]) - - f = getMaxFailures(len(txnPoolNodeSet)) - - def checkFValue(): - for node in txnPoolNodeSet: - assert node.f == f - assert len(node.replicas) == (f + 1) - - timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet)) - looper.run(eventually(checkFValue, retryWait=1, timeout=timeout)) - checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) + "invalid network ip address") \ No newline at end of file diff --git a/plenum/test/primary_election/helpers.py b/plenum/test/primary_election/helpers.py index 4532aebefa..09e501314f 100644 --- a/plenum/test/primary_election/helpers.py +++ b/plenum/test/primary_election/helpers.py @@ -1,4 +1,4 @@ -from plenum.common.types 
import Nomination, Primary +from plenum.common.messages.node_messages import Nomination, Primary from plenum.server.replica import Replica from plenum.test.test_node import TestNode @@ -28,9 +28,9 @@ def getSelfNominationByNode(node: TestNode) -> int: def nominationByNode(name: str, byNode: TestNode, instId: int): return Nomination(name, instId, byNode.viewNo, - byNode.replicas[instId].lastOrderedPPSeqNo) + byNode.replicas[instId].lastOrderedPPSeqNo[1]) def primaryByNode(name: str, byNode: TestNode, instId: int): return Primary(name, instId, byNode.viewNo, - byNode.replicas[instId].lastOrderedPPSeqNo) + byNode.replicas[instId].lastOrderedPPSeqNo[1]) diff --git a/plenum/test/primary_election/test_primary_election_case1.py b/plenum/test/primary_election/test_primary_election_case1.py index 6e9322f90f..353c7db1c4 100644 --- a/plenum/test/primary_election/test_primary_election_case1.py +++ b/plenum/test/primary_election/test_primary_election_case1.py @@ -2,7 +2,7 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger -from plenum.common.types import Nomination +from plenum.common.messages.node_messages import Nomination from plenum.server.replica import Replica from plenum.server.suspicion_codes import Suspicions from plenum.test.delayers import delayerMsgTuple @@ -58,6 +58,7 @@ def case1Setup(startedNodes: TestNodeSet): # noinspection PyIncorrectDocstring +@pytest.mark.skip('Nodes use round robin primary selection') def testPrimaryElectionCase1(case1Setup, looper, keySharedNodes): """ Case 1 - A node making multiple nominations for a particular node. 
Consider diff --git a/plenum/test/primary_election/test_primary_election_case2.py b/plenum/test/primary_election/test_primary_election_case2.py index ef3f8917bf..ffed245168 100644 --- a/plenum/test/primary_election/test_primary_election_case2.py +++ b/plenum/test/primary_election/test_primary_election_case2.py @@ -1,7 +1,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Nomination +from plenum.common.messages.node_messages import Nomination from plenum.server.replica import Replica from plenum.server.suspicion_codes import Suspicions from plenum.test.delayers import delayerMsgTuple @@ -37,8 +37,8 @@ def case2Setup(startedNodes: TestNodeSet): for node in A, C, D: node.whitelistNode(B.name, Suspicions.DUPLICATE_NOM_SENT.code) - # noinspection PyIncorrectDocstring +@pytest.mark.skip('Nodes use round robin primary selection') def testPrimaryElectionCase2(case2Setup, looper, keySharedNodes): """ Case 2 - A node making nominations for a multiple other nodes. Consider 4 diff --git a/plenum/test/primary_election/test_primary_election_case4.py b/plenum/test/primary_election/test_primary_election_case4.py index 6a3227723b..5e74119f7c 100644 --- a/plenum/test/primary_election/test_primary_election_case4.py +++ b/plenum/test/primary_election/test_primary_election_case4.py @@ -1,7 +1,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Primary +from plenum.common.messages.node_messages import Primary from plenum.server.suspicion_codes import Suspicions from plenum.test import waits from plenum.test.primary_election.helpers import primaryByNode @@ -40,6 +40,7 @@ def case4Setup(keySharedNodes: TestNodeSet): # noinspection PyIncorrectDocstring +@pytest.mark.skip('Nodes use round robin primary selection') def testPrimaryElectionCase4(case4Setup, looper): """ Case 4 - A node making multiple primary declarations for a particular node. 
diff --git a/plenum/test/primary_election/test_primary_election_case5.py b/plenum/test/primary_election/test_primary_election_case5.py index af71d04fc3..c9b8122a1e 100644 --- a/plenum/test/primary_election/test_primary_election_case5.py +++ b/plenum/test/primary_election/test_primary_election_case5.py @@ -1,7 +1,7 @@ import logging import pytest -from plenum.common.types import Primary, Nomination +from plenum.common.messages.node_messages import Nomination, Primary from plenum.test import waits from stp_core.common.log import getlogger @@ -58,6 +58,7 @@ def case5Setup(startedNodes: TestNodeSet): # noinspection PyIncorrectDocstring +@pytest.mark.skip('Nodes use round robin primary selection') def testPrimaryElectionCase5(case5Setup, looper, keySharedNodes): """ Case 5 - A node making primary declarations for a multiple other nodes. diff --git a/plenum/test/primary_election/test_primary_election_case6.py b/plenum/test/primary_election/test_primary_election_case6.py index 739cdebacf..9add796acb 100644 --- a/plenum/test/primary_election/test_primary_election_case6.py +++ b/plenum/test/primary_election/test_primary_election_case6.py @@ -1,6 +1,6 @@ import pytest -from plenum.common.types import Primary, Nomination, Reelection +from plenum.common.messages.node_messages import Nomination, Reelection, Primary from plenum.test.delayers import delay from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies from plenum.test.test_node import checkNodesConnected, \ @@ -48,12 +48,14 @@ def chk(): looper.run(eventually(chk, retryWait=1, timeout=15)) checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1) - + + # Make sure no Nominations or Primary are received by A from B for i in inst_ids: assert B.replicas[i].name not in A.elector.nominations[i] assert B.replicas[i].name not in A.elector.primaryDeclarations[i] +@pytest.mark.skip('Nodes use round robin primary selection') def test_primary_election_case6(elections_done, looper, client1, wallet1): """ A is 
disconnected with B so A does not get any Nomination/Primary from diff --git a/plenum/test/primary_election/test_primary_election_contested.py b/plenum/test/primary_election/test_primary_election_contested.py index 6c9d186730..5d78f9b23d 100644 --- a/plenum/test/primary_election/test_primary_election_contested.py +++ b/plenum/test/primary_election/test_primary_election_contested.py @@ -2,7 +2,7 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger -from plenum.common.types import Nomination +from plenum.common.messages.node_messages import Nomination from plenum.test.delayers import delayerMsgTuple from plenum.test.primary_election.helpers import checkNomination from plenum.test.test_node import TestNodeSet, checkPoolReady, \ @@ -32,6 +32,7 @@ def electContFixture(startedNodes: TestNodeSet): # noinspection PyIncorrectDocstring +@pytest.mark.skip('Nodes use round robin primary selection') def testPrimaryElectionContested(electContFixture, looper, keySharedNodes): """ Primary selection (Rainy Day) diff --git a/plenum/test/primary_election/test_primary_election_with_clear_winner.py b/plenum/test/primary_election/test_primary_election_with_clear_winner.py index 099079758a..b098da68a5 100644 --- a/plenum/test/primary_election/test_primary_election_with_clear_winner.py +++ b/plenum/test/primary_election/test_primary_election_with_clear_winner.py @@ -18,6 +18,7 @@ def electContFixture(startedNodes: TestNodeSet): # noinspection PyIncorrectDocstring +@pytest.mark.skip('Nodes use round robin primary selection') def testPrimaryElectionWithAClearWinner(electContFixture, looper, keySharedNodes): """ Primary selection (Sunny Day) diff --git a/plenum/test/primary_election/test_primary_election_with_tie.py b/plenum/test/primary_election/test_primary_election_with_tie.py index cd0713e044..56a2b0baba 100644 --- a/plenum/test/primary_election/test_primary_election_with_tie.py +++ b/plenum/test/primary_election/test_primary_election_with_tie.py 
@@ -2,7 +2,7 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger -from plenum.common.types import Nomination +from plenum.common.messages.node_messages import Nomination from plenum.test.delayers import delay from plenum.test.primary_election.helpers import checkNomination from plenum.test.test_node import TestNodeSet, checkPoolReady, \ @@ -30,6 +30,7 @@ def electTieFixture(startedNodes: TestNodeSet): # noinspection PyIncorrectDocstring +@pytest.mark.skip('Nodes use round robin primary selection') def testPrimaryElectionWithTie(electTieFixture, looper, keySharedNodes): """ Primary selection (Rainy Day) diff --git a/plenum/test/primary_selection/conftest.py b/plenum/test/primary_selection/conftest.py new file mode 100644 index 0000000000..43c832c8f9 --- /dev/null +++ b/plenum/test/primary_selection/conftest.py @@ -0,0 +1,17 @@ +import pytest + +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.primary_selection.helper import check_newly_added_nodes +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper, nodeThetaAdded, \ + stewardAndWallet1, steward1, stewardWallet + + +@pytest.fixture(scope="module") +def one_node_added(looper, txnPoolNodeSet, nodeThetaAdded): + # New node knows primary same primary as others and has rank greater + # than others + _, _, new_node = nodeThetaAdded + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1]) + check_newly_added_nodes(looper, txnPoolNodeSet, [new_node]) + return new_node diff --git a/plenum/test/primary_selection/helper.py b/plenum/test/primary_selection/helper.py new file mode 100644 index 0000000000..c40702524e --- /dev/null +++ b/plenum/test/primary_selection/helper.py @@ -0,0 +1,42 @@ +from plenum.server.pool_manager import RegistryPoolManager, TxnPoolManager +from plenum.test.test_node import checkProtocolInstanceSetup + + +def check_rank_consistent_across_each_node(nodes): + 
assert nodes + node_ranks = {} + name_by_ranks = {} + for node in nodes: + node_ranks[node.poolManager.id] = node.rank + name_by_ranks[node.rank] = node.name + + for node in nodes: + for other_node in nodes: + if node != other_node: + oid = other_node.poolManager.id + assert node.poolManager.get_rank_of(oid) == node_ranks[oid] + ork = node_ranks[oid] + assert node.poolManager.get_name_by_rank(ork) == name_by_ranks[ork] + order = [] + for node in nodes: + if isinstance(node.poolManager, RegistryPoolManager): + order.append(node.poolManager.node_names_ordered_by_rank) + elif isinstance(node.poolManager, TxnPoolManager): + order.append(node.poolManager.node_ids_in_ordered_by_rank) + else: + raise RuntimeError('Dont know this pool manager {}'. + format(node.poolManager)) + + assert len(order) == len(nodes) + assert order.count(order[0]) == len(order) # All elements are same + + +def check_newly_added_nodes(looper, all_nodes, new_nodes): + # New nodes should be given in the order they were added + assert all(n in all_nodes for n in new_nodes) + check_rank_consistent_across_each_node(all_nodes) + old_nodes = [node for node in all_nodes if node not in new_nodes] + for new_node in new_nodes: + assert all(new_node.rank > n.rank for n in old_nodes) + old_nodes.append(new_node) + checkProtocolInstanceSetup(looper, all_nodes, retryWait=1) diff --git a/plenum/test/primary_selection/test_catchup_after_view_change.py b/plenum/test/primary_selection/test_catchup_after_view_change.py new file mode 100644 index 0000000000..fc656ef39e --- /dev/null +++ b/plenum/test/primary_selection/test_catchup_after_view_change.py @@ -0,0 +1,106 @@ +import pytest + +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data, \ + waitNodeDataInequality +from plenum.test.delayers import cr_delay, ppDelay, pDelay, \ + cDelay +from plenum.test.spy_helpers import getAllReturnVals +from plenum.test.test_node import
getNonPrimaryReplicas, \ + checkProtocolInstanceSetup, TestReplica +from plenum.test.view_change.helper import ensure_view_change +from stp_core.loop.eventually import eventually + +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper, nodeThetaAdded, \ + stewardAndWallet1, steward1, stewardWallet +from plenum.test.batching_3pc.conftest import tconf + + +Max3PCBatchSize = 4 +TestRunningTimeLimitSec = 150 + + +@pytest.fixture(scope='module', params=['next-non-primary', 'next-primary']) +def slow_node(request, txnPoolNodeSet): + nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0) + if request.param == 'next-non-primary': + return nprs[-1].node + if request.param == 'next-primary': + return nprs[0].node + + +def test_slow_nodes_catchup_before_selecting_primary_in_new_view(looper, + txnPoolNodeSet, + steward1, + stewardWallet, + tconf, + slow_node): + """ + Delay 3PC to 1 node and then cause view change so by the time the view + change happens(each node gets >n-f `INSTANCE_CHANGE`s), the slow node is + behind other nodes. It should initiate catchup to come to the same state + as other nodes.
+ """ + + fast_nodes = [n for n in txnPoolNodeSet if n != slow_node] + delay = tconf.PerfCheckFreq + + # Bad network introduced + slow_node.nodeIbStasher.delay(ppDelay(delay, 0)) + slow_node.nodeIbStasher.delay(pDelay(2*delay, 0)) + slow_node.nodeIbStasher.delay(cDelay(3*delay, 0)) + for i in range(2): + sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, steward1, 20) + waitNodeDataInequality(looper, slow_node, *fast_nodes) + + catchup_reply_counts = {n.name: n.ledgerManager.spylog.count( + n.ledgerManager.processCatchupRep) for n in txnPoolNodeSet} + catchup_done_counts = {n.name: n.spylog.count(n.allLedgersCaughtUp) + for n in txnPoolNodeSet} + + def slow_node_processed_some(): + assert slow_node.master_replica.batches + + # The slow node has received some PRE-PREPAREs + looper.run(eventually(slow_node_processed_some, retryWait=1, timeout=delay)) + + # No reverts have been called by the slow node + rv = getAllReturnVals(slow_node.replicas[0], + TestReplica.revert_unordered_batches) + assert not rv or max(rv) == 0 + + # Delay reception of catchup replies so ViewChangeDone can be received + # before catchup completes + delay_catchup_reply = 2 + slow_node.nodeIbStasher.delay(cr_delay(delay_catchup_reply)) + + ensure_view_change(looper, txnPoolNodeSet) + # `slow_node` will not have elections done but others will. 
+ checkProtocolInstanceSetup(looper, fast_nodes, + numInstances=len(slow_node.replicas), + retryWait=1) + ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) + + # `slow_node` does catchup, `fast_nodes` don't + for n in txnPoolNodeSet: + assert n.spylog.count(n.allLedgersCaughtUp) > catchup_done_counts[ + n.name] + if n == slow_node: + assert n.ledgerManager.spylog.count( + n.ledgerManager.processCatchupRep) > catchup_reply_counts[n.name] + else: + assert n.ledgerManager.spylog.count( + n.ledgerManager.processCatchupRep) == catchup_reply_counts[n.name] + + # Greater than 0 batches were reverted by the slow node + assert max(getAllReturnVals(slow_node.master_replica, + slow_node.master_replica.revert_unordered_batches)) > 0 + + # Bad network repaired + slow_node.reset_delays_and_process_delayeds() + + # Make sure pool is functional + sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, steward1, 5) + ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) + diff --git a/plenum/test/primary_selection/test_catchup_multiple_rounds.py b/plenum/test/primary_selection/test_catchup_multiple_rounds.py new file mode 100644 index 0000000000..935eb91972 --- /dev/null +++ b/plenum/test/primary_selection/test_catchup_multiple_rounds.py @@ -0,0 +1,77 @@ +import pytest + +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test.delayers import delay_3pc_messages, icDelay +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ + sendRandomRequests, waitForSufficientRepliesForRequests, checkViewNoForNodes +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper, nodeThetaAdded, \ + stewardAndWallet1, steward1, stewardWallet +from plenum.test.batching_3pc.conftest import tconf + +from plenum.test.test_node import getNonPrimaryReplicas, getPrimaryReplica, \ + checkProtocolInstanceSetup +from 
plenum.test.view_change.helper import ensure_view_change +from stp_core.loop.eventually import eventually + +Max3PCBatchSize = 3 +TestRunningTimeLimitSec = 200 + + +@pytest.mark.skip('Test incorrect') +def test_slow_nodes_catchup_before_selecting_primary_in_new_view(tconf, + looper, + txnPoolNodeSet, + client1, + wallet1, + one_node_added, + client1Connected): + """ + Delay 3PC messages to one node and view change messages to some others + (including primary) so the node that does not receive enough 3PC messages is + behind but learns of the view change quickly and starts catchup. + Other nodes learn of the view change late and thus keep on processing + requests + """ + new_node = one_node_added + nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)] + primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node + slow_node = nprs[-1] + # nodes_slow_to_inst_chg = [primary_node] + nprs[:2] + nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node] + delay_3pc = 100 + delay_ic = 5 + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + 2 * Max3PCBatchSize) + + delay_3pc_messages([slow_node], 0, delay_3pc) + + for n in nodes_slow_to_inst_chg: + n.nodeIbStasher.delay(icDelay(delay_ic)) + + start_count = lambda: sum([1 for e in slow_node.ledgerManager.spylog.getAll( + slow_node.ledgerManager.startCatchUpProcess.__name__) + if e.params['ledgerId'] == DOMAIN_LEDGER_ID]) + + s = start_count() + requests = sendRandomRequests(wallet1, client1, 10*Max3PCBatchSize) + + ensure_view_change(looper, nodes=txnPoolNodeSet, + exclude_from_check=nodes_slow_to_inst_chg) + + waitForSufficientRepliesForRequests(looper, client1, + requests=requests) + waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1]) + + e = start_count() + assert e - s >= 2 + + looper.run(eventually(checkViewNoForNodes, slow_node.viewNo)) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + 2 * 
Max3PCBatchSize) + + waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg) diff --git a/plenum/test/primary_selection/test_catchup_needed_check.py b/plenum/test/primary_selection/test_catchup_needed_check.py new file mode 100644 index 0000000000..2d4be2b158 --- /dev/null +++ b/plenum/test/primary_selection/test_catchup_needed_check.py @@ -0,0 +1,77 @@ +import types + +import pytest + +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import waitNodeDataInequality, \ + ensure_all_nodes_have_same_data, make_a_node_catchup_twice +from plenum.test.spy_helpers import getAllReturnVals +from plenum.test.test_node import getNonPrimaryReplicas, \ + checkProtocolInstanceSetup +from plenum.test.view_change.helper import ensure_view_change +# noinspection PyUnresolvedReferences +from plenum.test.batching_3pc.conftest import tconf + + +Max3PCBatchSize = 2 + + +def test_caught_up_for_current_view_check(looper, + txnPoolNodeSet, + client1, + wallet1, + client1Connected): + """ + One of the node experiences poor network and loses 3PC messages. 
It has to + do multiple rounds of catchup to be caught up + """ + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + 3*Max3PCBatchSize) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + + nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0) + bad_node = nprs[-1].node + other_nodes = [n for n in txnPoolNodeSet if n != bad_node] + orig_method = bad_node.master_replica.dispatchThreePhaseMsg + + # Bad node does not process any 3 phase messages, equivalent to messages + # being lost + def bad_method(self, m, s): + pass + + bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType( + bad_method, bad_node.master_replica) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + 6*Max3PCBatchSize) + waitNodeDataInequality(looper, bad_node, *other_nodes) + + # Patch all nodes to return ConsistencyProof of a smaller ledger to the + # bad node but only once, so that the bad_node needs to do catchup again. + + make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID, + Max3PCBatchSize) + + def is_catchup_needed_count(): + return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed, + compare_val_to=True)) + + def caught_up_for_current_view_count(): + return len(getAllReturnVals(bad_node, + bad_node.caught_up_for_current_view, + compare_val_to=True)) + + old_count_1 = is_catchup_needed_count() + old_count_2 = caught_up_for_current_view_count() + ensure_view_change(looper, txnPoolNodeSet) + checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) + ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) + + assert is_catchup_needed_count() > old_count_1 + # The bad_node caught up due to receiving sufficient ViewChangeDone messages + assert caught_up_for_current_view_count() > old_count_2 + + bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType( + orig_method, bad_node.master_replica) diff --git a/plenum/test/primary_selection/test_primary_selection.py 
b/plenum/test/primary_selection/test_primary_selection.py index dc0b4d8fd1..8e09a65806 100644 --- a/plenum/test/primary_selection/test_primary_selection.py +++ b/plenum/test/primary_selection/test_primary_selection.py @@ -2,23 +2,24 @@ import pytest +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.primary_selection.helper import \ + check_rank_consistent_across_each_node +from plenum.test.view_change.helper import ensure_view_change from stp_core.loop.eventually import eventually from plenum.common.util import getNoInstances -from plenum.server.primary_selector import PrimarySelector from plenum.server.replica import Replica from plenum.test import waits -from plenum.test.test_node import checkProtocolInstanceSetup, getPrimaryReplica +from plenum.test.test_node import checkProtocolInstanceSetup, getPrimaryReplica, ensureElectionsDone # noinspection PyUnresolvedReferences -from plenum.test.view_change.conftest import viewNo, simulate_slow_master +from plenum.test.view_change.conftest import viewNo # noinspection PyUnresolvedReferences -from plenum.test.view_change.test_view_change import viewChangeDone +from plenum.test.conftest import looper, client1, wallet1, clientAndWallet1 nodeCount = 7 -PrimaryDecider = PrimarySelector - @pytest.fixture() def primaryReplicas(nodeSet, up): @@ -27,12 +28,11 @@ def primaryReplicas(nodeSet, up): # noinspection PyIncorrectDocstring -def testPrimarySelectionAfterPoolReady(looper, nodeSet, ready): +def testPrimarySelectionAfterPoolReady(looper, nodeSet, ready, wallet1, client1): """ Once the pool is ready(node has connected to at least 3 other nodes), appropriate primary replicas should be selected. 
""" - def checkPrimaryPlacement(): # Node names sorted by rank sortedNodeNames = sorted(nodeSet.nodes.values(), @@ -60,21 +60,41 @@ def checkPrimaryPlacement(): assert not node.replicas[1].isPrimary assert node.replicas[2].isPrimary + check_rank_consistent_across_each_node(nodeSet) # Check if the primary is on the correct node timeout = waits.expectedPoolElectionTimeout(len(nodeSet)) looper.run(eventually(checkPrimaryPlacement, retryWait=1, timeout=timeout)) # Check if every protocol instance has one and only one primary and any node # has no more than one primary checkProtocolInstanceSetup(looper, nodeSet, retryWait=1) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + +@pytest.fixture(scope='module') +def catchup_complete_count(nodeSet): + return {n.name: n.spylog.count(n.allLedgersCaughtUp) for n in nodeSet} + +@pytest.fixture(scope='module') +def view_change_done(looper, nodeSet): + ensure_view_change(looper, nodeSet) + ensureElectionsDone(looper=looper, nodes=nodeSet) + +@pytest.fixture(scope='module') +def view_change_done(looper, nodeSet): + ensure_view_change(looper, nodeSet) + ensureElectionsDone(looper=looper, nodes=nodeSet) # noinspection PyIncorrectDocstring def testPrimarySelectionAfterViewChange(looper, nodeSet, ready, primaryReplicas, - viewChangeDone): + catchup_complete_count, view_change_done): """ Test that primary replica of a protocol instance shifts to a new node after a view change. """ + # TODO: This test can fail due to view change. 
+ + for n in nodeSet: + assert n.spylog.count(n.allLedgersCaughtUp) > catchup_complete_count[n.name] # Primary replicas before view change prBeforeVC = primaryReplicas @@ -87,4 +107,5 @@ def testPrimarySelectionAfterViewChange(looper, nodeSet, ready, primaryReplicas, for br, ar in zip(prBeforeVC, prAfterVC): assert ar.node.rank - br.node.rank == 1 + check_rank_consistent_across_each_node(nodeSet) checkProtocolInstanceSetup(looper, nodeSet, retryWait=1) diff --git a/plenum/test/primary_selection/test_primary_selection_pool_txn.py b/plenum/test/primary_selection/test_primary_selection_pool_txn.py new file mode 100644 index 0000000000..f367203b7a --- /dev/null +++ b/plenum/test/primary_selection/test_primary_selection_pool_txn.py @@ -0,0 +1,53 @@ +import pytest + +from plenum.test.helper import send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.pool_transactions.helper import add_2_nodes +from plenum.test.primary_selection.helper import check_newly_added_nodes + + +def ensure_pool_functional(looper, nodes, wallet, client, num_reqs=10, + num_batches=2): + send_reqs_batches_and_get_suff_replies(looper, wallet, client, num_reqs, + num_batches) + ensure_all_nodes_have_same_data(looper, nodes) + + +def check_accepted_view_change_sent(node, nodes): + for other_node in nodes: + if node == other_node: + continue + if other_node.name in node.elector._view_change_done: + assert node.elector._view_change_done[other_node.name] == \ + node.elector._accepted_view_change_done_message + + +def test_primary_selection_non_genesis_node(one_node_added, looper, + txnPoolNodeSet, stewardWallet, + steward1): + ensure_pool_functional(looper, txnPoolNodeSet, stewardWallet, steward1) + + +@pytest.fixture(scope='module') +def two_more_nodes_added(one_node_added, looper, txnPoolNodeSet, + stewardWallet, steward1, + tdirWithPoolTxns, tconf, allPluginsPath): + # check_accepted_view_change_sent(one_node_added, 
txnPoolNodeSet) + + new_nodes = add_2_nodes(looper, txnPoolNodeSet, steward1, stewardWallet, + tdirWithPoolTxns, tconf, allPluginsPath) + + check_newly_added_nodes(looper, txnPoolNodeSet, new_nodes) + return new_nodes + + +def test_primary_selection_increase_f(two_more_nodes_added, looper, txnPoolNodeSet, + stewardWallet, steward1): + # for n in two_more_nodes_added: + # check_accepted_view_change_sent(n, txnPoolNodeSet) + ensure_pool_functional(looper, txnPoolNodeSet, stewardWallet, steward1) + + + +# TODO: Add more tests to make one next primary crashed, malicious, ensure primary + # selection happens after catchup \ No newline at end of file diff --git a/plenum/test/primary_selection/test_primary_selection_routes.py b/plenum/test/primary_selection/test_primary_selection_routes.py new file mode 100644 index 0000000000..1c2d348864 --- /dev/null +++ b/plenum/test/primary_selection/test_primary_selection_routes.py @@ -0,0 +1,8 @@ +from plenum.test.conftest import looper + +nodeCount = 7 + + +def test_routes(nodeSet, up): + # TODO: Low priority. 
+ pass diff --git a/plenum/test/primary_selection/test_primary_selector.py b/plenum/test/primary_selection/test_primary_selector.py new file mode 100644 index 0000000000..0dbbbb5cd8 --- /dev/null +++ b/plenum/test/primary_selection/test_primary_selector.py @@ -0,0 +1,185 @@ +from typing import Optional + +import base58 +import pytest + +from plenum.common.startable import Mode +from plenum.server.primary_selector import PrimarySelector +from plenum.common.messages.node_messages import ViewChangeDone +from plenum.server.quorums import Quorums +from plenum.server.replica import Replica +from plenum.common.ledger_manager import LedgerManager + + +whitelist = ['but majority declared'] + + +class FakeLedger(): + def __init__(self, ledger_id, size): + self._size = size + self.root_hash = base58.b58encode(str(ledger_id).encode() * 32) + self.hasher = None + + def __len__(self): + return self._size + + +# Question: Why doesn't this subclass Node. +class FakeNode(): + ledger_ids = [0] + + def __init__(self): + self.name = 'Node1' + self.f = 1 + self.replicas = [] + self.viewNo = 0 + self.rank = None + self.allNodeNames = [self.name, 'Node2', 'Node3', 'Node4'] + self.totalNodes = len(self.allNodeNames) + self.mode = Mode.starting + self.replicas = [ + Replica(node=self, instId=0, isMaster=True), + Replica(node=self, instId=1, isMaster=False), + Replica(node=self, instId=2, isMaster=False), + ] + self._found = False + self.ledgerManager = LedgerManager(self, ownedByNode=True) + ledger0 = FakeLedger(0, 10) + ledger1 = FakeLedger(1, 5) + self.ledgerManager.addLedger(0, ledger0) + self.ledgerManager.addLedger(1, ledger1) + self.quorums = Quorums(self.totalNodes) + + def get_name_by_rank(self, name): + # This is used only for getting name of next primary, so + # it just returns a constant + return 'Node2' + + def primary_selected(self, instance_id): + self._found = True + + def is_primary_found(self): + return self._found + + @property + def master_primary_name(self) -> 
Optional[str]: + nm = self.replicas[0].primaryName + if nm: + return Replica.getNodeName(nm) + + @property + def master_replica(self): + return self.replicas[0] + + @property + def is_synced(self): + return self.mode >= Mode.synced + + +def testHasViewChangeQuorum(): + """ + Checks method _hasViewChangeQuorum of SimpleSelector + """ + + ledgerInfo = ( + # ledger id, ledger length, merkle root + (0, 10, '7toTJZHzaxQ7cGZv18MR4PMBfuUecdEQ1JRqJVeJBvmd'), + (1, 5, 'Hs9n4M3CrmrkWGVviGq48vSbMpCrk6WgSBZ7sZAWbJy3') + ) + selector = PrimarySelector(FakeNode()) + + assert not selector._hasViewChangeQuorum + + # Accessing _view_change_done directly to avoid influence of methods + selector._view_change_done = {} + + def declare(replica_name): + selector._view_change_done[replica_name] = ('Node2', ledgerInfo) + + declare('Node1') + declare('Node3') + declare('Node4') + + # Three nodes is enough for quorum, but there is no Node2:0 which is + # expected to be next primary, so no quorum should be achieved + assert selector._hasViewChangeQuorum + assert not selector.has_view_change_from_primary + + declare('Node2') + assert selector.has_view_change_from_primary + + +def testProcessViewChangeDone(): + ledgerInfo = ( + # ledger id, ledger length, merkle root + (0, 10, '7toTJZHzaxQ7cGZv18MR4PMBfuUecdEQ1JRqJVeJBvmd'), + (1, 5, 'Hs9n4M3CrmrkWGVviGq48vSbMpCrk6WgSBZ7sZAWbJy3') + ) + msg = ViewChangeDone(viewNo=0, + name='Node2', + ledgerInfo=ledgerInfo) + node = FakeNode() + selector = PrimarySelector(node) + quorum = selector.quorum + for i in range(quorum): + selector._processViewChangeDoneMessage(msg, 'Node2') + assert selector._view_change_done + assert not node.is_primary_found() + + selector._processViewChangeDoneMessage(msg, 'Node1') + assert selector._view_change_done + assert not node.is_primary_found() + + selector._processViewChangeDoneMessage(msg, 'Node3') + assert selector._verify_primary(msg.name, msg.ledgerInfo) + selector._startSelection() + assert 
selector._view_change_done + # Since the FakeNode does not have setting of mode + # assert node.is_primary_found() + selector.view_change_started(1) + assert not selector._view_change_done + + +def test_get_msgs_for_lagged_nodes(): + ledgerInfo = ( + # ledger id, ledger length, merkle root + (0, 10, '7toTJZHzaxQ7cGZv18MR4PMBfuUecdEQ1JRqJVeJBvmd'), + (1, 5, 'Hs9n4M3CrmrkWGVviGq48vSbMpCrk6WgSBZ7sZAWbJy3'), + ) + messages = [ + (ViewChangeDone(viewNo=0, name='Node2', ledgerInfo=ledgerInfo), 'Node1'), + (ViewChangeDone(viewNo=0, name='Node3', ledgerInfo=ledgerInfo), 'Node2') + ] + node = FakeNode() + selector = PrimarySelector(node) + for message in messages: + selector._processViewChangeDoneMessage(*message) + + messages_for_lagged = selector.get_msgs_for_lagged_nodes() + assert {m for m in messages_for_lagged} == {m[0] for m in messages if m[1] == node.name} + + +def test_send_view_change_done_message(): + node = FakeNode() + selector = PrimarySelector(node) + instance_id = 0 + view_no = selector.viewNo + new_primary_name = selector.node.get_name_by_rank(selector._get_primary_id( + view_no, instance_id)) + selector._send_view_change_done_message() + + ledgerInfo = [ + # ledger id, ledger length, merkle root + (0, 10, '4F7BsTMVPKFshM1MwLf6y23cid6fL3xMpazVoF9krzUw'), + (1, 5, '4K2V1kpVycZ6qSFsNdz2FtpNxnJs17eBNzf9rdCMcKoe'), + ] + messages = [ + ViewChangeDone(viewNo=0, name='Node2', ledgerInfo=ledgerInfo) + ] + + assert len(selector.outBox) == 1 + + print(list(selector.outBox)) + print(messages) + + assert list(selector.outBox) == messages diff --git a/plenum/test/propagate/helper.py b/plenum/test/propagate/helper.py index f4b95c4c34..5747c203b5 100644 --- a/plenum/test/propagate/helper.py +++ b/plenum/test/propagate/helper.py @@ -1,4 +1,4 @@ -from plenum.common.types import Propagate +from plenum.common.messages.node_messages import Propagate from plenum.test.spy_helpers import getAllArgs from plenum.test.test_node import TestNode diff --git 
a/plenum/test/propagate/test_propagate_recvd_after_request.py b/plenum/test/propagate/test_propagate_recvd_after_request.py index fa8775962a..819122be00 100644 --- a/plenum/test/propagate/test_propagate_recvd_after_request.py +++ b/plenum/test/propagate/test_propagate_recvd_after_request.py @@ -1,7 +1,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Propagate +from plenum.common.messages.node_messages import Propagate, MessageRep from plenum.test import waits from plenum.test.delayers import delay from plenum.test.propagate.helper import recvdRequest, recvdPropagate, \ @@ -15,6 +15,8 @@ def setup(nodeSet): A, B, C, D = nodeSet.nodes.values() # type: TestNode delay(Propagate, frm=[B, C, D], to=A, howlong=howlong) + # Delay MessageRep by long simulating loss as if Propagate is missing, it is requested + delay(MessageRep, frm=[B, C, D], to=A, howlong=10*howlong) def testPropagateRecvdAfterRequest(setup, looper, nodeSet, up, sent1): diff --git a/plenum/test/propagate/test_propagate_recvd_before_request.py b/plenum/test/propagate/test_propagate_recvd_before_request.py index 3e9e4b173e..4903a02985 100644 --- a/plenum/test/propagate/test_propagate_recvd_before_request.py +++ b/plenum/test/propagate/test_propagate_recvd_before_request.py @@ -1,7 +1,7 @@ import pytest from stp_core.loop.eventually import eventually -from plenum.common.types import Propagate +from plenum.common.messages.node_messages import Propagate from plenum.test.delayers import delay from plenum.test.helper import assertLength from plenum.test.propagate.helper import recvdRequest, recvdPropagate, \ diff --git a/plenum/test/replica/test_buffers_cleaning.py b/plenum/test/replica/test_buffers_cleaning.py new file mode 100644 index 0000000000..07d2acf68e --- /dev/null +++ b/plenum/test/replica/test_buffers_cleaning.py @@ -0,0 +1,66 @@ +from plenum.server.replica import Replica + + +class FakeNode(): + + def __init__(self, **kwargs): + for name, value in 
kwargs.items(): + setattr(self, name, value) + + +def test_ordered_cleaning(): + + global_view_no = 2 + + node = FakeNode( + name="fake node", + ledger_ids=[0], + viewNo=global_view_no, + ) + + replica = Replica(node, instId=0) + total = [] + + num_requests_per_view = 3 + for viewNo in range(global_view_no + 1): + for seqNo in range(num_requests_per_view): + reqId = viewNo, seqNo + replica.addToOrdered(*reqId) + total.append(reqId) + + # gc is called after stable checkpoint, since no request executed + # in this test starting it manually + replica._gc(100) + # Requests with view lower then previous view + # should not be in ordered + assert len(replica.ordered) == len(total[num_requests_per_view:]) + + +def test_primary_names_cleaning(): + + node = FakeNode( + name="fake node", + ledger_ids=[0], + viewNo=0, + ) + + replica = Replica(node, instId=0) + + replica.primaryName = "Node1:0" + assert list(replica.primaryNames.items()) == \ + [(0, "Node1:0")] + + node.viewNo += 1 + replica.primaryName = "Node2:0" + assert list(replica.primaryNames.items()) == \ + [(0, "Node1:0"), (1, "Node2:0")] + + node.viewNo += 1 + replica.primaryName = "Node3:0" + assert list(replica.primaryNames.items()) == \ + [(1, "Node2:0"), (2, "Node3:0")] + + node.viewNo += 1 + replica.primaryName = "Node4:0" + assert list(replica.primaryNames.items()) == \ + [(2, "Node3:0"), (3, "Node4:0")] diff --git a/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py b/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py index 0601c8b16e..aa0e4659d3 100644 --- a/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py +++ b/plenum/test/replica/test_primary_marked_suspicious_for_sending_prepare.py @@ -1,8 +1,11 @@ import time +import pytest + +from plenum.test.delayers import cDelay from stp_core.loop.eventually import eventually from plenum.common.exceptions import SuspiciousNode -from plenum.common.types import Prepare +from 
plenum.common.messages.node_messages import Prepare from plenum.server.suspicion_codes import Suspicions from plenum.test.helper import getNodeSuspicions from plenum.test.spy_helpers import getAllArgs @@ -12,7 +15,15 @@ nodeCount = 7 -def testPrimarySendsAPrepareAndMarkedSuspicious(looper, nodeSet, preprepared1): +@pytest.fixture(scope="module") +def delay_commits(nodeSet): + # Delay COMMITs so that ordering is delayed and checks can be made + for n in nodeSet: + n.nodeIbStasher.delay(cDelay(5)) + + +def testPrimarySendsAPrepareAndMarkedSuspicious(looper, nodeSet, delay_commits, + preprepared1): def sendPrepareFromPrimary(instId): primary = getPrimaryReplica(nodeSet, instId) viewNo, ppSeqNo = next(iter(primary.sentPrePrepares.keys())) @@ -20,6 +31,7 @@ def sendPrepareFromPrimary(instId): prepare = Prepare(instId, viewNo, ppSeqNo, + ppReq.ppTime, ppReq.digest, ppReq.stateRootHash, ppReq.txnRootHash) diff --git a/plenum/test/replica/test_replica_reject_same_pre_prepare.py b/plenum/test/replica/test_replica_reject_same_pre_prepare.py index f21137e648..37e2d900cb 100644 --- a/plenum/test/replica/test_replica_reject_same_pre_prepare.py +++ b/plenum/test/replica/test_replica_reject_same_pre_prepare.py @@ -1,13 +1,15 @@ import time +import types import pytest -from plenum.test.spy_helpers import getAllArgs, getAllReturnVals +from plenum.server.replica import TPCStat +from plenum.test.delayers import cDelay from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger -from plenum.common.types import PrePrepare +from plenum.common.messages.node_messages import PrePrepare from plenum.common.constants import DOMAIN_LEDGER_ID -from plenum.common.util import getMaxFailures +from plenum.common.util import getMaxFailures, updateNamedTuple, get_utc_epoch from plenum.test import waits from plenum.test.helper import checkPrePrepareReqSent, \ checkPrePrepareReqRecvd, \ @@ -34,15 +36,40 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, 
wallet1): """ numOfNodes = 4 fValue = getMaxFailures(numOfNodes) - request1 = sendRandomRequest(wallet1, client1) - timeout = waits.expectedReqAckQuorumTime() - result1 = looper.run( - eventually(checkSufficientRepliesReceived, client1.inBox, - request1.reqId, fValue, - retryWait=1, timeout=timeout)) - logger.debug("request {} gives result {}".format(request1, result1)) - primaryRepl = getPrimaryReplica(nodeSet) + primaryRepl = getPrimaryReplica(nodeSet, 1) logger.debug("Primary Replica: {}".format(primaryRepl)) + nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, 1) + logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas)) + + # Delay COMMITs so request is not ordered and checks can be made + c_delay = 10 + for node in nodeSet: + node.nodeIbStasher.delay(cDelay(delay=c_delay, instId=1)) + + request1 = sendRandomRequest(wallet1, client1) + for npr in nonPrimaryReplicas: + looper.run(eventually(checkPrepareReqSent, + npr, + request1.identifier, + request1.reqId, + primaryRepl.viewNo, + retryWait=1)) + prePrepareReq = primaryRepl.sentPrePrepares[primaryRepl.viewNo, + primaryRepl.lastPrePrepareSeqNo] + looper.run(eventually(checkPrePrepareReqRecvd, + nonPrimaryReplicas, + prePrepareReq, + retryWait=1)) + + # logger.debug("Patching the primary replica's pre-prepare sending method ") + # orig_method = primaryRepl.sendPrePrepare + + # def patched(self, ppReq): + # self.sentPrePrepares[ppReq.viewNo, ppReq.ppSeqNo] = ppReq + # ppReq = updateNamedTuple(ppReq, **{f.PP_SEQ_NO.nm: 1}) + # self.send(ppReq, TPCStat.PrePrepareSent) + # + # primaryRepl.sendPrePrepare = types.MethodType(patched, primaryRepl) logger.debug( "Decrementing the primary replica's pre-prepare sequence number by " "one...") @@ -53,14 +80,17 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2, retryWait=1, timeout=timeout)) - nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet) - logger.debug("Non Primary 
Replicas: " + str(nonPrimaryReplicas)) + # Since the node is malicious, it will not be able to process requests due + # to conflicts in PRE-PREPARE + primaryRepl.node.stop() + looper.removeProdable(primaryRepl.node) + reqIdr = [(request2.identifier, request2.reqId)] prePrepareReq = PrePrepare( primaryRepl.instId, primaryRepl.viewNo, primaryRepl.lastPrePrepareSeqNo, - time.time(), + get_utc_epoch(), reqIdr, 1, primaryRepl.batchDigest([request2]), @@ -93,3 +123,10 @@ def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1): view_no, retryWait=1, timeout=timeout)) + + timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + c_delay + result1 = looper.run( + eventually(checkSufficientRepliesReceived, client1.inBox, + request1.reqId, fValue, + retryWait=1, timeout=timeout)) + logger.debug("request {} gives result {}".format(request1, result1)) diff --git a/plenum/test/signing/test_signing.py b/plenum/test/signing/test_signing.py index 234419d461..354fbb7d93 100644 --- a/plenum/test/signing/test_signing.py +++ b/plenum/test/signing/test_signing.py @@ -3,7 +3,7 @@ from stp_core.loop.eventually import eventually from plenum.common.exceptions import InvalidSignature from stp_core.common.log import getlogger -from plenum.common.util import adict +from stp_core.common.util import adict from plenum.test import waits from plenum.test.malicious_behaviors_node import changesRequest, makeNodeFaulty from plenum.test.node_request.node_request_helper import checkPropagated diff --git a/plenum/test/spy_helpers.py b/plenum/test/spy_helpers.py index 8f64bfd299..71a59f0a69 100644 --- a/plenum/test/spy_helpers.py +++ b/plenum/test/spy_helpers.py @@ -30,7 +30,20 @@ def getAllArgs(obj: Any, method: Union[str, Callable]) -> List[Any]: return [m.params for m in obj.spylog.getAll(methodName)] -def getAllReturnVals(obj: Any, method: SpyableMethod) -> List[Any]: - # params should return a List +def getAllReturnVals(obj: Any, method: SpyableMethod, + compare_val_to=None) 
-> List[Any]: + """ + + :param obj: + :param method: method name or method + :param compare_val_to: if provided, only returns values which are equal to + the provided one. Won't work if the provided value is None + :return: a list of return vals + """ methodName = method if isinstance(method, str) else getCallableName(method) - return [m.result for m in obj.spylog.getAll(methodName)] \ No newline at end of file + return [m.result for m in obj.spylog.getAll(methodName) + if (compare_val_to is None or m.result == compare_val_to)] + + +def get_count(obj: Any, method: SpyableMethod) -> int: + return obj.spylog.count(method) diff --git a/plenum/test/stasher.py b/plenum/test/stasher.py index 859c1be849..7dfb912547 100644 --- a/plenum/test/stasher.py +++ b/plenum/test/stasher.py @@ -45,7 +45,7 @@ def stashAll(self, age): self.delayeds.append((age + secondsToDelay, rx)) self.queue.remove(rx) - def unstashAll(self, age, ignore_age_check=False): + def unstashAll(self, age, *names, ignore_age_check=False): """ Not terribly efficient, but for now, this is only used for testing. HasActionQueue is more efficient about knowing when to iterate through @@ -54,23 +54,55 @@ def unstashAll(self, age, ignore_age_check=False): :param age: seconds since Stasher started """ unstashed = 0 - for d in self.delayeds: + to_remove = [] + for idx, d in enumerate(self.delayeds): # This is in-efficient as `ignore_age_check` wont change during loop # but its ok since its a testing util. - if ignore_age_check or age >= d[0]: - msg = '(forced)' if ignore_age_check else '({:.0f} milliseconds overdue)'\ - .format((age - d[0]) * 1000) + if ignore_age_check or (names and d[1][0].__name__ in names) or age >= d[0]: + if ignore_age_check: + msg = '(forced)' + elif names and d[1][0].__name__ in names: + msg = '({} present in {})'.format(d[1][0].__name__, names) + else: + msg = '({:.0f} milliseconds overdue)'.format((age - d[0]) * 1000) logger.debug( "{} unstashing message {} {}". 
format(self.name, d[1], msg)) self.queue.appendleft(d[1]) - self.delayeds.remove(d) + to_remove.append(idx) unstashed += 1 + + # Since `to_remove` is filled with increasing numbers so reverse it + # and then remove elements from list + for idx in to_remove[::-1]: + self.delayeds.pop(idx) + return unstashed - def resetDelays(self): - logger.debug("{} resetting delays".format(self.name)) - self.delayRules = set() + def resetDelays(self, *names): + if not names: + logger.debug("{} resetting all delays".format(self.name)) + self.delayRules = set() + else: + logger.debug("{} resetting delays for {}".format(self.name, names)) + to_remove = [] + for r in self.delayRules: + if r.__name__ in names: + to_remove.append(r) + + for r in to_remove: + self.delayRules.remove(r) + + def drop_delayeds(self): + # This will empty the stashed message queue + self.delayeds = [] + + def force_unstash(self, *names): + if not names: + return self.unstashAll(0, ignore_age_check=True) + else: + return self.unstashAll(0, *names) - def force_unstash(self): - return self.unstashAll(0, ignore_age_check=True) + def reset_delays_and_process_delayeds(self, *names): + self.resetDelays(*names) + self.force_unstash(*names) diff --git a/plenum/test/test_bootstrapping.py b/plenum/test/test_bootstrapping.py index 4b35af8b3f..6bee68cc8b 100644 --- a/plenum/test/test_bootstrapping.py +++ b/plenum/test/test_bootstrapping.py @@ -1,11 +1,10 @@ import pytest -from stp_core.network.exceptions import RemoteNotFound, PublicKeyNotFoundOnDisk +from stp_core.network.exceptions import PublicKeyNotFoundOnDisk from stp_core.common.log import getlogger from plenum.test.greek import genNodeNames from stp_core.loop.looper import Looper from plenum.test.helper import msgAll -from plenum.test.test_stack import NOT_CONNECTED from plenum.test.test_node import TestNodeSet, checkNodesConnected, genNodeReg logger = getlogger() @@ -48,6 +47,5 @@ def testConnectWithoutKeySharingFails(tdir_for_func): with 
TestNodeSet(names=nodeNames, tmpdir=tdir_for_func, keyshare=False) as nodes: with Looper(nodes) as looper: - looper.run( - checkNodesConnected(nodes, NOT_CONNECTED)) + looper.runFor(2) diff --git a/plenum/test/test_client.py b/plenum/test/test_client.py index 89f2e76fda..ee50ee154f 100644 --- a/plenum/test/test_client.py +++ b/plenum/test/test_client.py @@ -20,8 +20,10 @@ logger = getlogger() +client_spyables = [Client.handleOneNodeMsg, Client.resendRequests] -@spyable(methods=[Client.handleOneNodeMsg, Client.resendRequests]) + +@spyable(methods=client_spyables) class TestClient(Client, StackedTester): def __init__(self, *args, **kwargs): self.NodeStackClass = nodeStackClass diff --git a/plenum/test/test_current_state_propagation.py b/plenum/test/test_current_state_propagation.py new file mode 100644 index 0000000000..1fbaa307d1 --- /dev/null +++ b/plenum/test/test_current_state_propagation.py @@ -0,0 +1,42 @@ +from plenum.test.node_catchup.conftest import * +from plenum.test.pool_transactions.helper import \ + disconnect_node_and_ensure_disconnected, reconnect_node_and_ensure_connected +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + waitNodeDataInequality, checkNodeDataForEquality + +# # Do not remove the next import +# from plenum.test.node_catchup.conftest import whitelist + + +def test_current_state_propagation(newNodeCaughtUp, + txnPoolNodeSet, + nodeSetWithNodeAddedAfterSomeTxns): + """ + Checks that nodes send CurrentState to lagged nodes. + """ + + # 1. Start pool + looper, new_node, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns + + # 2. Stop one node + lagging_node = new_node + disconnect_node_and_ensure_disconnected(looper, + txnPoolNodeSet, + lagging_node, + stopNode=True) + looper.removeProdable(new_node) + + # 3. Start it again + looper.add(new_node) + reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node) + looper.runFor(5) + + # 4. 
Check that all nodes sent CurrentState + for node in txnPoolNodeSet[:-1]: + sent_times = node.spylog.count(node.send_current_state_to_lagging_node.__name__) + assert sent_times != 0, "{} haven't sent CurrentState".format(node) + looper.runFor(5) + + # 5. Check that it received CurrentState messages + received_times = lagging_node.spylog.count(lagging_node.process_current_state_message.__name__) + assert received_times != 0 diff --git a/plenum/test/test_delay.py b/plenum/test/test_delay.py index 50c4137be1..ccaf88062c 100644 --- a/plenum/test/test_delay.py +++ b/plenum/test/test_delay.py @@ -43,11 +43,11 @@ def testTestNodeDelay(tdir_for_func): customTimeout=timeout)) # reset the delay, and find another message comes quickly - nodeB.nodeIbStasher.resetDelays() - nodeB.nodeIbStasher.force_unstash() + nodeB.nodeIbStasher.reset_delays_and_process_delayeds() looper.run(sendMessageAndCheckDelivery(nodes, nodeA, nodeB)) +@pytest.mark.skip('Nodes use round robin primary selection') def testSelfNominationDelay(tdir_for_func): nodeNames = ["testA", "testB", "testC", "testD"] with TestNodeSet(names=nodeNames, tmpdir=tdir_for_func) as nodeSet: diff --git a/plenum/test/test_ledger_manager.py b/plenum/test/test_ledger_manager.py index 63c0dab2af..da3c8e3c5e 100644 --- a/plenum/test/test_ledger_manager.py +++ b/plenum/test/test_ledger_manager.py @@ -1,11 +1,16 @@ from plenum.common.ledger_manager import LedgerManager from plenum.test.testable import spyable +ledger_manager_spyables = [LedgerManager.startCatchUpProcess, + LedgerManager.catchupCompleted, + LedgerManager.processConsistencyProof, + LedgerManager.canProcessConsistencyProof, + LedgerManager.processCatchupRep, + LedgerManager.getCatchupReqs + ] -@spyable(methods=[LedgerManager.startCatchUpProcess, - LedgerManager.catchupCompleted, - LedgerManager.processConsistencyProofReq, - LedgerManager.canProcessConsistencyProof]) + +@spyable(methods=ledger_manager_spyables) class TestLedgerManager(LedgerManager): def __init__(self, 
*args, **kwargs): super().__init__(*args, **kwargs) \ No newline at end of file diff --git a/plenum/test/test_log_rotation.py b/plenum/test/test_log_rotation.py index 4fd7603555..0ce4f09b6c 100644 --- a/plenum/test/test_log_rotation.py +++ b/plenum/test/test_log_rotation.py @@ -6,15 +6,8 @@ import TimeAndSizeRotatingFileHandler -def cleanFolder(path): - if os.path.exists(path): - shutil.rmtree(path, ignore_errors=True) - os.makedirs(path, exist_ok=True) - return path - - -def test_time_log_rotation(): - logDirPath = cleanFolder("/tmp/plenum/test_time_log_rotation") +def test_time_log_rotation(tdir_for_func): + logDirPath = tdir_for_func logFile = os.path.join(logDirPath, "log") logger = logging.getLogger('test_time_log_rotation-logger') @@ -27,8 +20,8 @@ def test_time_log_rotation(): assert len(os.listdir(logDirPath)) == 4 # initial + 3 new -def test_size_log_rotation(): - logDirPath = cleanFolder("/tmp/plenum/test_size_log_rotation") +def test_size_log_rotation(tdir_for_func): + logDirPath = tdir_for_func logFile = os.path.join(logDirPath, "log") logger = logging.getLogger('test_time_log_rotation-logger') @@ -43,8 +36,8 @@ def test_size_log_rotation(): assert len(os.listdir(logDirPath)) == 5 -def test_time_and_size_log_rotation(): - logDirPath = cleanFolder("/tmp/plenum/test_time_and_size_log_rotation") +def test_time_and_size_log_rotation(tdir_for_func): + logDirPath = tdir_for_func logFile = os.path.join(logDirPath, "log") logger = logging.getLogger('test_time_and_size_log_rotation-logger') diff --git a/plenum/test/test_node.py b/plenum/test/test_node.py index dda3a2ce86..05a0bf6077 100644 --- a/plenum/test/test_node.py +++ b/plenum/test/test_node.py @@ -7,7 +7,7 @@ from functools import partial from itertools import combinations, permutations from typing import Iterable, Iterator, Tuple, Sequence, Union, Dict, TypeVar, \ - List + List, Optional from plenum.common.stacks import nodeStackClass, clientStackClass from plenum.server.domain_req_handler import 
DomainRequestHandler @@ -22,15 +22,17 @@ from stp_core.common.log import getlogger from stp_core.loop.looper import Looper from plenum.common.startable import Status -from plenum.common.types import TaggedTuples, NodeDetail, f +from plenum.common.types import NodeDetail, f from plenum.common.constants import CLIENT_STACK_SUFFIX, TXN_TYPE, \ DOMAIN_LEDGER_ID -from plenum.common.util import Seconds, getMaxFailures, adict +from plenum.common.util import Seconds, getMaxFailures +from stp_core.common.util import adict from plenum.server import replica from plenum.server.instances import Instances from plenum.server.monitor import Monitor from plenum.server.node import Node from plenum.server.primary_elector import PrimaryElector +from plenum.server.primary_selector import PrimarySelector from plenum.test.greek import genNodeNames from plenum.test.msgs import TestMsg from plenum.test.spy_helpers import getLastMsgReceivedForNode, \ @@ -42,6 +44,7 @@ checkRemoteExists, RemoteState, checkState from plenum.test.testable import spyable from plenum.test import waits +from plenum.common.messages.node_message_factory import node_message_factory logger = getlogger() @@ -126,14 +129,21 @@ def createReplica(self, instNo: int, isMaster: bool): def newPrimaryDecider(self): pdCls = self.primaryDecider if self.primaryDecider else \ - TestPrimaryElector + TestPrimarySelector return pdCls(self) def delaySelfNomination(self, delay: Seconds): - logger.debug("{} delaying start election".format(self)) - delayerElection = partial(delayers.delayerMethod, - TestPrimaryElector.startElection) - self.elector.actionQueueStasher.delay(delayerElection(delay)) + if isinstance(self.primaryDecider, PrimaryElector): + logger.debug("{} delaying start election".format(self)) + delayerElection = partial(delayers.delayerMethod, + TestPrimaryElector.startElection) + self.elector.actionQueueStasher.delay(delayerElection(delay)) + elif isinstance(self.primaryDecider, PrimarySelector): + raise RuntimeError('Does 
not support nomination since primary is ' + 'selected deterministically') + else: + raise RuntimeError('Unknown primary decider encountered {}'. + format(self.primaryDecider)) def delayCheckPerformance(self, delay: Seconds): logger.debug("{} delaying check performance".format(self)) @@ -153,10 +163,14 @@ def force_process_delayeds(self): c += self.nodeIbStasher.force_unstash() for r in self.replicas: c += r.outBoxTestStasher.force_unstash() - logger.debug("{} forced processing of delayed messages, {} processed in total". - format(self, c)) + logger.debug("{} forced processing of delayed messages, " + "{} processed in total".format(self, c)) return c + def reset_delays_and_process_delayeds(self): + self.resetDelays() + self.force_process_delayeds() + def whitelistNode(self, nodeName: str, *codes: int): if nodeName not in self.whitelistedClients: self.whitelistedClients[nodeName] = set() @@ -192,9 +206,7 @@ def blacklistClient(self, clientName: str, reason: str=None, code: int=None): super().blacklistClient(clientName, reason, code) def validateNodeMsg(self, wrappedMsg): - nm = TestMsg.__name__ - if nm not in TaggedTuples: - TaggedTuples[nm] = TestMsg + node_message_factory.set_message_class(TestMsg) return super().validateNodeMsg(wrappedMsg) async def eatTestMsg(self, msg, frm): @@ -215,31 +227,47 @@ def getDomainReqHandler(self): self.reqProcessors) -@spyable(methods=[Node.handleOneNodeMsg, - Node.handleInvalidClientMsg, - Node.processRequest, - Node.processOrdered, - Node.postToClientInBox, - Node.postToNodeInBox, - "eatTestMsg", - Node.decidePrimaries, - Node.startViewChange, - Node.discard, - Node.reportSuspiciousNode, - Node.reportSuspiciousClient, - Node.processPropagate, - Node.propagate, - Node.forward, - Node.send, - Node.sendInstanceChange, - Node.processInstanceChange, - Node.checkPerformance, - Node.processStashedOrderedReqs, - Node.lost_master_primary, - Node.propose_view_change, - Node.getReplyFromLedger, - Node.recordAndPropagate - ]) +node_spyables = 
[Node.handleOneNodeMsg, + Node.handleInvalidClientMsg, + Node.processRequest, + Node.processOrdered, + Node.postToClientInBox, + Node.postToNodeInBox, + "eatTestMsg", + Node.decidePrimaries, + Node.startViewChange, + Node.discard, + Node.reportSuspiciousNode, + Node.reportSuspiciousClient, + Node.processPropagate, + Node.propagate, + Node.forward, + Node.send, + Node.sendInstanceChange, + Node.processInstanceChange, + Node.checkPerformance, + Node.processStashedOrderedReqs, + Node.lost_master_primary, + Node.propose_view_change, + Node.getReplyFromLedger, + Node.recordAndPropagate, + Node.allLedgersCaughtUp, + Node.start_catchup, + Node.is_catchup_needed, + Node.no_more_catchups_needed, + Node.caught_up_for_current_view, + Node._check_view_change_completed, + Node.primary_selected, + Node.num_txns_caught_up_in_last_catchup, + Node.process_message_req, + Node.process_message_rep, + Node.request_propagates, + Node.send_current_state_to_lagging_node, + Node.process_current_state_message, + ] + + +@spyable(methods=node_spyables) class TestNode(TestNodeCore, Node): def __init__(self, *args, **kwargs): @@ -268,11 +296,14 @@ def getLedgerManager(self): preCatchupClbk=self.preLedgerCatchUp) -@spyable(methods=[ +elector_spyables = [ PrimaryElector.discard, PrimaryElector.processPrimary, PrimaryElector.sendPrimary - ]) + ] + + +@spyable(methods=elector_spyables) class TestPrimaryElector(PrimaryElector): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -285,19 +316,39 @@ def _serviceActions(self): return super()._serviceActions() -@spyable(methods=[replica.Replica.sendPrePrepare, - replica.Replica.canProcessPrePrepare, - replica.Replica.canPrepare, - replica.Replica.validatePrepare, - replica.Replica.addToPrePrepares, - replica.Replica.processPrePrepare, - replica.Replica.processPrepare, - replica.Replica.processCommit, - replica.Replica.doPrepare, - replica.Replica.doOrder, - replica.Replica.discard, - replica.Replica.stashOutsideWatermarks - ]) 
+selector_spyables = [PrimarySelector.decidePrimaries] + + +@spyable(methods=selector_spyables) +class TestPrimarySelector(PrimarySelector): + pass + + +replica_spyables = [ + replica.Replica.sendPrePrepare, + replica.Replica.canProcessPrePrepare, + replica.Replica.canPrepare, + replica.Replica.validatePrepare, + replica.Replica.addToPrePrepares, + replica.Replica.processPrePrepare, + replica.Replica.processPrepare, + replica.Replica.processCommit, + replica.Replica.doPrepare, + replica.Replica.doOrder, + replica.Replica.discard, + replica.Replica.stashOutsideWatermarks, + replica.Replica.revert_unordered_batches, + replica.Replica.can_process_since_view_change_in_progress, + replica.Replica.processThreePhaseMsg, + replica.Replica.process_requested_pre_prepare, + replica.Replica._request_pre_prepare_if_possible, + replica.Replica.is_pre_prepare_time_correct, + replica.Replica.is_pre_prepare_time_acceptable, + replica.Replica._process_stashed_pre_prepare_for_time_if_possible, +] + + +@spyable(methods=replica_spyables) class TestReplica(replica.Replica): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -389,8 +440,13 @@ def removeNode(self, name, shouldClean): def __iter__(self) -> Iterator[TestNode]: return self.nodes.values().__iter__() - def __getitem__(self, key) -> TestNode: - return self.nodes.get(key) + def __getitem__(self, key) -> Optional[TestNode]: + if key in self.nodes: + return self.nodes[key] + elif isinstance(key, int): + return list(self.nodes.values())[key] + else: + return None def __len__(self): return self.nodes.__len__() @@ -399,6 +455,12 @@ def __len__(self): def nodeNames(self): return sorted(self.nodes.keys()) + @property + def nodes_by_rank(self): + return [t[1] for t in sorted([(node.rank, node) + for node in self.nodes.values()], + key=operator.itemgetter(0))] + @property def f(self): return getMaxFailures(len(self.nodes)) @@ -427,15 +489,19 @@ def connectAll(self): def getLastMsgReceived(self, node: NodeRef, 
method: str = None) -> Tuple: return getLastMsgReceivedForNode(self.getNode(node), method) - def getAllMsgReceived(self, node: NodeRef, method: str = None) -> Tuple: + def getAllMsgReceived(self, node: NodeRef, method: str = None) -> List: return getAllMsgReceivedForNode(self.getNode(node), method) -@spyable(methods=[Monitor.isMasterThroughputTooLow, - Monitor.isMasterReqLatencyTooHigh, - Monitor.sendThroughput, - Monitor.requestOrdered, - Monitor.reset]) +monitor_spyables = [Monitor.isMasterThroughputTooLow, + Monitor.isMasterReqLatencyTooHigh, + Monitor.sendThroughput, + Monitor.requestOrdered, + Monitor.reset + ] + + +@spyable(methods=monitor_spyables) class TestMonitor(Monitor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -661,6 +727,9 @@ def ensureElectionsDone(looper: Looper, retryWait: float = None, # seconds customTimeout: float = None, numInstances: int = None) -> Sequence[TestNode]: + # TODO: Change the name to something like `ensure_primaries_selected` + # since there might not always be an election, there might be a round + # robin selection """ Wait for elections to be complete @@ -782,6 +851,14 @@ def get_master_primary_node(nodes): raise AssertionError('No primary found for master') +def get_last_master_non_primary_node(nodes): + return getNonPrimaryReplicas(nodes)[-1].node + + +def get_first_master_non_primary_node(nodes): + return getNonPrimaryReplicas(nodes)[0].node + + def primaryNodeNameForInstance(nodes, instanceId): primaryNames = {node.replicas[instanceId].primaryName for node in nodes} assert 1 == len(primaryNames) @@ -808,9 +885,11 @@ def check_node_disconnected_from(needle: str, haystack: Iterable[TestNode]): assert all([needle not in node.nodestack.connecteds for node in haystack]) -def ensure_node_disconnected(looper, disconnected_name, other_nodes, +def ensure_node_disconnected(looper, disconnected, other_nodes, timeout=None): timeout = timeout or (len(other_nodes) - 1) + disconnected_name = disconnected if 
isinstance(disconnected, str) \ + else disconnected.name looper.run(eventually(check_node_disconnected_from, disconnected_name, [n for n in other_nodes if n.name != disconnected_name], diff --git a/plenum/test/test_node_basic.py b/plenum/test/test_node_basic.py index 5e6cd42653..c2bf33ffc8 100644 --- a/plenum/test/test_node_basic.py +++ b/plenum/test/test_node_basic.py @@ -3,7 +3,8 @@ from plenum.test import waits from plenum.test.test_node import TestNode, checkProtocolInstanceSetup from plenum.test.node_helpers.node_helper import getProtocolInstanceNums -from plenum.common.util import getMaxFailures, adict +from plenum.common.util import getMaxFailures +from stp_core.common.util import adict from plenum.test.helper import checkNodesConnected, sendMessageAndCheckDelivery, msgAll from plenum.test.msgs import randomMsg diff --git a/plenum/test/test_node_genesis.py b/plenum/test/test_node_genesis.py new file mode 100644 index 0000000000..20a44f85fb --- /dev/null +++ b/plenum/test/test_node_genesis.py @@ -0,0 +1,148 @@ +import os + +import pytest + +from plenum.common.init_util import initialize_node_environment +from plenum.server.node import Node + +SAMPLE_GEN_NODE_1 = """{"data":{"alias":"Node1","client_ip":"127.0.0.1","client_port":9701,"node_ip":"127.0.0.1","node_port":9700,"services":["VALIDATOR"]},"dest":"8WM6hggY9oqoYa8i5WxcRTHREgT1rFW1zxorh8XyKjLA","identifier":"Ssycj6ZMhXaEx5gXr6xGv2","txnId":"fea82e10e894419fe2bea7d96296a6d46f50f93f9eeda954ec461b2ed2950b62","type":"0"}""" +SAMPLE_GEN_NODE_2 = """{"data":{"alias":"Node2","client_ip":"127.0.0.1","client_port":9703,"node_ip":"127.0.0.1","node_port":9702,"services":["VALIDATOR"]},"dest":"8o28Ywvj4CvVCKub4TdyXJ9T2y6aFpyQS2iF5GUNCDvF","identifier":"K34DgPoiKHVBcaS9DeSgQv","txnId":"1ac8aece2a18ced660fef8694b61aac3af08ba875ce3026a160acbc3a3af35fc","type":"0"}""" +SAMPLE_GEN_NODE_3 = 
"""{"data":{"alias":"Node3","client_ip":"127.0.0.1","client_port":9705,"node_ip":"127.0.0.1","node_port":9704,"services":["VALIDATOR"]},"dest":"5EAh3u5Gj8HDb7C84AxjUSNN8vxioGLsKKzUiH71RVHL","identifier":"NHD3fiLvJMQVzDuiptbAm3","txnId":"7e9f355dffa78ed24668f0e0e369fd8c224076571c51e2ea8be5f26479edebe4","type":"0"}""" +SAMPLE_GEN_NODE_4 = """{"data":{"alias":"Node4","client_ip":"127.0.0.1","client_port":9707,"node_ip":"127.0.0.1","node_port":9706,"services":["VALIDATOR"]},"dest":"6KFnarfpneosG5ez43QbLA1j3bdXke3Bu1nqUyhfTYF3","identifier":"VE7ZgKvy5tfnJxsXKXEfps","txnId":"aa5e817d7cc626170eca175822029339a444eb0ee8f0bd20d3b0b76e566fb008","type":"0"}""" + +SAMPLE_GEN_4_POOL = [SAMPLE_GEN_NODE_1, SAMPLE_GEN_NODE_2, SAMPLE_GEN_NODE_3, SAMPLE_GEN_NODE_3] + +SAMPLE_GEN_NODE_1_DUPLICATE_KEY = """{"data":{"alias":"Node1", "alias":"Node1","client_ip":"127.0.0.1","client_port":9701,"node_ip":"127.0.0.1","node_port":9700,"services":["VALIDATOR"]},"dest":"8WM6hggY9oqoYa8i5WxcRTHREgT1rFW1zxorh8XyKjLA","identifier":"Ssycj6ZMhXaEx5gXr6xGv2","txnId":"fea82e10e894419fe2bea7d96296a6d46f50f93f9eeda954ec461b2ed2950b62","type":"0"}""" +SAMPLE_GEN_NODE_1_NULL_VALUES = """{"data":{"alias":null, "client_ip":null,"client_port":null,"node_ip":null,"node_port":null,"services":null},"dest":null,"identifier":null,"txnId":null,"type":null}""" +SAMPLE_GEN_NODE_1_COMPLEX_TARGET = """{"data":{"alias":"Node1","client_ip":"127.0.0.1","client_port":9701,"node_ip":"127.0.0.1","node_port":9700,"services":["VALIDATOR"]},"dest":{"id":"8WM6hggY9oqoYa8i5WxcRTHREgT1rFW1zxorh8XyKjLA"},"identifier":"Ssycj6ZMhXaEx5gXr6xGv2","txnId":"fea82e10e894419fe2bea7d96296a6d46f50f93f9eeda954ec461b2ed2950b62","type":"0"}""" + + +def _setup_genesis(base_dir, ledger_file_name, genesis_txn_list): + default_file = os.path.join(base_dir, ledger_file_name) + with open(default_file, 'w') as f: + f.write("\n".join(genesis_txn_list)) + + +@pytest.mark.skip # INDY1-140 +def test_empty_dict_in_genesis(tmpdir, looper): + base_dir = 
str(tmpdir) + name = "Node1" + ledger_file = 'pool_transactions_sandbox' + + gen_txn = list(SAMPLE_GEN_4_POOL) + gen_txn.insert(1, "{}") + + _setup_genesis(base_dir, ledger_file, gen_txn) + + initialize_node_environment(name=name, base_dir=base_dir) + + n = Node(name=name, basedirpath=base_dir) + looper.add(n) + + +@pytest.mark.skip # INDY1-141 +def test_empty_line(tmpdir, looper): + base_dir = str(tmpdir) + name = "Node1" + ledger_file = 'pool_transactions_sandbox' + + gen_txn = list(SAMPLE_GEN_4_POOL) + gen_txn.insert(1, " ") + + _setup_genesis(base_dir, ledger_file, gen_txn) + + initialize_node_environment(name=name, base_dir=base_dir) + + n = Node(name=name, basedirpath=base_dir) + looper.add(n) + +@pytest.mark.skip +def test_utf_16(tmpdir, looper): + base_dir = str(tmpdir) + name = "Node1" + ledger_file = 'pool_transactions_sandbox' + + gen_txn = list(SAMPLE_GEN_4_POOL) + + default_file = os.path.join(base_dir, ledger_file) + genesis_data = "\n".join(gen_txn) + with open(default_file, 'wb') as f: + f.write(genesis_data.encode("UTF-16")) + + initialize_node_environment(name=name, base_dir=base_dir) + + n = Node(name=name, basedirpath=base_dir) + looper.add(n) + + +@pytest.mark.skip +def test_utf_8_with_bom(tmpdir, looper): + base_dir = str(tmpdir) + name = "Node1" + ledger_file = 'pool_transactions_sandbox' + + gen_txn = list(SAMPLE_GEN_4_POOL) + + default_file = os.path.join(base_dir, ledger_file) + genesis_data = "\n".join(gen_txn) + with open(default_file, 'wb') as f: + f.write(b'\xEF\xBB\xBF') + f.write(genesis_data.encode("UTF-8")) + + initialize_node_environment(name=name, base_dir=base_dir) + + n = Node(name=name, basedirpath=base_dir) + looper.add(n) + + +@pytest.mark.skip +def test_null_values(tmpdir, looper): + base_dir = str(tmpdir) + name = "Node1" + ledger_file = 'pool_transactions_sandbox' + + gen_txn = list(SAMPLE_GEN_4_POOL) + gen_txn[0] = SAMPLE_GEN_NODE_1_NULL_VALUES + + _setup_genesis(base_dir, ledger_file, gen_txn) + + 
initialize_node_environment(name=name, base_dir=base_dir) + + n = Node(name=name, basedirpath=base_dir) + looper.add(n) + +@pytest.mark.skip +def test_complex_target(tmpdir, looper): + """ + Test what happens if target is a json object instead of a String + """ + base_dir = str(tmpdir) + name = "Node1" + ledger_file = 'pool_transactions_sandbox' + + gen_txn = list(SAMPLE_GEN_4_POOL) + gen_txn[0] = SAMPLE_GEN_NODE_1_COMPLEX_TARGET + + _setup_genesis(base_dir, ledger_file, gen_txn) + + initialize_node_environment(name=name, base_dir=base_dir) + + n = Node(name=name, basedirpath=base_dir) + looper.add(n) + + +@pytest.mark.skip +def test_duplicate_tnx(tmpdir, looper): + base_dir = str(tmpdir) + name = "Node1" + ledger_file = 'pool_transactions_sandbox' + + gen_txn = list(SAMPLE_GEN_4_POOL) + gen_txn[1] = SAMPLE_GEN_NODE_1 + + _setup_genesis(base_dir, ledger_file, gen_txn) + + initialize_node_environment(name=name, base_dir=base_dir) + + n = Node(name=name, basedirpath=base_dir) + looper.add(n) \ No newline at end of file diff --git a/plenum/test/test_node_request.py b/plenum/test/test_node_request.py index f2e83dcd42..6a40427500 100644 --- a/plenum/test/test_node_request.py +++ b/plenum/test/test_node_request.py @@ -6,8 +6,7 @@ from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from stp_core.loop.looper import Looper -from plenum.common.types import PrePrepare, Prepare, \ - Commit, Primary +from plenum.common.messages.node_messages import Primary, PrePrepare, Prepare, Commit from plenum.common.util import getMaxFailures from plenum.test import waits from plenum.test.delayers import delayerMsgTuple @@ -49,6 +48,7 @@ async def chk(): looper.run(eventually(chk, timeout=timeout)) +@pytest.mark.skip('Since primary is selected immediately now') def testPrePrepareWhenPrimaryStatusIsUnknown(tdir_for_func): nodeNames = genNodeNames(4) nodeReg = genNodeReg(names=nodeNames) @@ -59,16 +59,18 @@ def 
testPrePrepareWhenPrimaryStatusIsUnknown(tdir_for_func): nodeA, nodeB, nodeC, nodeD = tuple( addNodeBack(nodeSet, looper, nodeNames[i]) for i in range(0, 4)) + # Since primary selection is round robin, A and B will be primaries + # Nodes C and D delays self nomination so A and B can become # primaries - nodeC.delaySelfNomination(10) - nodeD.delaySelfNomination(10) + # nodeC.delaySelfNomination(10) + # nodeD.delaySelfNomination(10) # Node D delays receiving PRIMARY messages from all nodes so it # will not know whether it is primary or not - delayD = 5 - nodeD.nodeIbStasher.delay(delayerMsgTuple(delayD, Primary)) + # delayD = 5 + # nodeD.nodeIbStasher.delay(delayerMsgTuple(delayD, Primary)) checkPoolReady(looper=looper, nodes=nodeSet) @@ -88,27 +90,23 @@ def testPrePrepareWhenPrimaryStatusIsUnknown(tdir_for_func): request.identifier, request.reqId, retryWait=1, timeout=timeout)) - # Node D should have 1 pending PRE-PREPARE request - def assertOnePrePrepare(): + def assert_msg_count(typ, count): assert len(getPendingRequestsForReplica(nodeD.replicas[instNo], - PrePrepare)) == 1 + typ)) == count + # Node D should have 1 pending PRE-PREPARE request timeout = waits.expectedPrePrepareTime(len(nodeSet)) - looper.run(eventually(assertOnePrePrepare, retryWait=1, timeout=timeout)) + looper.run(eventually(assert_msg_count, PrePrepare, 1, + retryWait=1, timeout=timeout)) # Node D should have 2 pending PREPARE requests(from node B and C) - - def assertTwoPrepare(): - assert len(getPendingRequestsForReplica(nodeD.replicas[instNo], - Prepare)) == 2 - - timeout = waits.expectedPrePrepareTime(len(nodeSet)) - looper.run(eventually(assertTwoPrepare, retryWait=1, timeout=timeout)) + timeout = waits.expectedPrepareTime(len(nodeSet)) + looper.run(eventually(assert_msg_count, Prepare, 2, retryWait=1, + timeout=timeout)) # Its been checked above that replica stashes 3 phase messages in # lack of primary, now avoid delay (fix the network) - nodeD.nodeIbStasher.resetDelays() - 
nodeD.nodeIbStasher.force_unstash() + nodeD.nodeIbStasher.reset_delays_and_process_delayeds() # Node D should have no pending PRE-PREPARE, PREPARE or COMMIT # requests diff --git a/plenum/test/test_performance.py b/plenum/test/test_performance.py new file mode 100644 index 0000000000..b0615d8f9f --- /dev/null +++ b/plenum/test/test_performance.py @@ -0,0 +1,300 @@ +import logging +from statistics import pstdev, mean +from time import perf_counter +from types import MethodType + +import math +import pytest + +from plenum.common.constants import DOMAIN_LEDGER_ID, LedgerState +from plenum.common.perf_util import get_memory_usage +from plenum.test.delayers import cr_delay +from plenum.test.test_client import TestClient + +from stp_core.loop.eventually import eventually +from plenum.common.types import HA +from stp_core.common.log import getlogger, Logger +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + check_ledger_state +from plenum.test.pool_transactions.helper import \ + disconnect_node_and_ensure_disconnected, buildPoolClientAndWallet +from plenum.test.test_node import checkNodesConnected, TestNode +from plenum.test import waits + +# noinspection PyUnresolvedReferences +from plenum.test.node_catchup.conftest import whitelist, \ + nodeCreatedAfterSomeTxns, nodeSetWithNodeAddedAfterSomeTxns, newNodeCaughtUp +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected + +Logger.setLogLevel(logging.WARNING) +logger = getlogger() +txnCount = 5 + + +TestRunningTimeLimitSec = math.inf + + +""" +Since these tests expect performance to be of certain level, they can fail and +for now should only be run when a perf check is required, like after a relevant +change in protocol, setting `SkipTests` to False will run tests in this +module +""" +SkipTests = True +skipper = pytest.mark.skipif(SkipTests, reason='Perf optimisations not 
done') + + +@pytest.fixture(scope="module") +def disable_node_monitor_config(tconf): + tconf.unsafe.add('disable_view_change') + # tconf.unsafe.add('disable_monitor') + return tconf + + +@pytest.fixture(scope="module") +def change_checkpoint_freq(tconf): + tconf.CHK_FREQ = 3 + + +@skipper +def test_node_load(looper, txnPoolNodeSet, tconf, + tdirWithPoolTxns, allPluginsPath, + poolTxnStewardData, capsys): + client, wallet = buildPoolClientAndWallet(poolTxnStewardData, + tdirWithPoolTxns, + clientClass=TestClient) + looper.add(client) + looper.run(client.ensureConnectedToNodes()) + + client_batches = 150 + txns_per_batch = 25 + for i in range(client_batches): + s = perf_counter() + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, + txns_per_batch, + override_timeout_limit=True) + with capsys.disabled(): + print('{} executed {} client txns in {:.2f} seconds'. + format(i + 1, txns_per_batch, perf_counter() - s)) + + +@skipper +def test_node_load_consistent_time(tconf, change_checkpoint_freq, + disable_node_monitor_config, looper, + txnPoolNodeSet, tdirWithPoolTxns, + allPluginsPath, poolTxnStewardData, capsys): + + # One of the reason memory grows is because spylog grows + client, wallet = buildPoolClientAndWallet(poolTxnStewardData, + tdirWithPoolTxns, + clientClass=TestClient) + looper.add(client) + looper.run(client.ensureConnectedToNodes()) + + client_batches = 300 + txns_per_batch = 25 + time_log = [] + warm_up_batches = 10 + tolerance_factor = 2 + from pympler import asizeof + for i in range(client_batches): + s = perf_counter() + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, + txns_per_batch, + override_timeout_limit=True) + t = perf_counter() - s + with capsys.disabled(): + print('{} executed {} client txns in {:.2f} seconds'. 
+ format(i + 1, txns_per_batch, t)) + print('--------Memory Usage details start') + for node in txnPoolNodeSet: + # print(sys.getsizeof(node)) + print('---Node {}-----'.format(node)) + # print('Requests {}'.format(asizeof.asizeof(node.requests, detail=1))) + print(get_memory_usage(node, True, get_only_non_empty=True)) + for r in node.replicas: + print('---Replica {}-----'.format(r)) + print(get_memory_usage(r, True, get_only_non_empty=True)) + print('--------Memory Usage details end') + + if len(time_log) >= warm_up_batches: + m = mean(time_log) + sd = tolerance_factor*pstdev(time_log) + assert m > t or abs(t - m) <= sd, '{} {}'.format(abs(t - m), sd) + time_log.append(t) + # Since client checks inbox for sufficient replies, clear inbox so that + # it takes constant time to check replies for each batch + client.inBox.clear() + client.txnLog.reset() + + +@skipper +def test_node_load_after_add(newNodeCaughtUp, txnPoolNodeSet, tconf, + tdirWithPoolTxns, allPluginsPath, + poolTxnStewardData, looper, client1, wallet1, + client1Connected, capsys): + """ + A node that restarts after some transactions should eventually get the + transactions which happened while it was down + :return: + """ + new_node = newNodeCaughtUp + logger.debug("Sending requests") + + # Here's where we apply some load + client_batches = 300 + txns_per_batch = 25 + for i in range(client_batches): + s = perf_counter() + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + txns_per_batch, + override_timeout_limit=True) + with capsys.disabled(): + print('{} executed {} client txns in {:.2f} seconds'. 
+ format(i+1, txns_per_batch, perf_counter()-s)) + + logger.debug("Starting the stopped node, {}".format(new_node)) + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4]) + + +@skipper +def test_node_load_after_add_then_disconnect(newNodeCaughtUp, txnPoolNodeSet, + tconf, looper, client1, wallet1, + client1Connected, + tdirWithPoolTxns, allPluginsPath, + poolTxnStewardData, capsys): + """ + A node that restarts after some transactions should eventually get the + transactions which happened while it was down + :return: + """ + new_node = newNodeCaughtUp + with capsys.disabled(): + print("Stopping node {} with pool ledger size {}". + format(new_node, new_node.poolManager.txnSeqNo)) + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, new_node) + looper.removeProdable(new_node) + + client_batches = 80 + txns_per_batch = 10 + for i in range(client_batches): + s = perf_counter() + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, + txns_per_batch, + override_timeout_limit=True) + with capsys.disabled(): + print('{} executed {} client txns in {:.2f} seconds'. 
+ format(i+1, txns_per_batch, perf_counter()-s)) + + with capsys.disabled(): + print("Starting the stopped node, {}".format(new_node)) + nodeHa, nodeCHa = HA(*new_node.nodestack.ha), HA(*new_node.clientstack.ha) + new_node = TestNode(new_node.name, basedirpath=tdirWithPoolTxns, config=tconf, + ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) + looper.add(new_node) + txnPoolNodeSet[-1] = new_node + + # Delay catchup reply processing so LedgerState does not change + delay_catchup_reply = 5 + new_node.nodeIbStasher.delay(cr_delay(delay_catchup_reply)) + looper.run(checkNodesConnected(txnPoolNodeSet)) + + # Make sure ledger starts syncing (sufficient consistency proofs received) + looper.run(eventually(check_ledger_state, new_node, DOMAIN_LEDGER_ID, + LedgerState.syncing, retryWait=.5, timeout=5)) + + # Not accurate timeout but a conservative one + timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \ + 2*delay_catchup_reply + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4], + customTimeout=timeout) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4]) + + +@skipper +def test_nodestack_contexts_are_discrete(txnPoolNodeSet): + assert txnPoolNodeSet[0].nodestack.ctx != txnPoolNodeSet[1].nodestack.ctx + ctx_objs = {n.nodestack.ctx for n in txnPoolNodeSet} + ctx_underlying = {n.nodestack.ctx.underlying for n in txnPoolNodeSet} + assert len(ctx_objs) == len(txnPoolNodeSet) + assert len(ctx_underlying) == len(txnPoolNodeSet) + + +@skipper +def test_node_load_after_disconnect(looper, txnPoolNodeSet, tconf, + tdirWithPoolTxns, allPluginsPath, + poolTxnStewardData, capsys): + + client, wallet = buildPoolClientAndWallet(poolTxnStewardData, + tdirWithPoolTxns, + clientClass=TestClient) + looper.add(client) + looper.run(client.ensureConnectedToNodes()) + + nodes = txnPoolNodeSet + x = nodes[-1] + + with capsys.disabled(): + print("Stopping node {} with pool ledger size {}". 
+ format(x, x.poolManager.txnSeqNo)) + + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, x) + looper.removeProdable(x) + + client_batches = 80 + txns_per_batch = 10 + for i in range(client_batches): + s = perf_counter() + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, + txns_per_batch, + override_timeout_limit=True) + with capsys.disabled(): + print('{} executed {} client txns in {:.2f} seconds'. + format(i+1, txns_per_batch, perf_counter()-s)) + + nodeHa, nodeCHa = HA(*x.nodestack.ha), HA(*x.clientstack.ha) + newNode = TestNode(x.name, basedirpath=tdirWithPoolTxns, config=tconf, + ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) + looper.add(newNode) + txnPoolNodeSet[-1] = newNode + looper.run(checkNodesConnected(txnPoolNodeSet)) + + +@skipper +def test_node_load_after_one_node_drops_all_msgs(looper, txnPoolNodeSet, tconf, + tdirWithPoolTxns, allPluginsPath, + poolTxnStewardData, capsys): + + client, wallet = buildPoolClientAndWallet(poolTxnStewardData, + tdirWithPoolTxns, + clientClass=TestClient) + looper.add(client) + looper.run(client.ensureConnectedToNodes()) + + nodes = txnPoolNodeSet + x = nodes[-1] + + with capsys.disabled(): + print("Patching node {}".format(x)) + + def handleOneNodeMsg(self, wrappedMsg): + # do nothing with an incoming node message + pass + + x.handleOneNodeMsg = MethodType(handleOneNodeMsg, x) + + client_batches = 120 + txns_per_batch = 25 + for i in range(client_batches): + s = perf_counter() + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, + txns_per_batch, + override_timeout_limit=True) + with capsys.disabled(): + print('{} executed {} client txns in {:.2f} seconds'. 
+ format(i+1, txns_per_batch, perf_counter()-s)) diff --git a/plenum/test/test_round_trip_with_one_faulty_node.py b/plenum/test/test_round_trip_with_one_faulty_node.py index 1a2941a6c8..0d8dcf18d4 100644 --- a/plenum/test/test_round_trip_with_one_faulty_node.py +++ b/plenum/test/test_round_trip_with_one_faulty_node.py @@ -2,7 +2,7 @@ import pytest -from plenum.common.types import Propagate +from plenum.common.messages.node_messages import Propagate from stp_core.common.log import getlogger nodeCount = 4 @@ -13,7 +13,7 @@ # noinspection PyIncorrectDocstring @pytest.fixture("module") -def alphaDoesntPropagate(startedNodes): +def node_doesnt_propagate(startedNodes): """ Makes the node named Alpha in the given set of nodes faulty. After applying this behavior, the node Alpha no longer sends @@ -28,20 +28,22 @@ def evilPropagateRequest(self, request, clientName): logger.info("TEST: Evil {} is not PROPAGATing client request". format(self)) - epp = types.MethodType(evilProcessPropagate, nodes.Alpha) - nodes.Alpha.nodeMsgRouter.routes[Propagate] = epp - nodes.Alpha.processPropagate = epp + # Choosing a node which will not be primary + node = nodes.Delta + epp = types.MethodType(evilProcessPropagate, node) + node.nodeMsgRouter.routes[Propagate] = epp + node.processPropagate = epp - nodes.Alpha.propagate = types.MethodType(evilPropagateRequest, nodes.Alpha) + node.propagate = types.MethodType(evilPropagateRequest, node) - # we don't want Alpha having a primary (another test?) - nodes.Alpha.delaySelfNomination(100) + # we don't want `node` being a primary (another test?) + # nodes.Alpha.delaySelfNomination(100) - return nodes.Alpha + return node # noinspection PyIncorrectDocstring -def testRequestFullRoundTrip(alphaDoesntPropagate, replied1): +def testRequestFullRoundTrip(node_doesnt_propagate, replied1): """ With an Alpha that doesn't send propagate requests, the request should still be able to successfully complete a full cycle. 
diff --git a/plenum/test/test_state_regenerated_from_ledger.py b/plenum/test/test_state_regenerated_from_ledger.py new file mode 100644 index 0000000000..5bcf11025c --- /dev/null +++ b/plenum/test/test_state_regenerated_from_ledger.py @@ -0,0 +1,55 @@ +import psutil +import shutil + +from plenum.common.constants import DOMAIN_LEDGER_ID +from plenum.test.helper import send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data, \ + waitNodeDataEquality + +from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ + client1, wallet1, client1Connected +from plenum.test.test_node import checkNodesConnected, TestNode +from stp_core.types import HA + +TestRunningTimeLimitSec = 200 + + +def test_state_regenerated_from_ledger(looper, txnPoolNodeSet, + client1, wallet1, client1Connected, + tconf, tdirWithPoolTxns, allPluginsPath): + """ + Node loses its state database but recreates it from ledger after start + """ + sent_batches = 10 + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, + 5 * sent_batches, sent_batches) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + node_to_stop = txnPoolNodeSet[-1] + node_state = node_to_stop.states[DOMAIN_LEDGER_ID] + assert not node_state.isEmpty + state_db_path = node_state._kv._dbPath + nodeHa, nodeCHa = HA(*node_to_stop.nodestack.ha), HA(*node_to_stop.clientstack.ha) + + node_to_stop.stop() + looper.removeProdable(node_to_stop) + + shutil.rmtree(state_db_path) + + restarted_node = TestNode(node_to_stop.name, basedirpath=tdirWithPoolTxns, + config=tconf, ha=nodeHa, cliha=nodeCHa, + pluginPaths=allPluginsPath) + looper.add(restarted_node) + txnPoolNodeSet[-1] = restarted_node + + looper.run(checkNodesConnected(txnPoolNodeSet)) + waitNodeDataEquality(looper, restarted_node, *txnPoolNodeSet[:-1]) + + +def test_memory_consumption_while_recreating_state_db(): + """ + Check that while re-creating state db from ledger, the node does not read 
+ all transactions at once. Use psutil to compare memory + """ + # TODO: + # p = psutil.Process() + # print(p.memory_info_ex()) \ No newline at end of file diff --git a/plenum/test/test_util.py b/plenum/test/test_util.py index b7196b0b9e..895cece450 100644 --- a/plenum/test/test_util.py +++ b/plenum/test/test_util.py @@ -3,7 +3,8 @@ from libnacl import crypto_hash_sha256 -from plenum.common.util import randomString +from plenum.common.util import randomString, compare_3PC_keys, \ + check_if_all_equal_in_list, min_3PC_key, max_3PC_key from stp_core.network.util import evenCompare, distributedConnectionMap from plenum.test.greek import genNodeNames @@ -53,3 +54,38 @@ def test_distributedConnectionMapIsDeterministic(): conmaps = [distributedConnectionMap(rands) for _ in range(10)] for conmap1, conmap2 in combinations(conmaps, 2): assert conmap1 == conmap2 + + +def test_list_item_equality(): + l = [ + {'a': 1, 'b': 2, 'c': 3}, + {'c': 3, 'a': 1, 'b': 2}, + {'c': 3, 'a': 1, 'b': 2}, + {'a': 1, 'b': 2, 'c': 3}, + {'c': 3, 'a': 1, 'b': 2}, + {'b': 2, 'c': 3, 'a': 1}, + ] + l1 = [{'a', 'b', 'c', 1}, {'c', 'a', 'b', 1}, {1, 'a', 'c', 'b'}] + assert check_if_all_equal_in_list(l) + assert check_if_all_equal_in_list(l1) + assert check_if_all_equal_in_list([1, 1, 1, 1]) + assert check_if_all_equal_in_list(['a', 'a', 'a', 'a']) + assert not check_if_all_equal_in_list(['b', 'a', 'a', 'a']) + assert not check_if_all_equal_in_list(l + [{'a': 1, 'b': 2, 'c': 33}]) + assert not check_if_all_equal_in_list(l1 + [{'c', 'a', 'b', 11}]) + + +def test_3PC_key_comaparison(): + assert compare_3PC_keys((1,2), (1,2)) == 0 + assert compare_3PC_keys((1,3), (1,2)) < 0 + assert compare_3PC_keys((1,2), (1,3)) > 0 + assert compare_3PC_keys((1,2), (1,10)) > 0 + assert compare_3PC_keys((1, 100), (2, 3)) > 0 + assert compare_3PC_keys((1, 100), (4, 3)) > 0 + assert compare_3PC_keys((2, 100), (1, 300)) < 0 + assert min_3PC_key([(2, 100), (1, 300), (5, 600)]) == (1, 300) + assert min_3PC_key([(2, 100), 
(2, 300), (2, 600)]) == (2, 100) + assert min_3PC_key([(2, 100), (2, 300), (1, 600)]) == (1, 600) + assert max_3PC_key([(2, 100), (1, 300), (5, 6)]) == (5, 6) + assert max_3PC_key([(2, 100), (3, 20), (4, 1)]) == (4, 1) + assert max_3PC_key([(2, 100), (2, 300), (2, 400)]) == (2, 400) diff --git a/plenum/test/testable.py b/plenum/test/testable.py index 721a575c7a..2b8f7e7863 100644 --- a/plenum/test/testable.py +++ b/plenum/test/testable.py @@ -5,6 +5,11 @@ Callable from typing import Dict +try: + from plenum.test import NO_SPIES +except ImportError: + pass + from plenum.common.util import objSearchReplace from stp_core.common.log import getlogger @@ -109,6 +114,14 @@ def wrap(self, *args, **kwargs): def spyable(name: str = None, methods: SpyableMethods = None, deep_level: int = None): def decorator(clas): + + if 'NO_SPIES' in globals() and globals()['NO_SPIES']: + # Since spylog consumes resources, benchmarking tests need to be + # able to not have spyables, so they set a module global `NO_SPIES`, + # it's their responsibility to unset it + logger.info('NOT USING SPIES ON METHODS AS THEY ARE EXPLICITLY DISABLED') + return clas + nonlocal name name = name if name else "Spyable" + clas.__name__ diff --git a/plenum/test/view_change/conftest.py b/plenum/test/view_change/conftest.py index cf35163716..21c8a3d1e8 100644 --- a/plenum/test/view_change/conftest.py +++ b/plenum/test/view_change/conftest.py @@ -1,11 +1,5 @@ import pytest -from plenum.common.util import adict -from plenum.test.delayers import delayNonPrimaries -from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ - waitForViewChange -from plenum.test.test_node import ensureElectionsDone, get_master_primary_node - @pytest.fixture() def viewNo(nodeSet): @@ -16,21 +10,3 @@ def viewNo(nodeSet): return viewNos.pop() -@pytest.fixture() -def simulate_slow_master(nodeSet, looper, up, wallet1, client1, viewNo): - def _(): - m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) - # Delay 
processing of PRE-PREPARE from all non primary replicas of master - # so master's performance falls and view changes - delayNonPrimaries(nodeSet, 0, 10) - - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) - - try: - waitForViewChange(looper, nodeSet, expectedViewNo=viewNo+1) - except AssertionError as e: - raise RuntimeError('view did not change') from e - ensureElectionsDone(looper=looper, nodes=nodeSet) - new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) - return adict(old=m_primary_node, new=new_m_primary_node) - return _ diff --git a/plenum/test/view_change/helper.py b/plenum/test/view_change/helper.py index 7564393b3d..1168faf723 100644 --- a/plenum/test/view_change/helper.py +++ b/plenum/test/view_change/helper.py @@ -1,8 +1,10 @@ import types +from plenum.test.delayers import delayNonPrimaries, delay_3pc_messages, reset_delays_and_process_delayeds from plenum.test.helper import checkViewNoForNodes, sendRandomRequests, \ - sendReqsToNodesAndVerifySuffReplies -from plenum.test.test_node import get_master_primary_node + sendReqsToNodesAndVerifySuffReplies, send_reqs_to_nodes_and_verify_all_replies +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.test_node import get_master_primary_node, ensureElectionsDone from stp_core.common.log import getlogger from stp_core.loop.eventually import eventually from plenum.test import waits @@ -44,7 +46,17 @@ def provoke_and_wait_for_view_change(looper, timeout=timeout)) -def ensure_view_change(looper, nodes, client, wallet): +def simulate_slow_master(looper, nodeSet, wallet, client, delay=10, num_reqs=4): + m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + # Delay processing of PRE-PREPARE from all non primary replicas of master + # so master's performance falls and view changes + delayNonPrimaries(nodeSet, 0, delay) + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, num_reqs) + return m_primary_node + + 
+def ensure_view_change(looper, nodes, exclude_from_check=None, + custom_timeout=None): """ This method patches the master performance check to return False and thus ensures that all given nodes do a view change @@ -66,10 +78,16 @@ def slow_master(self): node.monitor.isMasterDegraded = types.MethodType(slow_master, node.monitor) - timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodes)) + \ - client.config.PerfCheckFreq - looper.run(eventually(checkViewNoForNodes, nodes, old_view_no+1, + perf_check_freq = next(iter(nodes)).config.PerfCheckFreq + timeout = custom_timeout or waits.expectedPoolViewChangeStartedTimeout(len(nodes)) + \ + perf_check_freq + nodes_to_check = nodes if exclude_from_check is None else [n for n in nodes + if n not in exclude_from_check] + logger.debug('Checking view no for nodes {}'.format(nodes_to_check)) + looper.run(eventually(checkViewNoForNodes, nodes_to_check, old_view_no+1, retryWait=1, timeout=timeout)) + + logger.debug('Patching back perf check for all nodes') for node in nodes: node.monitor.isMasterDegraded = old_meths[node.name] return old_view_no + 1 @@ -100,7 +118,7 @@ def check_each_node_reaches_same_end_for_view(nodes, view_no): def do_vc(looper, nodes, client, wallet, old_view_no=None): sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5) - new_view_no = ensure_view_change(looper, nodes, client, wallet) + new_view_no = ensure_view_change(looper, nodes) if old_view_no: assert new_view_no - old_view_no >= 1 return new_view_no @@ -112,3 +130,57 @@ def disconnect_master_primary(nodes): if node != pr_node: node.nodestack.getRemote(pr_node.nodestack.name).disconnect() return pr_node + + +def check_replica_queue_empty(node): + replica = node.replicas[0] + + assert len(replica.prePrepares) == 0 + assert len(replica.prePreparesPendingFinReqs) == 0 + assert len(replica.prepares) == 0 + assert len(replica.sentPrePrepares) == 0 + assert len(replica.batches) == 0 + assert len(replica.commits) == 0 + assert 
len(replica.commitsWaitingForPrepare) == 0 + assert len(replica.ordered) == 0 + + +def check_all_replica_queue_empty(nodes): + for node in nodes: + check_replica_queue_empty(node) + + +def view_change_in_between_3pc(looper, nodes, slow_nodes, wallet, client, + slow_delay=1, wait=None): + send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4) + delay_3pc_messages(slow_nodes, 0, delay=slow_delay) + + sendRandomRequests(wallet, client, 10) + if wait: + looper.runFor(wait) + + ensure_view_change(looper, nodes) + ensureElectionsDone(looper=looper, nodes=nodes, customTimeout=60) + ensure_all_nodes_have_same_data(looper, nodes=nodes) + + reset_delays_and_process_delayeds(slow_nodes) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5, total_timeout=30) + send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 5, total_timeout=30) + + +def view_change_in_between_3pc_random_delays(looper, nodes, slow_nodes, wallet, client, + min_delay=0, max_delay=5): + send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4) + + delay_3pc_messages(slow_nodes, 0, min_delay=min_delay, max_delay=max_delay) + + sendRandomRequests(wallet, client, 10) + + ensure_view_change(looper, nodes) + ensureElectionsDone(looper=looper, nodes=nodes) + ensure_all_nodes_have_same_data(looper, nodes=nodes) + + reset_delays_and_process_delayeds(slow_nodes) + + send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 10) diff --git a/plenum/test/view_change/slow_nodes/__init__.py b/plenum/test/view_change/slow_nodes/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/view_change/slow_nodes/conftest.py b/plenum/test/view_change/slow_nodes/conftest.py new file mode 100644 index 0000000000..cf8b254eaf --- /dev/null +++ b/plenum/test/view_change/slow_nodes/conftest.py @@ -0,0 +1,8 @@ +import pytest + +@pytest.fixture(scope="module") +def client(looper, txnPoolNodeSet, client1, client1Connected): + return client1Connected + 
+ + diff --git a/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_new_primary.py b/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_new_primary.py new file mode 100644 index 0000000000..fdf5ba29d0 --- /dev/null +++ b/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_new_primary.py @@ -0,0 +1,38 @@ +from plenum.test.test_node import get_last_master_non_primary_node, get_first_master_non_primary_node +from plenum.test.view_change.helper import view_change_in_between_3pc +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + + +def slow_nodes(node_set): + return [get_first_master_non_primary_node(node_set), + get_last_master_non_primary_node(node_set)] + + +def test_view_change_in_between_3pc_2_of_4_nodes_with_new_primary( + txnPoolNodeSet, looper, wallet1, client): + """ + - Slow processing 3PC messages for 2 of 4 node (2>f) + - Slow the the first and the last non-primary node + (the first will be primary and the last one will not be the next primary + because of round-robin). + - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + slow_nodes(txnPoolNodeSet), + wallet1, client) + + +def test_view_change_in_between_3pc_2_of_4_nodes_with_new_primary_long_delay( + txnPoolNodeSet, looper, wallet1, client): + """ + - Slow processing 3PC messages for 2 of 4 node (2>f) + - Slow the the first and the last non-primary node + (the first will be primary and the last one will not be the next primary + because of round-robin). 
+ - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + slow_nodes(txnPoolNodeSet), + wallet1, client, + slow_delay=20) \ No newline at end of file diff --git a/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_non_primary.py b/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_non_primary.py new file mode 100644 index 0000000000..9792150052 --- /dev/null +++ b/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_non_primary.py @@ -0,0 +1,33 @@ +from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.view_change.helper import view_change_in_between_3pc +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + + +def slow_nodes(node_set): + return [replica.node for replica in getNonPrimaryReplicas(node_set)[1:]] + + +def test_view_change_in_between_3pc_2_of_4_nodes_with_non_primary( + txnPoolNodeSet, looper, wallet1, client): + """ + - Slow processing 3PC messages for 2 of 4 node (2>f). + - Both nodes are non-primary for master neither in this nor the next view + - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + slow_nodes(txnPoolNodeSet), + wallet1, client) + + +def test_view_change_in_between_3pc_2_of_4_nodes_with_non_primary_long_delay( + txnPoolNodeSet, looper, wallet1, client): + """ + - Slow processing 3PC messages for 2 of 4 node (2>f). 
+ - Both nodes are non-primary for master neither in this nor the next view + - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + slow_nodes(txnPoolNodeSet), + wallet1, client, + slow_delay=20) \ No newline at end of file diff --git a/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_old_and_new_primary.py b/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_old_and_new_primary.py new file mode 100644 index 0000000000..31af7118c7 --- /dev/null +++ b/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_old_and_new_primary.py @@ -0,0 +1,34 @@ +from plenum.test.test_node import get_master_primary_node, get_first_master_non_primary_node +from plenum.test.view_change.helper import view_change_in_between_3pc +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + + +def slow_nodes(node_set): + return [get_master_primary_node(node_set), + get_first_master_non_primary_node(node_set)] + + +def test_view_change_in_between_3pc_2_of_4_nodes_with_old_and_new_primary( + txnPoolNodeSet, looper, wallet1, client): + """ + - Slow processing 3PC messages for 2 of 4 node (2>f) + - Slow both current and next primaries + - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + slow_nodes(txnPoolNodeSet), + wallet1, client) + + +def test_view_change_in_between_3pc_2_of_4_nodes_with_old_and_new_primary_long_delay( + txnPoolNodeSet, looper, wallet1, client): + """ + - Slow processing 3PC messages for 2 of 4 node (2>f) + - Slow both current and next primaries + - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + slow_nodes(txnPoolNodeSet), + wallet1, client, + slow_delay=20) diff --git a/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_old_primary.py b/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_old_primary.py new file mode 100644 index 
0000000000..df32ed292a --- /dev/null +++ b/plenum/test/view_change/slow_nodes/test_view_change_2_of_4_nodes_with_old_primary.py @@ -0,0 +1,36 @@ +from plenum.test.test_node import get_master_primary_node, get_last_master_non_primary_node +from plenum.test.view_change.helper import view_change_in_between_3pc +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + + +def slow_nodes(node_set): + return [get_master_primary_node(node_set), + get_last_master_non_primary_node(node_set)] + + +def test_view_change_in_between_3pc_2_of_4_nodes_with_old_primary( + txnPoolNodeSet, looper, wallet1, client): + """ + - Slow processing 3PC messages for 2 of 4 node (2>f) + - Slow the current Primary node and the last non-primary node (it will not + be the next primary because of round-robin). + - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + slow_nodes(txnPoolNodeSet), + wallet1, client) + + +def test_view_change_in_between_3pc_2_of_4_nodes_with_old_primary_long_delay( + txnPoolNodeSet, looper, wallet1, client): + """ + - Slow processing 3PC messages for 2 of 4 node (2>f) + - Slow the current Primary node and the last non-primary node (it will not + be the next primary because of round-robin). 
+ - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + slow_nodes(txnPoolNodeSet), + wallet1, client, + slow_delay=20) \ No newline at end of file diff --git a/plenum/test/view_change/slow_nodes/test_view_change_all_nodes.py b/plenum/test/view_change/slow_nodes/test_view_change_all_nodes.py new file mode 100644 index 0000000000..8c9a712cde --- /dev/null +++ b/plenum/test/view_change/slow_nodes/test_view_change_all_nodes.py @@ -0,0 +1,25 @@ +from plenum.test.view_change.helper import view_change_in_between_3pc +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + + +def test_view_change_in_between_3pc_all_nodes(txnPoolNodeSet, looper, + wallet1, client): + """ + - Slow processing 3PC messages for all nodes + - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, txnPoolNodeSet, wallet1, + client) + + +def test_view_change_in_between_3pc_all_nodes_long_delay(txnPoolNodeSet, looper, + wallet1, client): + """ + - Slow processing 3PC messages for all nodes + - do view change + """ + view_change_in_between_3pc(looper, txnPoolNodeSet, + txnPoolNodeSet, + wallet1, client, + slow_delay=20) \ No newline at end of file diff --git a/plenum/test/view_change/slow_nodes/test_view_change_all_nodes_random_delay.py b/plenum/test/view_change/slow_nodes/test_view_change_all_nodes_random_delay.py new file mode 100644 index 0000000000..eda6dab7bf --- /dev/null +++ b/plenum/test/view_change/slow_nodes/test_view_change_all_nodes_random_delay.py @@ -0,0 +1,31 @@ +from plenum.test.view_change.helper import view_change_in_between_3pc_random_delays +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + + +TestRunningTimeLimitSec = 300 + + +def test_view_change_in_between_3pc_all_nodes_random_delays(txnPoolNodeSet, + looper, wallet1, + client): + """ + - Slow processing 3PC messages for all nodes randomly + - do view 
change + """ + view_change_in_between_3pc_random_delays(looper, txnPoolNodeSet, + txnPoolNodeSet, wallet1, client) + + +def test_view_change_in_between_3pc_all_nodes_random_delays_long_delay(txnPoolNodeSet, + looper, wallet1, + client): + """ + - Slow processing 3PC messages for all nodes randomly + - do view change + """ + view_change_in_between_3pc_random_delays(looper, txnPoolNodeSet, + txnPoolNodeSet, + wallet1, client, + min_delay=5, + max_delay=30) \ No newline at end of file diff --git a/plenum/test/view_change/slow_nodes/test_view_change_complex.py b/plenum/test/view_change/slow_nodes/test_view_change_complex.py new file mode 100644 index 0000000000..87a251b8e3 --- /dev/null +++ b/plenum/test/view_change/slow_nodes/test_view_change_complex.py @@ -0,0 +1,46 @@ +from plenum.test.view_change.helper import \ + view_change_in_between_3pc_random_delays +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + + +TestRunningTimeLimitSec = 300 + + +def test_view_change_complex( + txnPoolNodeSet, looper, wallet1, client): + """ + - Complex scenario with multiple view changes + """ + + # # 1. check if 2 of 4 nodes (non-primary) are slowed + # slow_nodes = [replica.node for replica in getNonPrimaryReplicas(txnPoolNodeSet)[1:]] + # view_change_in_between_3pc(looper, txnPoolNodeSet, slow_nodes, wallet1, + # client) + # + # # 2. check if 2 of 4 nodes (including old primary) are slowed + # slow_nodes = [get_master_primary_node(txnPoolNodeSet), + # get_last_master_non_primary_node(txnPoolNodeSet)] + # view_change_in_between_3pc(looper, txnPoolNodeSet, slow_nodes, wallet1, + # client) + # + # # 3. check if 2 of 4 nodes (including new primary) are slowed + # slow_nodes = [get_first_master_non_primary_node(txnPoolNodeSet), + # get_last_master_non_primary_node(txnPoolNodeSet)] + # view_change_in_between_3pc(looper, txnPoolNodeSet, slow_nodes, wallet1, + # client) + # + # # 4. 
check if all nodes are slowed + # view_change_in_between_3pc(looper, txnPoolNodeSet, txnPoolNodeSet, wallet1, + # client) + + view_change_in_between_3pc_random_delays(looper, txnPoolNodeSet, + txnPoolNodeSet, wallet1, client, + min_delay=0, max_delay=10) + view_change_in_between_3pc_random_delays(looper, txnPoolNodeSet, + txnPoolNodeSet, wallet1, client, + min_delay=1, max_delay=5) + view_change_in_between_3pc_random_delays(looper, txnPoolNodeSet, + txnPoolNodeSet, wallet1, client, + min_delay=5, max_delay=50) + diff --git a/plenum/test/view_change/test_3pc_msgs_during_view_change.py b/plenum/test/view_change/test_3pc_msgs_during_view_change.py new file mode 100644 index 0000000000..7e11bc9da4 --- /dev/null +++ b/plenum/test/view_change/test_3pc_msgs_during_view_change.py @@ -0,0 +1,66 @@ +import pytest +from plenum.test.delayers import ppgDelay +from plenum.test.helper import sendRandomRequests, waitRejectFromPoolWithReason, send_pre_prepare, send_prepare, \ + send_commit, waitForSufficientRepliesForRequests +from plenum.test.test_node import getPrimaryReplica +from plenum.test.view_change.helper import check_replica_queue_empty, check_all_replica_queue_empty + + +@pytest.mark.skip('Currently we stash client requests during view change') +def test_no_requests_processed_during_view_change(looper, nodeSet, + client1, wallet1): + for node in nodeSet: + node.view_change_in_progress = True + + sendRandomRequests(wallet1, client1, 10) + + waitRejectFromPoolWithReason(looper, nodeSet, client1, 'Can not process requests when view change is in progress') + + for node in nodeSet: + check_replica_queue_empty(node) + + +@pytest.mark.skip('The filter is not enabled now') +def test_no_new_view_3pc_messages_processed_during_view_change(looper, nodeSet, + client1, wallet1): + for node in nodeSet: + node.view_change_in_progress = True + + new_view_no = getPrimaryReplica(nodeSet).node.viewNo + 1 + pp_seq_no = 1 + + send_pre_prepare(new_view_no, pp_seq_no, wallet1, nodeSet) + 
looper.runFor(1) + check_all_replica_queue_empty(nodeSet) + + send_prepare(new_view_no, pp_seq_no, nodeSet) + looper.runFor(1) + check_all_replica_queue_empty(nodeSet) + + send_commit(new_view_no, pp_seq_no, nodeSet) + looper.runFor(1) + check_all_replica_queue_empty(nodeSet) + +@pytest.mark.skip('The filter is not enabled now') +def test_old_view_requests_processed_during_view_change(looper, nodeSet, + client1, wallet1): + """ + Make sure that requests sent before view change started are processed and replies are returned: + - delay Propogates (to make sure that requests are not ordered before view change is started) + - send requests + - check that requests are ordered despite of view change being in progress + """ + for node in nodeSet: + node.view_change_in_progress = False + node.nodeIbStasher.delay(ppgDelay(3, 0)) + + + reqs = sendRandomRequests(wallet1, client1, 2) + looper.runFor(1) + + for node in nodeSet: + node.view_change_in_progress = True + + waitForSufficientRepliesForRequests(looper, client1, requests=reqs) + + diff --git a/plenum/test/view_change/test_disable_view_change.py b/plenum/test/view_change/test_disable_view_change.py index c89ef14ad8..4bb0a36d2a 100644 --- a/plenum/test/view_change/test_disable_view_change.py +++ b/plenum/test/view_change/test_disable_view_change.py @@ -1,4 +1,6 @@ import pytest +from plenum.test.helper import waitForViewChange +from plenum.test.view_change.helper import simulate_slow_master @pytest.fixture(scope="module") @@ -7,11 +9,13 @@ def disable_view_change_config(tconf): return tconf -def test_disable_view_change(disable_view_change_config, simulate_slow_master): +def test_disable_view_change(disable_view_change_config, looper, nodeSet, up, viewNo, + wallet1, client1): assert disable_view_change_config assert isinstance(disable_view_change_config.unsafe, set) assert 'disable_view_change' in disable_view_change_config.unsafe - with pytest.raises(RuntimeError) as e_info: - simulate_slow_master() - assert 
e_info.value.args == ('view did not change',) + simulate_slow_master(looper, nodeSet, wallet1, client1) + + with pytest.raises(AssertionError): + waitForViewChange(looper, nodeSet, expectedViewNo=viewNo + 1) diff --git a/plenum/test/view_change/test_elections_after_view_change.py b/plenum/test/view_change/test_elections_after_view_change.py deleted file mode 100644 index 95a93a39ec..0000000000 --- a/plenum/test/view_change/test_elections_after_view_change.py +++ /dev/null @@ -1,40 +0,0 @@ -from functools import partial - -from stp_core.loop.eventually import eventually -from stp_core.loop.looper import Looper -from plenum.test import waits -from plenum.test.delayers import ppDelay -from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies -from plenum.test.test_node import TestNodeSet, getNonPrimaryReplicas, \ - checkProtocolInstanceSetup, checkViewChangeInitiatedForNode - -nodeCount = 7 - - -# noinspection PyIncorrectDocstring -def testElectionsAfterViewChange(delayed_perf_chk, looper: Looper, - nodeSet: TestNodeSet, up, wallet1, client1): - """ - Test that a primary election does happen after a view change - """ - - # Delay processing of PRE-PREPARE from all non primary replicas of master - # so master's throughput falls - # and view changes - delay = 10 - nonPrimReps = getNonPrimaryReplicas(nodeSet, 0) - for r in nonPrimReps: - r.node.nodeIbStasher.delay(ppDelay(delay, 0)) - - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) - - # Ensure view change happened for both node and its primary elector - timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodeSet)) - for node in nodeSet: - looper.run(eventually(partial(checkViewChangeInitiatedForNode, node, 1), - retryWait=1, timeout=timeout)) - - # Ensure elections are done again and pool is setup again with appropriate - # protocol instances and each protocol instance is setup properly too - timeout = waits.expectedPoolElectionTimeout(len(nodeSet)) + delay - 
checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, customTimeout=timeout) diff --git a/plenum/test/view_change/test_inst_chng_msg_throttling.py b/plenum/test/view_change/test_inst_chng_msg_throttling.py index c0c4a8d9c5..eb5a966bf0 100644 --- a/plenum/test/view_change/test_inst_chng_msg_throttling.py +++ b/plenum/test/view_change/test_inst_chng_msg_throttling.py @@ -4,7 +4,7 @@ def testInstChngMsgThrottling(nodeSet, looper, up, viewNo): """ 2 nodes out of 4 keep on sending INSTANCE_CHANGE messages as they - find the master to be slow but since we need 3 out of 4 (2f+1) to say that + find the master to be slow but since we need 3 out of 4 (n-f) to say that master is slow for a view change to happen, a view change does not happen but the nodes finding the master to be slow should not send INSTANCE_CHANGE messages to often. So nodes should throttle sending INSTANCE_CHANGE messages diff --git a/plenum/test/view_change/test_instance_change_msg_checking.py b/plenum/test/view_change/test_instance_change_msg_checking.py index 838fab5b58..f07418f092 100644 --- a/plenum/test/view_change/test_instance_change_msg_checking.py +++ b/plenum/test/view_change/test_instance_change_msg_checking.py @@ -1,9 +1,11 @@ +import re + from stp_core.loop.eventually import eventually -from plenum.common.types import InstanceChange +from plenum.common.messages.node_messages import InstanceChange from plenum.test import waits from plenum.test.test_node import TestNode -DISCARD_REASON = "validation error: expected types" +DISCARD_REASON = "validation error \[InstanceChange\]: expected types" whitelist = [DISCARD_REASON,] @@ -31,7 +33,7 @@ def createInstanceChangeMessage(): params = nodeB.spylog.getLastParams(TestNode.discard) def chk(): - assert DISCARD_REASON in str(params['reason']) + assert re.search(DISCARD_REASON, str(params['reason'])) timeout = waits.expectedNodeToNodeMessageDeliveryTime() looper.run(eventually(chk, timeout=timeout)) diff --git 
a/plenum/test/view_change/test_master_primary_different_from_previous.py b/plenum/test/view_change/test_master_primary_different_from_previous.py index 65b2f9c4f6..d460c83e39 100644 --- a/plenum/test/view_change/test_master_primary_different_from_previous.py +++ b/plenum/test/view_change/test_master_primary_different_from_previous.py @@ -8,13 +8,12 @@ from plenum.test.test_node import getPrimaryReplica, ensureElectionsDone from plenum.test.pool_transactions.conftest import clientAndWallet1, client1, \ wallet1, client1Connected, looper -from plenum.test.view_change.helper import provoke_and_wait_for_view_change +from plenum.test.view_change.helper import provoke_and_wait_for_view_change, ensure_view_change from stp_core.common.log import getlogger logger = getlogger() -@pytest.mark.skip(reason='SOV-1020') def test_master_primary_different_from_previous(txnPoolNodeSet, looper, client1, wallet1, client1Connected): @@ -23,19 +22,16 @@ def test_master_primary_different_from_previous(txnPoolNodeSet, master instance, it does not matter for other instance. The primary is benign and does not vote for itself. 
""" - old_view_no = checkViewNoForNodes(txnPoolNodeSet) pr = slow_primary(txnPoolNodeSet, 0, delay=10) old_pr_node_name = pr.node.name # View change happens - provoke_and_wait_for_view_change(looper, - txnPoolNodeSet, - old_view_no + 1, - wallet1, - client1) + ensure_view_change(looper, txnPoolNodeSet) logger.debug("VIEW HAS BEEN CHANGED!") + # Elections done ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + # New primary is not same as old primary assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node_name @@ -46,7 +42,7 @@ def test_master_primary_different_from_previous(txnPoolNodeSet, -@pytest.mark.skip(reason='SOV-1020') +@pytest.mark.skip(reason='Nodes use round robin primary selection') def test_master_primary_different_from_previous_view_for_itself(txnPoolNodeSet, looper, client1, wallet1, client1Connected): diff --git a/plenum/test/view_change/test_node_detecting_lag_from_view_change_messages.py b/plenum/test/view_change/test_node_detecting_lag_from_view_change_messages.py new file mode 100644 index 0000000000..4b29e28143 --- /dev/null +++ b/plenum/test/view_change/test_node_detecting_lag_from_view_change_messages.py @@ -0,0 +1,115 @@ +import types + +import pytest + +from plenum.common.util import compare_3PC_keys +from plenum.test.delayers import delay_3pc_messages, icDelay, cDelay +from plenum.test.helper import send_reqs_batches_and_get_suff_replies, \ + sendRandomRequests +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.spy_helpers import get_count +from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.view_change.helper import ensure_view_change +from stp_core.loop.eventually import eventually +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper + + +TestRunningTimeLimitSec = 150 + + +@pytest.mark.skip(reason='Pending complete implementation') +def 
test_node_detecting_lag_from_view_change_done_messages(txnPoolNodeSet, + looper, wallet1, + client1, + client1Connected, + tconf): + """ + A node is slow and after view change starts, it marks it's `last_prepared` + to less than others, after catchup it does not get any txns from others + and finds it has already ordered it's `last_prepared`, but when + it gets ViewChangeDone messages, it starts catchup again and this + time gets the txns. To achieve this delay all 3PC messages to a node so + before view change it has different last_prepared from others. + Also delay processing of COMMITs and INSTANCE_CHANGEs by other nodes + """ + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 2 * 3, 3) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + + slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node + fast_nodes = [n for n in txnPoolNodeSet if n != slow_node] + slow_master_replica = slow_node.master_replica + fast_master_replicas = [n.master_replica for n in fast_nodes] + + delay_3pc = 50 + delay_ic = tconf.PerfCheckFreq + 5 + delay_commit = delay_ic + 10 + delay_3pc_messages([slow_node], 0, delay_3pc) + for n in fast_nodes: + n.nodeIbStasher.delay(icDelay(delay_ic)) + n.nodeIbStasher.delay(cDelay(delay_commit)) + + reqs = [] + for i in range(10): + reqs = reqs + sendRandomRequests(wallet1, client1, 2) + looper.runFor(.2) + + def chk1(): + for rep in fast_master_replicas: + assert compare_3PC_keys( + slow_master_replica.last_prepared_certificate_in_view(), + rep.last_prepared_certificate_in_view()) > 0 + assert slow_master_replica.last_ordered_3pc == rep.last_ordered_3pc + + looper.run(eventually(chk1)) + + no_more_catchup_count = get_count(slow_node, + slow_node.no_more_catchups_needed) + + # Track last prepared for master replica of each node + prepareds = {} + orig_methods = {} + for node in txnPoolNodeSet: + orig_methods[node.name] = node.master_replica.on_view_change_start + + def patched_on_view_change_start(self): + 
orig_methods[self.node.name]() + prepareds[self.node.name] = self.last_prepared_before_view_change + + node.master_replica.on_view_change_start = types.MethodType( + patched_on_view_change_start, node.master_replica) + + ensure_view_change(looper, txnPoolNodeSet, exclude_from_check=fast_nodes) + + def chk2(): + # last_prepared of slow_node is less than fast_nodes + for rep in fast_master_replicas: + assert compare_3PC_keys(prepareds[slow_master_replica.node.name], + prepareds[rep.node.name]) > 0 + + looper.run(eventually(chk2, timeout=delay_ic+5)) + + last_start_catchup_call_at = None + no_more_catchup_call_at = None + + def chk3(): + # no_more_catchups_needed was called since node found no need of catchup + nonlocal last_start_catchup_call_at, no_more_catchup_call_at + assert (get_count(slow_node, slow_node.no_more_catchups_needed) + - no_more_catchup_count) > 0 + + no_more_catchup_call_at = slow_node.spylog.getLast( + slow_node.no_more_catchups_needed).starttime + last_start_catchup_call_at = slow_node.spylog.getLast( + slow_node.start_catchup).starttime + + looper.run(eventually(chk3, timeout=delay_commit)) + + for n in fast_nodes: + n.nodeIbStasher.reset_delays_and_process_delayeds() + n.nodeIbStasher.reset_delays_and_process_delayeds() + + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + + assert slow_node.spylog.getLast(slow_node.start_catchup).starttime > no_more_catchup_call_at + assert slow_node.spylog.getLast(slow_node.start_catchup).starttime > last_start_catchup_call_at diff --git a/plenum/test/view_change/test_pp_seq_no_starts_from_1.py b/plenum/test/view_change/test_pp_seq_no_starts_from_1.py new file mode 100644 index 0000000000..a278c67556 --- /dev/null +++ b/plenum/test/view_change/test_pp_seq_no_starts_from_1.py @@ -0,0 +1,45 @@ +import pytest +from plenum.test.helper import checkViewNoForNodes, \ + sendReqsToNodesAndVerifySuffReplies +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, 
client1Connected, looper +from plenum.test.view_change.helper import ensure_view_change + + +# make sure that we send each reqeust individually to count pp_seq_no determenistically +@pytest.fixture(scope="module") +def tconf(tconf, request): + oldSize = tconf.Max3PCBatchSize + tconf.Max3PCBatchSize = 1 + + def reset(): + tconf.Max3PCBatchSize = oldSize + + request.addfinalizer(reset) + return tconf + + +def test_pp_seq_no_starts_from_0_in_new_view(tconf, txnPoolNodeSet, looper, wallet1, + client1, client1Connected): + # This test fails since last ordered pre-prepare sequence number is + old_view_no = checkViewNoForNodes(txnPoolNodeSet) + + def chk(count): + for node in txnPoolNodeSet: + assert node.master_replica.last_ordered_3pc[1] == count + + chk(0) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + chk(5) + + new_view_no = ensure_view_change(looper, txnPoolNodeSet) + assert new_view_no > old_view_no + chk(5) # no new requests yet, so last ordered 3PC is (0,5) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) + chk(1) # new request for new view => last ordered 3PC is (0,1) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + chk(6) + diff --git a/plenum/test/view_change/test_queueing_req_from_future_view.py b/plenum/test/view_change/test_queueing_req_from_future_view.py index 02daf3f862..500801a110 100644 --- a/plenum/test/view_change/test_queueing_req_from_future_view.py +++ b/plenum/test/view_change/test_queueing_req_from_future_view.py @@ -2,7 +2,7 @@ import pytest -from plenum.test.view_change.helper import provoke_and_wait_for_view_change +from plenum.test.view_change.helper import provoke_and_wait_for_view_change, ensure_view_change from stp_core.loop.eventually import eventually from stp_core.common.log import getlogger from plenum.common.util import getMaxFailures @@ -11,28 +11,24 @@ from plenum.test.helper import sendRandomRequest, \ sendReqsToNodesAndVerifySuffReplies from plenum.test.test_node 
import TestReplica, getNonPrimaryReplicas, \ - checkViewChangeInitiatedForNode + checkViewChangeInitiatedForNode, get_last_master_non_primary_node nodeCount = 7 logger = getlogger() -# TODO: This test needs to be implemented # noinspection PyIncorrectDocstring -@pytest.mark.skip(reason='INDY-84. Complete implementation') def testQueueingReqFromFutureView(delayed_perf_chk, looper, nodeSet, up, wallet1, client1): """ Test if every node queues 3 Phase requests(PRE-PREPARE, PREPARE and COMMIT) - that come from a view which is greater than the current view. Slow down - the primary node of master protocol instance, delay reception and - processing of view change message by a non primary for master instance so - that it starts receiving 3 phase commit messages for next view + that come from a view which is greater than the current view. + - Delay reception and processing of view change messages by a non primary for master instance + => it starts receiving 3 phase commit messages for next view """ - nprs = getNonPrimaryReplicas(nodeSet, 0) - lagging_node = nprs[0].node + lagging_node = get_last_master_non_primary_node(nodeSet) old_view_no = lagging_node.viewNo # Delay processing of instance change on a node @@ -40,17 +36,6 @@ def testQueueingReqFromFutureView(delayed_perf_chk, looper, nodeSet, up, lagging_node.nodeIbStasher.delay(icDelay(delay_ic)) logger.debug('{} will delay its view change'.format(lagging_node)) - # Delay processing of PRE-PREPARE from all non primary replicas of master - # so master's throughput falls and view changes - delay_pp = 5 - pp_delayer = ppDelay(delay_pp, 0) - for r in nprs: - r.node.nodeIbStasher.delay(pp_delayer) - - timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + delay_pp - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5, - customTimeoutPerReq=timeout) - def chk_fut_view(view_no, is_empty): length = len(lagging_node.msgsForFutureViews.get(view_no, ())) if is_empty: @@ -65,65 +50,21 @@ def 
chk_fut_view(view_no, is_empty): .format(lagging_node)) # Every node except Node A should do a view change - provoke_and_wait_for_view_change(looper, - [n for n in nodeSet if n != lagging_node], - old_view_no + 1, - wallet1, client1) - - for node in nodeSet: - node.nodeIbStasher.nodelay(pp_delayer) + ensure_view_change(looper, + [n for n in nodeSet if n != lagging_node], + [lagging_node]) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 3, - customTimeoutPerReq=timeout) - - # Messages queued for future view + # send more requests that will be queued for the lagged node + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 3) l = chk_fut_view(old_view_no + 1, is_empty=False) logger.debug('{} has {} messages for future views' .format(lagging_node, l)) + # reset delays for the lagging_node node so that it finally makes view change + lagging_node.reset_delays_and_process_delayeds() + # Eventually no messages queued for future view looper.run(eventually(chk_fut_view, old_view_no + 1, True, retryWait=1, timeout=delay_ic+10)) logger.debug('{} exhausted pending messages for future views' .format(lagging_node)) - - # timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodeSet)-1) - # # for node in nodeSet: - # # if node.name == nodeA.name: - # # # Node A's view should not have changed yet - # # with pytest.raises(AssertionError): - # # looper.run(eventually(partial( - # # checkViewChangeInitiatedForNode, node, 1), - # # retryWait=1, - # # timeout=timeout)) - # # else: - # # looper.run(eventually( - # # partial(checkViewChangeInitiatedForNode, node, 1), - # # retryWait=1, - # # timeout=timeout)) - # - # - # # NodeA should not have any pending 3 phase request for a later view - # for r in nodeA.replicas: # type: TestReplica - # assert len(r.threePhaseMsgsForLaterView) == 0 - # - # # Reset delays on incoming messages from all nodes - # for node in nodeSet: - # node.nodeIbStasher.nodelay(pp_delayer) - # - # # Send one more request - # 
sendRandomRequest(wallet1, client1) - # - # def checkPending3PhaseReqs(): - # # Get all replicas that have their primary status decided - # reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None] - # # At least one replica should have its primary status decided - # assert len(reps) > 0 - # for r in reps: # type: TestReplica - # logger.debug("primary status for replica {} is {}" - # .format(r, r.primaryNames)) - # assert len(r.threePhaseMsgsForLaterView) > 0 - # - # # NodeA should now have pending 3 phase request for a later view - # timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodeSet)) + delayIcA - # looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=timeout)) diff --git a/plenum/test/view_change/test_view_change.py b/plenum/test/view_change/test_view_change.py index fd67fd0bf9..bd80896bac 100644 --- a/plenum/test/view_change/test_view_change.py +++ b/plenum/test/view_change/test_view_change.py @@ -1,74 +1,56 @@ -import types - -import pytest - -from plenum.server.node import Node -from plenum.test.delayers import delayNonPrimaries -from plenum.test.helper import waitForViewChange, \ - sendReqsToNodesAndVerifySuffReplies -from plenum.test.test_node import get_master_primary_node, getPrimaryReplica, \ - ensureElectionsDone +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.spy_helpers import get_count +from plenum.test.test_node import ensureElectionsDone +from plenum.test.view_change.helper import ensure_view_change nodeCount = 7 -@pytest.fixture() -def viewChangeDone(simulate_slow_master): - primary_node = simulate_slow_master() - assert primary_node.old.name != primary_node.new.name - - # noinspection PyIncorrectDocstring -def testViewChange(viewChangeDone): +def test_view_change_on_empty_ledger(nodeSet, up, looper): + """ - Test that a view change is done when the performance of master goes down - Send multiple 
requests from the client and delay some requests by master - instance so that there is a view change. All nodes will agree that master - performance degraded + Check that view change is done when no txns in the ldegr """ - pass + ensure_view_change(looper, nodeSet) + ensureElectionsDone(looper=looper, nodes=nodeSet) + ensure_all_nodes_have_same_data(looper, nodes=nodeSet) -def testViewChangeCase1(nodeSet, looper, up, wallet1, client1, viewNo): +# noinspection PyIncorrectDocstring +def test_view_change_after_some_txns(looper, nodeSet, up, viewNo, + wallet1, client1): """ - Node will change view even though it does not find the master to be degraded - when a quorum of nodes agree that master performance degraded + Check that view change is done after processing some of txns """ + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) - m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) - - # Delay processing of PRE-PREPARE from all non primary replicas of master - # so master's performance falls and view changes - delayNonPrimaries(nodeSet, 0, 10) - - pr = getPrimaryReplica(nodeSet, 0) - relucatantNode = pr.node - - # Count sent instance changes of all nodes - sentInstChanges = {} - instChngMethodName = Node.sendInstanceChange.__name__ - for n in nodeSet: - sentInstChanges[n.name] = n.spylog.count(instChngMethodName) + ensure_view_change(looper, nodeSet) + ensureElectionsDone(looper=looper, nodes=nodeSet) + ensure_all_nodes_have_same_data(looper, nodes=nodeSet) - # Node reluctant to change view, never says master is degraded - relucatantNode.monitor.isMasterDegraded = types.MethodType( - lambda x: False, relucatantNode.monitor) +# noinspection PyIncorrectDocstring +def test_send_more_after_view_change(looper, nodeSet, up, + wallet1, client1): + """ + Check that we can send more requests after view change + """ sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) - # Check that view change happened for all nodes - 
waitForViewChange(looper, nodeSet, expectedViewNo=viewNo+1) + ensure_view_change(looper, nodeSet) + ensureElectionsDone(looper=looper, nodes=nodeSet) + ensure_all_nodes_have_same_data(looper, nodes=nodeSet) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 10) - # All nodes except the reluctant node should have sent a view change and - # thus must have called `sendInstanceChange` - for n in nodeSet: - if n.name != relucatantNode.name: - assert n.spylog.count(instChngMethodName) > \ - sentInstChanges.get(n.name, 0) - else: - assert n.spylog.count(instChngMethodName) == \ - sentInstChanges.get(n.name, 0) +def test_node_notified_about_primary_election_result(nodeSet, looper, up): + old_counts = {node.name: get_count(node, node.primary_selected) for node in nodeSet} + ensure_view_change(looper, nodeSet) ensureElectionsDone(looper=looper, nodes=nodeSet) - new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) - assert m_primary_node.name != new_m_primary_node.name + ensure_all_nodes_have_same_data(looper, nodes=nodeSet) + + for node in nodeSet: + assert get_count(node, node.primary_selected) > old_counts[node.name] diff --git a/plenum/test/view_change/test_view_change_done_delayed.py b/plenum/test/view_change/test_view_change_done_delayed.py new file mode 100644 index 0000000000..818b5c71bc --- /dev/null +++ b/plenum/test/view_change/test_view_change_done_delayed.py @@ -0,0 +1,66 @@ +from plenum.test.delayers import delay_3pc_messages, vcd_delay +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, \ + send_reqs_batches_and_get_suff_replies +from plenum.test.node_catchup.helper import waitNodeDataEquality, \ + ensure_all_nodes_have_same_data +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper +from plenum.test.test_node import getNonPrimaryReplicas +from plenum.test.view_change.helper import ensure_view_change +from stp_core.loop.eventually import 
eventually + + +def test_view_change_done_delayed(txnPoolNodeSet, looper, wallet1, client1, + client1Connected): + """ + A node is slow so is behind other nodes, after view change, it catches up + but it also gets view change message as delayed, a node should start + participating only when caught up and ViewChangeCone quorum received. + """ + nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)] + slow_node = nprs[-1] + other_nodes = [n for n in txnPoolNodeSet if n != slow_node] + delay_3pc = 10 + delay_vcd = 25 + delay_3pc_messages([slow_node], 0, delay_3pc) + slow_node.nodeIbStasher.delay(vcd_delay(delay_vcd)) + + def chk(node): + assert node.elector.has_acceptable_view_change_quorum + assert node.elector.primary_verified + assert node.isParticipating + assert None not in {r.isPrimary for r in node.replicas} + + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 5 * 4, 4) + + ensure_view_change(looper, nodes=txnPoolNodeSet) + + # After view change, the slow node successfully completes catchup + waitNodeDataEquality(looper, slow_node, *other_nodes) + + # Other nodes complete view change, select primary and participate + for node in other_nodes: + looper.run(eventually(chk, node, retryWait=1)) + + # Since `ViewChangeCone` is delayed, slow_node is not able to select primary + # and participate + assert not slow_node.elector.has_acceptable_view_change_quorum + assert not slow_node.elector.primary_verified + assert not slow_node.isParticipating + assert {r.isPrimary for r in slow_node.replicas} == {None} + + # Send requests to make sure pool is functional + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + + # Repair network + slow_node.reset_delays_and_process_delayeds() + + # `slow_node` selects primary and participate + looper.run(eventually(chk, slow_node, retryWait=1)) + + # Processes requests received during lack of primary + waitNodeDataEquality(looper, slow_node, *other_nodes) + + # Send more requests and compare 
data of all nodes + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) diff --git a/plenum/test/view_change/test_view_change_happens_post_timeout.py b/plenum/test/view_change/test_view_change_happens_post_timeout.py index e2f5ceed7d..0d85892a93 100644 --- a/plenum/test/view_change/test_view_change_happens_post_timeout.py +++ b/plenum/test/view_change/test_view_change_happens_post_timeout.py @@ -1,8 +1,42 @@ import pytest +from plenum.test.test_node import getNonPrimaryReplicas -@pytest.mark.skip(reason='INDY-101. Not implemented') -def test_view_change_happens_post_timeout(): - # TODO: - # View change should not happen unless the timeout expires - raise NotImplementedError +@pytest.fixture(scope="module") +def tconf(tconf, request): + oldViewChangeWindowSize = tconf.ViewChangeWindowSize + tconf.ViewChangeWindowSize = 5 + + def reset(): + tconf.ViewChangeWindowSize = oldViewChangeWindowSize + + request.addfinalizer(reset) + return tconf + + +def is_instance_change_sent_for_view_no(node, view_no): + return node.instanceChanges.hasView(view_no) + + +def test_instance_change_happens_post_timeout(tconf, looper, nodeSet, up): + non_prim_node = getNonPrimaryReplicas(nodeSet)[0].node + old_view_no = non_prim_node.viewNo + + # first sending on InstanceChange: OK + new_view_no = old_view_no + 1 + assert not is_instance_change_sent_for_view_no(non_prim_node, new_view_no) + non_prim_node.sendInstanceChange(new_view_no) + assert is_instance_change_sent_for_view_no(non_prim_node, new_view_no) + + # second immediate sending on InstanceChange: NOT OK + new_view_no = new_view_no + 1 + assert not is_instance_change_sent_for_view_no(non_prim_node, new_view_no) + non_prim_node.sendInstanceChange(new_view_no) + assert not is_instance_change_sent_for_view_no(non_prim_node, new_view_no) + + # third sending on InstanceChange after ViewChangeWindowSize timepout: OK + new_view_no = new_view_no + 1 + 
looper.runFor(tconf.ViewChangeWindowSize) + assert not is_instance_change_sent_for_view_no(non_prim_node, new_view_no) + non_prim_node.sendInstanceChange(new_view_no) + assert is_instance_change_sent_for_view_no(non_prim_node, new_view_no) diff --git a/plenum/test/view_change/test_view_change_max_catchup_rounds.py b/plenum/test/view_change/test_view_change_max_catchup_rounds.py new file mode 100644 index 0000000000..ee545996b1 --- /dev/null +++ b/plenum/test/view_change/test_view_change_max_catchup_rounds.py @@ -0,0 +1,67 @@ +from plenum.common.util import check_if_all_equal_in_list +from plenum.test.delayers import pDelay, cDelay +from plenum.test.helper import send_reqs_batches_and_get_suff_replies, \ + sendRandomRequests +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.pool_transactions.conftest import clientAndWallet1, \ + client1, wallet1, client1Connected, looper +from plenum.test.primary_selection.test_primary_selection_pool_txn import \ + ensure_pool_functional +from plenum.test.test_node import getNonPrimaryReplicas, ensureElectionsDone +from plenum.test.view_change.helper import ensure_view_change + + +def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper, wallet1, + client1, client1Connected): + """ + The node should do only a fixed rounds of catchup. For this delay Prepares + and Commits for 2 non-primary nodes by a large amount which is equivalent + to loss of Prepares and Commits. Make sure 2 nodes have a different last + prepared certificate from other two. 
Then do a view change, make sure view + change completes and the pool does not process the request that were + prepared by only a subset of the nodes + """ + send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 2 * 3, 3) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + ledger_summary = txnPoolNodeSet[0].elector.ledger_summary + + slow_nodes = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)[-2:]] + fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes] + + # Make node slow to process Prepares and Commits + for node in slow_nodes: + node.nodeIbStasher.delay(pDelay(120, 0)) + node.nodeIbStasher.delay(cDelay(120, 0)) + + sendRandomRequests(wallet1, client1, 5) + looper.runFor(3) + + ensure_view_change(looper, nodes=txnPoolNodeSet) + + def last_prepared(nodes): + lst = [n.master_replica.last_prepared_certificate_in_view() for n in nodes] + # All nodes have same last prepared + assert check_if_all_equal_in_list(lst) + return lst[0] + + last_prepared_slow = last_prepared(slow_nodes) + last_prepared_fast = last_prepared(fast_nodes) + + # Check `slow_nodes` and `fast_nodes` set different last_prepared + assert last_prepared_fast != last_prepared_slow + + # View change complete + ensureElectionsDone(looper, txnPoolNodeSet) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + + # The requests which were prepared by only a subset of the nodes were + # not ordered + assert txnPoolNodeSet[0].elector.ledger_summary == ledger_summary + + for node in slow_nodes: + node.nodeIbStasher.reset_delays_and_process_delayeds() + + # Make sure pool is functional + ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + last_prepared(txnPoolNodeSet) diff --git a/plenum/test/view_change/test_view_change_on_master_degraded.py b/plenum/test/view_change/test_view_change_on_master_degraded.py new file mode 100644 index 0000000000..2d20a57ba5 --- /dev/null +++ 
b/plenum/test/view_change/test_view_change_on_master_degraded.py @@ -0,0 +1,80 @@ +import types + +import pytest + +from plenum.server.node import Node +from plenum.test.delayers import delayNonPrimaries +from plenum.test.helper import waitForViewChange, \ + sendReqsToNodesAndVerifySuffReplies +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.test_node import get_master_primary_node, getPrimaryReplica, \ + ensureElectionsDone +from plenum.test.view_change.helper import simulate_slow_master + +nodeCount = 7 + +# noinspection PyIncorrectDocstring +def test_view_change_on_performance_degraded(looper, nodeSet, up, viewNo, + wallet1, client1): + """ + Test that a view change is done when the performance of master goes down + Send multiple requests from the client and delay some requests by master + instance so that there is a view change. All nodes will agree that master + performance degraded + """ + old_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + + simulate_slow_master(looper, nodeSet, wallet1, client1) + waitForViewChange(looper, nodeSet, expectedViewNo=viewNo + 1) + + ensureElectionsDone(looper=looper, nodes=nodeSet) + ensure_all_nodes_have_same_data(looper, nodes=nodeSet) + new_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + assert old_primary_node.name != new_primary_node.name + + +def test_view_change_on_quorum_of_master_degraded(nodeSet, looper, up, + wallet1, client1, viewNo): + """ + Node will change view even though it does not find the master to be degraded + when a quorum of nodes agree that master performance degraded + """ + + m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + + # Delay processing of PRE-PREPARE from all non primary replicas of master + # so master's performance falls and view changes + delayNonPrimaries(nodeSet, 0, 10) + + pr = getPrimaryReplica(nodeSet, 0) + relucatantNode = pr.node + + # Count sent instance changes of 
all nodes + sentInstChanges = {} + instChngMethodName = Node.sendInstanceChange.__name__ + for n in nodeSet: + sentInstChanges[n.name] = n.spylog.count(instChngMethodName) + + # Node reluctant to change view, never says master is degraded + relucatantNode.monitor.isMasterDegraded = types.MethodType( + lambda x: False, relucatantNode.monitor) + + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) + + # Check that view change happened for all nodes + waitForViewChange(looper, nodeSet, expectedViewNo=viewNo+1) + + # All nodes except the reluctant node should have sent a view change and + # thus must have called `sendInstanceChange` + for n in nodeSet: + if n.name != relucatantNode.name: + assert n.spylog.count(instChngMethodName) > \ + sentInstChanges.get(n.name, 0) + else: + assert n.spylog.count(instChngMethodName) == \ + sentInstChanges.get(n.name, 0) + + ensureElectionsDone(looper=looper, nodes=nodeSet) + new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + assert m_primary_node.name != new_m_primary_node.name + ensure_all_nodes_have_same_data(looper, nodes=nodeSet) diff --git a/plenum/test/view_change/test_view_change_timeout.py b/plenum/test/view_change/test_view_change_timeout.py new file mode 100644 index 0000000000..3877033878 --- /dev/null +++ b/plenum/test/view_change/test_view_change_timeout.py @@ -0,0 +1,73 @@ +import pytest +from plenum.test.delayers import reset_delays_and_process_delayeds, vcd_delay +from plenum.test.helper import waitForViewChange +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.primary_selection.test_primary_selection_pool_txn import \ + ensure_pool_functional +from plenum.test.spy_helpers import get_count, getAllReturnVals +from plenum.test.test_node import get_master_primary_node, \ + ensureElectionsDone +from plenum.test.view_change.helper import ensure_view_change + +nodeCount = 7 + + +def test_view_change_timeout(nodeSet, looper, up, wallet1, 
client1): + """ + Check view change restarted if it is not completed in time + """ + + m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + initial_view_no = waitForViewChange(looper, nodeSet) + # Setting view change timeout to low value to make test pass quicker + for node in nodeSet: + node._view_change_timeout = 5 + + # Delaying view change messages to make first view change fail + # due to timeout + for node in nodeSet: + node.nodeIbStasher.delay(vcd_delay(delay=50)) + + # Delaying preprepae messages from nodes and + # sending request to force view change + #for i in range(3): + # delayNonPrimaries(nodeSet, instId=i, delay=10) + #sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4) + + times = {} + for node in nodeSet: + times[node.name] = { + 'called': get_count(node, node._check_view_change_completed), + 'returned_true': len(getAllReturnVals( + node, node._check_view_change_completed, compare_val_to=True)) + } + + for node in nodeSet: + node.startViewChange(initial_view_no + 1) + + # First view change should fail, because of delayed + # instance change messages. This then leads to new view change that we need. 
+ with pytest.raises(AssertionError): + ensureElectionsDone(looper=looper, nodes=nodeSet, customTimeout=10) + + # Resetting delays to let second view change go well + reset_delays_and_process_delayeds(nodeSet) + + # This view change should be completed with no problems + ensureElectionsDone(looper=looper, nodes=nodeSet) + ensure_all_nodes_have_same_data(looper, nodes=nodeSet) + new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values())) + assert m_primary_node.name != new_m_primary_node.name + + # The timeout method has been called at least once + for node in nodeSet: + assert get_count(node, node._check_view_change_completed) > times[node.name]['called'] + assert len(getAllReturnVals(node, + node._check_view_change_completed, + compare_val_to=True)) > times[node.name]['returned_true'] + + # Multiple view changes have been initiated + for node in nodeSet: + assert (node.viewNo - initial_view_no) > 1 + + ensure_pool_functional(looper, nodeSet, wallet1, client1) diff --git a/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py b/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py index d4c8104907..48a13f802e 100644 --- a/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py +++ b/plenum/test/view_change/test_view_changes_if_master_primary_disconnected.py @@ -1,17 +1,14 @@ import pytest +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data -from plenum.test.test_node import ensureElectionsDone, \ - primaryNodeNameForInstance, nodeByName, get_master_primary_node, \ - ensure_node_disconnected -from plenum.test import waits +from plenum.test.test_node import get_master_primary_node, ensure_node_disconnected, ensureElectionsDone from stp_core.loop.eventually import eventually from plenum.test.pool_transactions.conftest import clientAndWallet1, \ client1, wallet1, client1Connected, looper from plenum.test.helper import stopNodes, checkViewNoForNodes, \ - 
sendReqsToNodesAndVerifySuffReplies + sendReqsToNodesAndVerifySuffReplies, waitForViewChange -@pytest.mark.skip(reason='SOV-1020') def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper, wallet1, client1, client1Connected, tconf): @@ -28,18 +25,18 @@ def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, # Stop primary stopNodes([old_pr_node], looper) looper.removeProdable(old_pr_node) - remainingNodes = set(nodes) - {old_pr_node} + remainingNodes = list(set(nodes) - {old_pr_node}) # Sometimes it takes time for nodes to detect disconnection ensure_node_disconnected(looper, old_pr_node, remainingNodes, timeout=20) looper.runFor(tconf.ToleratePrimaryDisconnection + 2) - def assertNewPrimariesElected(): - checkViewNoForNodes(remainingNodes, viewNoBefore + 1) - new_pr_node = get_master_primary_node(remainingNodes) - assert old_pr_node != new_pr_node - # Give some time to detect disconnection and then verify that view has # changed and new primary has been elected - looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=90)) + waitForViewChange(looper, remainingNodes, viewNoBefore + 1) + ensure_all_nodes_have_same_data(looper, nodes=remainingNodes) + new_pr_node = get_master_primary_node(remainingNodes) + assert old_pr_node != new_pr_node + sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5) + diff --git a/plenum/test/view_change/test_view_not_changed_if_backup_primary_disconnected.py b/plenum/test/view_change/test_view_not_changed_if_backup_primary_disconnected.py index c9432813b0..7a6d8d8c0f 100644 --- a/plenum/test/view_change/test_view_not_changed_if_backup_primary_disconnected.py +++ b/plenum/test/view_change/test_view_not_changed_if_backup_primary_disconnected.py @@ -9,7 +9,7 @@ def testViewNotChangedIfBackupPrimaryDisconnected(txnPoolNodeSet, txnPoolNodesLooper, tconf): """ - View change does not occurs when master's primary is disconnected + View change does not occurs when backup's primary is disconnected """ # 
Setup diff --git a/plenum/test/view_change/test_view_not_changed_when_short_disconnection.py b/plenum/test/view_change/test_view_not_changed_when_short_disconnection.py index 9425d9c889..3b83028f5c 100644 --- a/plenum/test/view_change/test_view_not_changed_when_short_disconnection.py +++ b/plenum/test/view_change/test_view_not_changed_when_short_disconnection.py @@ -9,7 +9,6 @@ from plenum.test.test_node import get_master_primary_node -@pytest.mark.skip(reason='SOV-1020') def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper, wallet1, client1, client1Connected, tconf): diff --git a/plenum/test/waits.py b/plenum/test/waits.py index a51142155a..c6905ef173 100644 --- a/plenum/test/waits.py +++ b/plenum/test/waits.py @@ -1,5 +1,4 @@ -from plenum.common import util -from stp_zmq.zstack import KITZStack +from plenum.server.quorums import Quorums from stp_core.common.log import getlogger from plenum.common.config_util import getConfig @@ -44,6 +43,14 @@ def expectedNodeToNodeMessageDeliveryTime(): return __Peer2PeerRequestDeliveryTime +def expectedNodeToAllNodesMessageDeliveryTime(nodeCount): + """ + From: The Node ready to send a message + To: The message is received by all other Nodes + """ + return expectedNodeToNodeMessageDeliveryTime() * (nodeCount - 1) + + ######################### # Pool internal timeouts ######################### @@ -70,7 +77,8 @@ def expectedPoolInterconnectionTime(nodeCount): # multiply by 2 because we need to re-create connections which can be done on a second re-try only # (we may send pings on some of the re-tries) return min(0.8 * config.TestRunningTimeLimitSec, - interconnectionCount * nodeConnectionTimeout + 2 * KITZStack.RETRY_TIMEOUT_RESTRICTED + 2) + interconnectionCount * nodeConnectionTimeout + + 2 * config.RETRY_TIMEOUT_RESTRICTED + 2) def expectedPoolDisconnectionTime(nodeCount): @@ -235,7 +243,7 @@ def expectedClientToPoolConnectionTimeout(nodeCount): # fixed in the 3pcbatch feature # 
https://evernym.atlassian.net/browse/SOV-995 return config.ExpectedConnectTime * nodeCount + \ - KITZStack.RETRY_TIMEOUT_RESTRICTED + config.RETRY_TIMEOUT_RESTRICTED def expectedClientConsistencyProof(nodeCount): @@ -243,7 +251,7 @@ def expectedClientConsistencyProof(nodeCount): From: the Client is connected to the Pool To: the Client finished the consistency proof procedure """ - qN = util.get_strong_quorum(nodeCount) + qN = Quorums(nodeCount).commit.value return qN * __Peer2PeerRequestExchangeTime + \ config.ConsistencyProofsTimeout @@ -253,7 +261,7 @@ def expectedClientCatchupTime(nodeCount): From: the Client finished the consistency proof procedure To: the Client finished the catchup procedure """ - qN = util.get_strong_quorum(nodeCount) + qN = Quorums(nodeCount).commit.value return qN * 2 * __Peer2PeerRequestExchangeTime + \ config.CatchupTransactionsTimeout @@ -263,7 +271,7 @@ def expectedClientToPoolRequestDeliveryTime(nodeCount): From: the Client send a request To: the request is delivered to f nodes """ - qN = util.get_strong_quorum(nodeCount) + qN = Quorums(nodeCount).commit.value return __Peer2PeerRequestExchangeTime * qN diff --git a/plenum/test/wallet/test_wallet_storage_helper.py b/plenum/test/wallet/test_wallet_storage_helper.py new file mode 100644 index 0000000000..51e1f53393 --- /dev/null +++ b/plenum/test/wallet/test_wallet_storage_helper.py @@ -0,0 +1,258 @@ +import pytest +import os +import stat +import jsonpickle + +from stp_core.common.log import getlogger +from plenum.client.wallet import Wallet, WalletStorageHelper + +logger = getlogger() + +DEFAULT_DMODE = 0o700 +DEFAULT_FMODE = 0o600 +NOT_LISTED_PERMISSION = stat.S_IWOTH + + +def encode_wallet(wallet): + return jsonpickle.encode(wallet, keys=True) + + +def decode_wallet(wdata): + return jsonpickle.decode(wdata, keys=True) + + +def set_permissions(path, mode): + os.chmod(path, mode) + return stat.S_IMODE(os.stat(path).st_mode) + +def get_permissions(path): + return 
stat.S_IMODE(os.stat(path).st_mode) + + +def check_permissions(path, mode): + assert get_permissions(path) == mode + + +@pytest.fixture(scope='function') +def tdir_hierarchy(tdir_for_func): + tdir_for_func = os.path.join(tdir_for_func, 'root') + dirs = ['1/2/3', 'a/b/c'] + files = ['1/2/3/1.txt', 'a/2.txt', 'a/b/3.txt'] + for d in dirs: + os.makedirs(os.path.join(tdir_for_func, d)) + for f in files: + open(os.path.join(tdir_for_func, f), 'a').close() + + # switch off test permission + for path in dirs + files: + path = os.path.join(tdir_for_func, path) + mode = get_permissions(path) + if mode & NOT_LISTED_PERMISSION: + set_permissions(path, mode & ~NOT_LISTED_PERMISSION) + + return (tdir_for_func, dirs, files) + + +@pytest.fixture(scope='function') +def keyrings_base_dir(tdir_for_func): + return os.path.join(tdir_for_func, 'keyrings') + + +@pytest.fixture(scope='function') +def test_wallet(): + return Wallet("TestWallet") + +def test_keyring_base_dir_new_permissions(tdir_for_func): + # default + keyringsBaseDir = os.path.join(tdir_for_func, 'keyrings') + WalletStorageHelper(keyringsBaseDir) + check_permissions(keyringsBaseDir, DEFAULT_DMODE) + + # non-default + dmode = DEFAULT_DMODE + 1 + keyringsBaseDir = os.path.join(tdir_for_func, 'keyrings2') + WalletStorageHelper(keyringsBaseDir, dmode=dmode) + check_permissions(keyringsBaseDir, dmode) + + +def test_keyring_base_dir_exists_as_file(tdir_hierarchy): + root, dirs, files = tdir_hierarchy + with pytest.raises(NotADirectoryError): + WalletStorageHelper(os.path.join(root, files[0])) + + +def test_keyring_base_dir_exists_as_dir(tdir_hierarchy): + root, dirs, files = tdir_hierarchy + dpath = os.path.join(root, dirs[0]) + mode1 = get_permissions(dpath) + mode2 = mode1 | NOT_LISTED_PERMISSION + WalletStorageHelper(dpath, dmode=mode2) + check_permissions(dpath, mode2) + + +def test_store_wallet_by_empty_path_fail(tdir_for_func, keyrings_base_dir, test_wallet): + + wsh = WalletStorageHelper(keyrings_base_dir) + + for path 
in (None, ''): + with pytest.raises(ValueError) as exc_info: + wsh.saveWallet(test_wallet, path) + + exc_info.match(r'empty path') + + +def test_store_wallet_outside_fail(tdir_for_func, keyrings_base_dir, test_wallet): + + wsh = WalletStorageHelper(keyrings_base_dir) + + inv_paths = [ + os.path.join(keyrings_base_dir, '../wallet'), + '../wallet', + 'a/../../wallet' + ] + + # docs says: "Availability: Unix.", so OSError is expected in some cases + src_path = os.path.join(keyrings_base_dir, "../wallet") + link_path = os.path.join(keyrings_base_dir, "wallet") + try: + os.symlink(src_path, link_path) + except OSError: + logger.warning('Failed to create symlink {} for {}'.format( + link_path, src_path), exc_info=True) + else: + inv_paths.append('wallet') + + def check_path(path): + with pytest.raises(ValueError) as exc_info: + wsh.saveWallet(test_wallet, path) + + exc_info.match(r"path {} is not is not relative to the keyrings {}".format( + path, keyrings_base_dir)) + + for path in inv_paths: + check_path(path) + + +def test_wallet_dir_path_exists_as_file(tdir_hierarchy, test_wallet): + root, dirs, files = tdir_hierarchy + + wdir = files[0] + + wsh = WalletStorageHelper(root) + with pytest.raises(NotADirectoryError) as exc_info: + wsh.saveWallet(test_wallet, os.path.join(wdir, 'wallet')) + + exc_info.match(r"{}".format(wdir)) + + + +def test_new_file_wallet_permissions(tdir_for_func, keyrings_base_dir, test_wallet): + wpath = 'ctx/test.wallet' + + # default + wsh = WalletStorageHelper(keyrings_base_dir) + wpath = '1/2/3/wallet' + wpath_res = wsh.saveWallet(test_wallet, wpath) + check_permissions(wpath_res, DEFAULT_FMODE) + + # non-default + fmode = DEFAULT_DMODE + 1 + wsh = WalletStorageHelper(keyrings_base_dir, fmode=fmode) + wpath = '4/5/6/wallet' + wpath_res = wsh.saveWallet(test_wallet, wpath) + check_permissions(wpath_res, fmode) + + +def test_existed_wallet_permissions(tdir_hierarchy, test_wallet): + root, dirs, files = tdir_hierarchy + wpath = os.path.join(root, 
files[0]) + mode1 = get_permissions(wpath) + mode2 = mode1 | NOT_LISTED_PERMISSION + wsh = WalletStorageHelper(root, fmode=mode2) + wsh.saveWallet(test_wallet, files[0]) + check_permissions(wpath, mode2) + + +def test_store_wallet_by_abs_path(tdir_for_func, keyrings_base_dir, test_wallet): + wsh = WalletStorageHelper(keyrings_base_dir) + abs_path = os.path.join(keyrings_base_dir, "1/2/3/wallet") + wsh.saveWallet(test_wallet, abs_path) + check_permissions(abs_path, DEFAULT_FMODE) + + +def test_stored_wallet_data(tdir_for_func, keyrings_base_dir, test_wallet): + wpath = 'ctx/test.wallet' + + wsh = WalletStorageHelper(keyrings_base_dir) + + wpath_res = wsh.saveWallet(test_wallet, wpath) + assert wpath_res == os.path.join(keyrings_base_dir, wpath) + + with open(wpath_res) as wf: + wdata = wf.read() + + # TODO no comparison operator for Wallet + assert wdata == encode_wallet(test_wallet) + + +def test_load_wallet_by_empty_path_fail(tdir_for_func, keyrings_base_dir): + + wsh = WalletStorageHelper(keyrings_base_dir) + + for path in (None, ''): + with pytest.raises(ValueError) as exc_info: + wsh.loadWallet(path) + + exc_info.match(r'empty path') + + +def test_load_wallet_outside_fail(tdir_for_func, keyrings_base_dir): + + wsh = WalletStorageHelper(keyrings_base_dir) + + inv_paths = [ + os.path.join(keyrings_base_dir, '../wallet'), + '../wallet', + 'a/../../wallet' + ] + + # docs says: "Availability: Unix.", so OSError is expected in some cases + src_path = os.path.join(keyrings_base_dir, "../wallet") + link_path = os.path.join(keyrings_base_dir, "wallet") + try: + os.symlink(src_path, link_path) + except OSError: + logger.warning('Failed to create symlink {} for {}'.format( + link_path, src_path), exc_info=True) + else: + inv_paths.append('wallet') + + def check_path(path): + with pytest.raises(ValueError) as exc_info: + wsh.loadWallet(path) + + exc_info.match(r"path {} is not is not relative to the keyrings {}".format( + path, keyrings_base_dir)) + + for path in 
inv_paths: + check_path(path) + + +def test_loaded_wallet_data(tdir_for_func, keyrings_base_dir, test_wallet): + wpath = 'ctx/test.wallet' + + wsh = WalletStorageHelper(keyrings_base_dir) + + wsh.saveWallet(test_wallet, wpath) + loaded_wallet = wsh.loadWallet(wpath) + # TODO no comparison operator for Wallet (and classes it used) + assert encode_wallet(test_wallet) == encode_wallet(loaded_wallet) + + +def test_load_wallet_by_abs_path(tdir_for_func, keyrings_base_dir, test_wallet): + wsh = WalletStorageHelper(keyrings_base_dir) + abs_path = os.path.join(keyrings_base_dir, "5/6/7/wallet") + wsh.saveWallet(test_wallet, abs_path) + loaded_wallet = wsh.loadWallet(abs_path) + # TODO no comparison operator for Wallet (and classes it used) + assert encode_wallet(test_wallet) == encode_wallet(loaded_wallet) diff --git a/plenum/test/zstack_tests/test_zstack_reconnection.py b/plenum/test/zstack_tests/test_zstack_reconnection.py index 1a30c9d145..17f05a8a79 100644 --- a/plenum/test/zstack_tests/test_zstack_reconnection.py +++ b/plenum/test/zstack_tests/test_zstack_reconnection.py @@ -1,10 +1,12 @@ import pytest +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data from stp_core.common.log import getlogger from stp_core.loop.eventually import eventually from plenum.test.pool_transactions.conftest import looper, clientAndWallet1, \ client1, wallet1, client1Connected -from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, stopNodes +from plenum.test.helper import sendReqsToNodesAndVerifySuffReplies, stopNodes, \ + send_reqs_to_nodes_and_verify_all_replies from plenum.test.test_node import TestNode, ensureElectionsDone logger = getlogger() @@ -18,20 +20,6 @@ def tconf(conf, tdirWithPoolTxns): conf.UseZStack = True return conf - -def checkNodesSendingCommits(nodeSet): - for node in nodeSet: - for r in node.replicas: - i = r.instId - commitSenders = [_.voters for _ in r.commits.values()] - for otherNode in nodeSet: - if node == otherNode: - 
continue - otherReplica = otherNode.replicas[i] - for senders in commitSenders: - assert otherReplica.name in senders - - def testZStackNodeReconnection(tconf, looper, txnPoolNodeSet, client1, wallet1, tdirWithPoolTxns, client1Connected): sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) @@ -56,14 +44,16 @@ def checkFlakyConnected(conn=True): stopNodes([nodeToCrash], looper) # TODO Select or create the timeout from 'waits'. Don't use constant. looper.run(eventually(checkFlakyConnected, False, retryWait=1, timeout=60)) + looper.runFor(1) node = TestNode(nodeToCrash.name, basedirpath=tdirWithPoolTxns, config=tconf, ha=nodeToCrash.nodestack.ha, cliha=nodeToCrash.clientstack.ha) looper.add(node) txnPoolNodeSet[idxToCrash] = node + # TODO Select or create the timeout from 'waits'. Don't use constant. looper.run(eventually(checkFlakyConnected, True, retryWait=2, timeout=50)) - # TODO Select or create the timeout from 'waits'. Don't use constant. - ensureElectionsDone(looper, txnPoolNodeSet, retryWait=2, customTimeout=50) - sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1) - checkNodesSendingCommits(txnPoolNodeSet) + ensureElectionsDone(looper, txnPoolNodeSet, retryWait=2) + ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) + + send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 10) diff --git a/runner.py b/runner.py deleted file mode 100644 index ca61f3fb12..0000000000 --- a/runner.py +++ /dev/null @@ -1,177 +0,0 @@ -import os -import re -import sys -import argparse - -import time - - -def run(pytest, output_file, repeatUntilFailure): - if repeatUntilFailure: - log("'repeatUntilFailure' is set") - log("Is going to repeat the test suite until failure") - log("Preparing test suite with {}".format(pytest)) - testListFile = "test_list.txt" - os.system('{} --collect-only > {}'.format(pytest, testListFile)) - log("Reading collected modules file") - collectedData = open(testListFile).read() - os.remove(testListFile) - 
log("Collecting modules") - testList = re.findall("", collectedData) - log("Found {} test modules".format(len(testList))) - if not testList: - m = re.search("errors during collection", collectedData) - if m: - log(collectedData) - return -1 - retVal = 0 - totalPassed = 0 - totalFailed = 0 - totalSkipped = 0 - totalErros = 0 - runsCount = 0 - allFailedTests = [] - allErrorTests = [] - failureData = [] - testRep = 'currentTestReport.txt' - passPat = re.compile(".* ([0-9]+) passed.*$") - skipPat = re.compile(".* ([0-9]+) skipped.*$") - failPat = re.compile(".* ([0-9]+) failed.*$") - errPat = re.compile(".* ([0-9]+) error.*$") - failedTestPat = re.compile('____ (test.+) ____') - errorTestPat = re.compile('____ (ERROR.+) ____') - while True: - for i, test in enumerate(testList): - # testRep = '{}.rep'.format(test.split("/")[-1]) - log("Going to run {}".format(test)) - testStartTime = time.time() - r = os.system('{} -k "{}" > {}'.format(pytest, test, testRep)) - testExecutionTime = time.time() - testStartTime - reportLines = open(testRep).readlines() - output = ''.join(reportLines) - pas = passPat.search(output) - passed = int(pas.groups()[0]) if pas else 0 - skp = skipPat.search(output) - skipped = int(skp.groups()[0]) if skp else 0 - if r: - fai = failPat.search(output) - err = errPat.search(output) - if not (fai or err): - log("Non zero return value from {} run but no failures " - "or errors reported".format(test)) - log(output) - return -1 - failed = int(fai.groups()[0]) if fai else 0 - errors = int(err.groups()[0]) if err else 0 - failedNames = [] - errorNames = [] - startedF = None - startedE = None - for line in reportLines: - if '= FAILURES =' in line: - startedF = True - startedE = None - continue - if '= ERRORS =' in line: - startedF = None - startedE = True - continue - if startedF: - failureData.append(line) - m = failedTestPat.search(line) - if m: - failedNames.append(m.groups()[0]) - if startedE: - failureData.append(line) - m = errorTestPat.search(line) - 
if m: - errorNames.append(m.groups()[0]) - else: - failed = 0 - errors = 0 - log('In {}, {} passed, {} failed, {} errors, {} skipped, {:.1f}s time ' - '({}/{} progress)'. - format(test, passed, errors, failed, skipped, - testExecutionTime, i+1, len(testList))) - if failed: - logError("Failed tests: {}".format(', '.join(failedNames))) - for nm in failedNames: - allFailedTests.append((test, nm)) - if errors: - logError("Error in tests: {}".format(', '.join(errorNames))) - for nm in errorNames: - allErrorTests.append((test, nm)) - retVal += r - totalPassed += passed - totalFailed += failed - totalErros += errors - totalSkipped += skipped - runsCount += 1 - - if repeatUntilFailure: - if totalFailed or totalErros: - break # repeatUntilFailure set and failures happened - else: - logSuccess('Run #{} was successful'.format(runsCount)) - log('\n\n') - - else: - break # just one run - - summaryMsg = 'Total {} runs {} passed, {} failed, {} errors, {} skipped'.\ - format(runsCount, totalPassed, totalFailed, totalErros, totalSkipped) - log(summaryMsg) - - if totalFailed: - log("Failed tests:") - for fm, fn in allFailedTests: - log('{}:{}'.format(fm, fn)) - - if totalErros: - log("Error in tests:") - for fm, fn in allErrorTests: - log('{}:{}'.format(fm, fn)) - - if failureData and output_file: - log("Writing failure data in Test-Report.txt") - with open(output_file, 'w') as f: - f.write(summaryMsg) - f.write(''.join(failureData)) - - if os.path.exists(testRep): - os.remove(testRep) - - log("Tests run. 
Returning {}".format(retVal)) - return retVal - - -def log(msg): - return print(msg, flush=True) - - -def logError(msg): - return print('\x1b[0;30;41m' + msg + '\x1b[0m', flush=True) - - -def logSuccess(msg): - return print('\x1b[6;30;42m' + msg + '\x1b[0m') - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--pytest', type=str, - help='pytest instance', default='python -m pytest') - parser.add_argument('--output', type=str, - help='result file', default='../Test-Report.txt') - parser.add_argument('--nooutput', - help='no result file', action="store_true") - parser.add_argument('--repeat', dest='repeatUntilFailure', - action="store_true", - help='repeat the test suite until failure') - args = parser.parse_args() - r = run( - pytest=args.pytest, - output_file=args.output if not args.nooutput else None, - repeatUntilFailure=args.repeatUntilFailure, - ) - sys.exit(0 if r == 0 else 1) diff --git a/setup.cfg b/setup.cfg index b88034e414..6afcf47d48 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,5 @@ [metadata] description-file = README.md + +[aliases] +test=pytest diff --git a/setup.py b/setup.py index 23478f0a93..0071701f70 100644 --- a/setup.py +++ b/setup.py @@ -38,12 +38,12 @@ os.makedirs(BASE_DIR) setup( - name='plenum', + name='indy-plenum', version=__version__, description='Plenum Byzantine Fault Tolerant Protocol', long_description='Plenum Byzantine Fault Tolerant Protocol', - url='https://github.com/evernym/plenum', - download_url='https://github.com/evernym/plenum/tarball/{}'. + url='https://github.com/hyperledger/indy-plenum', + download_url='https://github.com/hyperledger/indy-plenum/tarball/{}'. 
format(__version__), author=__author__, author_email='dev@evernym.us', @@ -58,13 +58,16 @@ data_files=[( (BASE_DIR, ['data/pool_transactions_sandbox', ]) )], - install_requires=['ledger==0.2.16', 'stp==0.1.12', - 'state-trie==0.1.3', 'jsonpickle', + install_requires=['jsonpickle', 'ujson==1.33', 'prompt_toolkit==0.57', 'pygments', + 'crypto==1.4.1', 'rlp', 'sha3', 'leveldb', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', - 'sortedcontainers==1.5.7', 'psutil', 'pip'], + 'sortedcontainers==1.5.7', 'psutil', 'pip', + 'portalocker==0.5.7', 'pyzmq', 'raet', + 'psutil', 'intervaltree'], extras_require={ - 'stats': ['python-firebase'] + 'stats': ['python-firebase'], + 'benchmark': ['pympler'] }, setup_requires=['pytest-runner'], tests_require=['pytest', 'pytest-xdist'], diff --git a/state/__init__.py b/state/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/state/db/__init__.py b/state/db/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/state/db/db.py b/state/db/db.py new file mode 100644 index 0000000000..1fe6f9f0c1 --- /dev/null +++ b/state/db/db.py @@ -0,0 +1,16 @@ +from abc import abstractmethod + + +class BaseDB: + + @abstractmethod + def inc_refcount(self, key, value): + raise NotImplementedError + + @abstractmethod + def dec_refcount(self, key): + raise NotImplementedError + + @abstractmethod + def get(self, key): + raise NotImplementedError \ No newline at end of file diff --git a/state/db/persistent_db.py b/state/db/persistent_db.py new file mode 100644 index 0000000000..e20d302ccb --- /dev/null +++ b/state/db/persistent_db.py @@ -0,0 +1,29 @@ +from state.db.db import BaseDB +from state.kv.kv_store import KeyValueStorage + + +class PersistentDB(BaseDB): + def __init__(self, keyValueStorage: KeyValueStorage): + self._keyValueStorage = keyValueStorage + + def get(self, key: bytes) -> bytes: + return self._keyValueStorage.get(key) + + def _has_key(self, key: bytes): + try: + self.get(key) + return True + except 
KeyError: + return False + + def __contains__(self, key): + return self._has_key(key) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self._keyValueStorage == other._keyValueStorage + + def inc_refcount(self, key, value): + self._keyValueStorage.put(key, value) + + def dec_refcount(self, key): + pass diff --git a/state/db/refcount_db.py b/state/db/refcount_db.py new file mode 100644 index 0000000000..4a164e9ecd --- /dev/null +++ b/state/db/refcount_db.py @@ -0,0 +1,157 @@ +import sys + +import rlp + +import state.util.utils as utils +from state.db.db import BaseDB +from state.kv.kv_store import KeyValueStorage + +DEATH_ROW_OFFSET = 2**62 +ZERO_ENCODED = utils.encode_int(0) +ONE_ENCODED = utils.encode_int(1) + + +class RefcountDB(BaseDB): + + def __init__(self, keyValueStorage: KeyValueStorage): + self._keyValueStorage = keyValueStorage + self.journal = [] + self.death_row = [] + self.ttl = 500 + self.logging = False + + # Increase the reference count associated with a key + def inc_refcount(self, k, v): + # raise Exception("WHY AM I CHANGING A REFCOUNT?!:?") + try: + node_object = rlp.decode(self._keyValueStorage.get(b'r:'+k)) + refcount = utils.decode_int(node_object[0]) + self.journal.append([node_object[0], k]) + if refcount >= DEATH_ROW_OFFSET: + refcount = 0 + new_refcount = utils.encode_int(refcount + 1) + self._keyValueStorage.put(b'r:'+k, rlp.encode([new_refcount, v])) + if self.logging: + sys.stderr.write('increasing %s %r to: %d\n' % ( + utils.encode_hex(k), v, refcount + 1)) + except: + self._keyValueStorage.put(b'r:'+k, rlp.encode([ONE_ENCODED, v])) + self.journal.append([ZERO_ENCODED, k]) + if self.logging: + sys.stderr.write('increasing %s %r to: %d\n' % ( + utils.encode_hex(k), v, 1)) + + put = inc_refcount + + # Decrease the reference count associated with a key + def dec_refcount(self, k): + # raise Exception("WHY AM I CHANGING A REFCOUNT?!:?") + node_object = rlp.decode(self._keyValueStorage.get(b'r:'+k)) + refcount = 
utils.decode_int(node_object[0]) + if self.logging: + sys.stderr.write('decreasing %s to: %d\n' % ( + utils.encode_hex(k), refcount - 1)) + assert refcount > 0 + self.journal.append([node_object[0], k]) + new_refcount = utils.encode_int(refcount - 1) + self._keyValueStorage.put(b'r:'+k, rlp.encode([new_refcount, node_object[1]])) + if new_refcount == ZERO_ENCODED: + self.death_row.append(k) + + delete = dec_refcount + + def get_refcount(self, k): + try: + o = utils.decode_int(self._keyValueStorage.get(b'r:' + k))[0] + if o >= DEATH_ROW_OFFSET: + return 0 + return o + except: + return 0 + + # Get the value associated with a key + def get(self, k): + return rlp.decode(self._keyValueStorage.get(b'r:'+k))[1] + + # Kill nodes that are eligible to be killed, and remove the associated + # deathrow record. Also delete old journals. + def cleanup(self, epoch): + try: + death_row_node = self._keyValueStorage.get('deathrow:'+str(epoch)) + except: + death_row_node = rlp.encode([]) + death_row_nodes = rlp.decode(death_row_node) + pruned = 0 + for nodekey in death_row_nodes: + try: + refcount, val = rlp.decode(self._keyValueStorage.get(b'r:'+nodekey)) + if utils.decode_int(refcount) == DEATH_ROW_OFFSET + epoch: + self._keyValueStorage.remove(b'r:'+nodekey) + pruned += 1 + except: + pass + sys.stderr.write('%d nodes successfully pruned\n' % pruned) + # Delete the deathrow after processing it + try: + self._keyValueStorage.remove('deathrow:'+str(epoch)) + except: + pass + # Delete journals that are too old + try: + self._keyValueStorage.remove('journal:'+str(epoch - self.ttl)) + except: + pass + + # Commit changes to the journal and death row to the database + def commit_refcount_changes(self, epoch): + # Save death row nodes + timeout_epoch = epoch + self.ttl + try: + death_row_nodes = rlp.decode(self._keyValueStorage.get('deathrow:'+str(timeout_epoch))) + except: + death_row_nodes = [] + for nodekey in self.death_row: + refcount, val = 
rlp.decode(self._keyValueStorage.get(b'r:'+nodekey)) + if refcount == ZERO_ENCODED: + new_refcount = utils.encode_int(DEATH_ROW_OFFSET + timeout_epoch) + self._keyValueStorage.put(b'r:'+nodekey, rlp.encode([new_refcount, val])) + if len(self.death_row) > 0: + sys.stderr.write('%d nodes marked for pruning during block %d\n' % + (len(self.death_row), timeout_epoch)) + death_row_nodes.extend(self.death_row) + self.death_row = [] + self._keyValueStorage.put('deathrow:'+str(timeout_epoch), + rlp.encode(death_row_nodes)) + # Save journal + try: + journal = rlp.decode(self._keyValueStorage.get('journal:'+str(epoch))) + except: + journal = [] + journal.extend(self.journal) + self.journal = [] + self._keyValueStorage.put('journal:'+str(epoch), rlp.encode(journal)) + + # Revert changes made during an epoch + def revert_refcount_changes(self, epoch): + timeout_epoch = epoch + self.ttl + # Delete death row additions + try: + self._keyValueStorage.remove('deathrow:'+str(timeout_epoch)) + except: + pass + # Revert journal changes + try: + journal = rlp.decode(self._keyValueStorage.get('journal:'+str(epoch))) + for new_refcount, hashkey in journal[::-1]: + node_object = rlp.decode(self._keyValueStorage.get(b'r:'+hashkey)) + self._keyValueStorage.put(b'r:'+hashkey, + rlp.encode([new_refcount, node_object[1]])) + except: + pass + + def _has_key(self, key): + return b'r:'+key in self._keyValueStorage + + def __contains__(self, key): + return self._has_key(key) + diff --git a/state/kv/__init__.py b/state/kv/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/state/kv/kv_in_memory.py b/state/kv/kv_in_memory.py new file mode 100644 index 0000000000..e045e763a1 --- /dev/null +++ b/state/kv/kv_in_memory.py @@ -0,0 +1,51 @@ +from typing import Tuple, Iterable + +from rlp.utils import str_to_bytes +from state.kv.kv_store import KeyValueStorage +from state.util import utils + +# log = get_logger('db') + + +databases = {} + + +class 
KeyValueStorageInMemory(KeyValueStorage): + def __init__(self): + self._dict = {} + + def get(self, key): + if isinstance(key, str): + key = key.encode() + return self._dict[key] + + def put(self, key, value): + if isinstance(key, str): + key = key.encode() + if isinstance(value, str): + value = value.encode() + self._dict[key] = value + + def remove(self, key): + if isinstance(key, str): + key = key.encode() + del self._dict[key] + + def setBatch(self, batch: Iterable[Tuple]): + for key, value in batch: + self.put(key, value) + + def open(self): + pass + + def close(self): + pass + + def drop(self): + self._dict = {} + + def __eq__(self, other): + return isinstance(other, self.__class__) and self._dict == other._dict + + def __hash__(self): + return utils.big_endian_to_int(str_to_bytes(self.__repr__())) diff --git a/state/kv/kv_store.py b/state/kv/kv_store.py new file mode 100644 index 0000000000..3671638614 --- /dev/null +++ b/state/kv/kv_store.py @@ -0,0 +1,57 @@ +from abc import abstractmethod +from typing import Tuple, Iterable + + +class KeyValueStorage: + + @abstractmethod + def put(self, key, value): + raise NotImplementedError + + @abstractmethod + def get(self, key): + raise NotImplementedError + + @abstractmethod + def remove(self, key): + raise NotImplementedError + + @abstractmethod + def setBatch(self, batch: Iterable[Tuple]): + raise NotImplementedError + + @abstractmethod + def open(self): + raise NotImplementedError + + @abstractmethod + def close(self): + raise NotImplementedError + + @abstractmethod + def drop(self): + raise NotImplementedError + + def has_key(self, key): + try: + self.get(key) + return True + except KeyError: + return False + + def __contains__(self, key): + return self.has_key(key) + + @property + def closed(self): + return self._db is None + + @property + def size(self): + c = 0 + for _ in self.iter(include_value=False): + c += 1 + return c + + def __repr__(self): + return self._dbPath diff --git a/state/kv/kv_store_leveldb.py 
b/state/kv/kv_store_leveldb.py new file mode 100644 index 0000000000..f65a156672 --- /dev/null +++ b/state/kv/kv_store_leveldb.py @@ -0,0 +1,65 @@ +from typing import Iterable, Tuple + +import shutil +from state.kv.kv_store import KeyValueStorage +from state.util.utils import removeLockFiles + +try: + import leveldb +except ImportError: + print('Cannot import leveldb, please install') + + +class KeyValueStorageLeveldb(KeyValueStorage): + def __init__(self, dbPath, open=True): + if 'leveldb' not in globals(): + raise RuntimeError('Leveldb is needed to use this class') + self._dbPath = dbPath + self._db = None + if open: + self.open() + + def __repr__(self): + return self._dbPath + + def iter(self, start=None, end=None, include_value=True): + return self._db.RangeIter(key_from=start, key_to=end, include_value=include_value) + + def put(self, key, value): + if isinstance(key, str): + key = key.encode() + if isinstance(value, str): + value = value.encode() + self._db.Put(key, value) + + def get(self, key): + if isinstance(key, str): + key = key.encode() + return self._db.Get(key) + + def remove(self, key): + if isinstance(key, str): + key = key.encode() + self._db.Delete(key) + + def setBatch(self, batch: Iterable[Tuple]): + b = leveldb.WriteBatch() + for key, value in batch: + if isinstance(key, str): + key = key.encode() + if isinstance(value, str): + value = value.encode() + b.Put(key, value) + self._db.Write(b, sync=False) + + def open(self): + self._db = leveldb.LevelDB(self._dbPath) + + def close(self): + removeLockFiles(self._dbPath) + del self._db + self._db = None + + def drop(self): + self.close() + shutil.rmtree(self._dbPath) diff --git a/state/kv/kv_store_rocksdb.py b/state/kv/kv_store_rocksdb.py new file mode 100644 index 0000000000..f7c539be47 --- /dev/null +++ b/state/kv/kv_store_rocksdb.py @@ -0,0 +1,19 @@ +from typing import Iterable, Tuple +from state.kv.kv_store import KeyValueStorage + +# TODO: WIP below +class 
KeyValueStorageRocksdb(KeyValueStorage): + def set(self, key, value): + raise NotImplementedError + + def get(self, key): + raise NotImplementedError + + def remove(self, key): + raise NotImplementedError + + def setBatch(self, batch: Iterable[Tuple]): + raise NotImplementedError + + def close(self): + raise NotImplementedError diff --git a/state/pruning_state.py b/state/pruning_state.py new file mode 100644 index 0000000000..3701bd98a1 --- /dev/null +++ b/state/pruning_state.py @@ -0,0 +1,110 @@ +from binascii import unhexlify + +from state.db.persistent_db import PersistentDB +from state.kv.kv_store import KeyValueStorage +from state.state import State +from state.trie.pruning_trie import BLANK_ROOT, Trie, BLANK_NODE, bin_to_nibbles +from state.util.fast_rlp import encode_optimized as rlp_encode, \ + decode_optimized as rlp_decode +from state.util.utils import to_string, isHex + + +class PruningState(State): + # This class is used to store the + # committed root hash of the trie in the db. + # The committed root hash is only updated once a batch gets written to the + # ledger. It might happen that a few batches are in 3 phase commit and the + # node crashes. 
Now when the node restarts, it restores the db from the + # committed root hash and all entries for uncommitted batches will be + # ignored + + # some key that does not collide with any state variable's name + rootHashKey = b'\x88\xc8\x88 \x9a\xa7\x89\x1b' + + def __init__(self, keyValueStorage: KeyValueStorage): + self._kv = keyValueStorage + if self.rootHashKey in self._kv: + rootHash = bytes(self._kv.get(self.rootHashKey)) + else: + rootHash = BLANK_ROOT + self._kv.put(self.rootHashKey, BLANK_ROOT) + self._trie = Trie( + PersistentDB(self._kv), + rootHash) + + @property + def head(self): + # The current head of the state, if the state is a merkle tree then + # head is the root + return self._trie.root_node + + @property + def committedHead(self): + # The committed head of the state, if the state is a merkle tree then + # head is the root + if self.committedHeadHash == BLANK_ROOT: + return BLANK_NODE + else: + return self._trie._decode_to_node(self.committedHeadHash) + + def set(self, key: bytes, value: bytes): + self._trie.update(key, rlp_encode([value])) + + def get(self, key: bytes, isCommitted: bool = True): + if not isCommitted: + val = self._trie.get(key) + else: + val = self._trie._get(self.committedHead, + bin_to_nibbles(to_string(key))) + if val: + return rlp_decode(val)[0] + + def remove(self, key: bytes): + self._trie.delete(key) + + def commit(self, rootHash=None, rootNode=None): + if rootNode: + rootHash = self._trie._encode_node(rootNode) + elif rootHash and isHex(rootHash): + if isinstance(rootHash, str): + rootHash = rootHash.encode() + rootHash = unhexlify(rootHash) + elif rootHash: + rootHash = rootHash + else: + rootHash = self.headHash + self._kv.put(self.rootHashKey, rootHash) + + def revertToHead(self, headHash=None): + if headHash != BLANK_ROOT: + head = self._trie._decode_to_node(headHash) + else: + head = BLANK_NODE + self._trie.replace_root_hash(self._trie.root_node, head) + + @property + def as_dict(self): + d = self._trie.to_dict() + 
return {k: rlp_decode(v)[0] for k, v in d.items()} + + @property + def headHash(self): + """ + The hash of the current head of the state, if the state is a merkle + tree then hash of the root + :return: + """ + return self._trie.root_hash + + @property + def committedHeadHash(self): + return self._kv.get(self.rootHashKey) + + @property + def isEmpty(self): + return self.committedHeadHash == BLANK_ROOT + + def close(self): + if self._kv: + self._kv.close() + self._kv = None diff --git a/state/state.py b/state/state.py new file mode 100644 index 0000000000..2883414d9f --- /dev/null +++ b/state/state.py @@ -0,0 +1,60 @@ +from abc import abstractmethod, abstractproperty + + +class State: + + @abstractmethod + def set(self, key: bytes, value: bytes): + raise NotImplementedError + + @abstractmethod + def get(self, key: bytes, isCommitted: bool=True): + # If `isCommitted` is True then get value corresponding to the + # committed state else get the latest value + raise NotImplementedError + + @abstractmethod + def remove(self, key: bytes): + raise NotImplementedError + + @abstractmethod + def commit(self, rootHash=None, rootNode=None): + raise NotImplementedError + + @abstractmethod + def revertToHead(self, headHash=None): + # Revert to the given head + raise NotImplementedError + + @abstractmethod + def close(self): + raise NotImplementedError + + @abstractproperty + def head(self): + # The current head of the state, if the state is a merkle tree then + # head is the root + raise NotImplementedError + + @abstractproperty + def committedHead(self): + # The committed head of the state, if the state is a merkle tree then + # head is the root + raise NotImplementedError + + @abstractproperty + def headHash(self): + """ + The hash of the current head of the state, if the state is a merkle + tree then hash of the root + :return: + """ + raise NotImplementedError + + @abstractproperty + def committedHeadHash(self): + raise NotImplementedError + + @abstractproperty + def 
isEmpty(self): + raise NotImplementedError diff --git a/state/test/__init__.py b/state/test/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/state/test/bench.py b/state/test/bench.py new file mode 100644 index 0000000000..61b12d76b5 --- /dev/null +++ b/state/test/bench.py @@ -0,0 +1 @@ +# TODO: Benchmark CRUD over the Trie. diff --git a/state/test/conftest.py b/state/test/conftest.py new file mode 100644 index 0000000000..2c0d76049c --- /dev/null +++ b/state/test/conftest.py @@ -0,0 +1,6 @@ +import pytest + +@pytest.fixture(scope='function') +def tempdir(tmpdir_factory): + return tmpdir_factory.mktemp('').strpath + diff --git a/state/test/kv/__init__.py b/state/test/kv/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/state/test/kv/test_kv_leveldb.py b/state/test/kv/test_kv_leveldb.py new file mode 100644 index 0000000000..db35d9a965 --- /dev/null +++ b/state/test/kv/test_kv_leveldb.py @@ -0,0 +1,118 @@ +import os +import pytest +from state.kv.kv_store_leveldb import KeyValueStorageLeveldb + +i = 0 + +@pytest.yield_fixture(scope="function") +def kv(tempdir) -> KeyValueStorageLeveldb: + global i + kv = KeyValueStorageLeveldb(os.path.join(tempdir, 'kv{}'.format(i))) + i += 1 + yield kv + kv.close() + +def test_reopen(kv): + kv.put('k1', 'v1') + v1 = kv.get('k1') + kv.close() + + kv.open() + v2 = kv.get('k1') + + assert b'v1' == v1 + assert b'v1' == v2 + +def test_drop(kv): + kv.put('k1', 'v1') + hasKeyBeforeDrop = kv.has_key('k1') + kv.close() + kv.drop() + + kv.open() + hasKeyAfterDrop = kv.has_key('k1') + + assert hasKeyBeforeDrop + assert not hasKeyAfterDrop + +def test_put_string(kv): + kv.put('k1', 'v1') + v1 = kv.get('k1') + + kv.put('k2', 'v2') + v2 = kv.get('k2') + + kv.put('k1', 'v3') + v3 = kv.get('k1') + v4 = kv.get('k2') + + assert b'v1' == v1 + assert b'v2' == v2 + assert b'v3' == v3 + assert b'v2' == v4 + +def test_put_bytes(kv): + kv.put(b'k1', b'v1') + v1 = kv.get(b'k1') + + kv.put(b'k2', b'v2') + v2 
= kv.get(b'k2') + + kv.put(b'k1', b'v3') + v3 = kv.get(b'k1') + v4 = kv.get(b'k2') + + assert b'v1' == v1 + assert b'v2' == v2 + assert b'v3' == v3 + assert b'v2' == v4 + +def test_put_string_and_bytes(kv): + kv.put(b'k1', 'v1') + v1 = kv.get('k1') + + kv.put('k2', b'v2') + v2 = kv.get(b'k2') + + kv.put('k1', b'v3') + v3 = kv.get('k1') + v4 = kv.get('k2') + + assert b'v1' == v1 + assert b'v2' == v2 + assert b'v3' == v3 + assert b'v2' == v4 + +def test_remove_string(kv): + kv.put('k1', 'v1') + hasKeyBeforeRemove = kv.has_key('k1') + kv.remove('k1') + hasKeyAfterRemove = kv.has_key('k1') + + assert hasKeyBeforeRemove + assert not hasKeyAfterRemove + +def test_remove_bytes(kv): + kv.put(b'k1', b'v1') + hasKeyBeforeRemove = kv.has_key(b'k1') + kv.remove(b'k1') + hasKeyAfterRemove = kv.has_key(b'k1') + + assert hasKeyBeforeRemove + assert not hasKeyAfterRemove + +def test_batch_string(kv): + batch = [('k'.format(i), 'v'.format(i)) + for i in range(5)] + kv.setBatch(batch) + + for i in range(5): + assert 'v'.format(i).encode() == kv.get('k'.format(i)) + +def test_batch_bytes(kv): + batch = [('k'.format(i).encode(), 'v'.format(i).encode()) + for i in range(5)] + kv.setBatch(batch) + + for i in range(5): + assert 'v'.format(i).encode() == kv.get('k'.format(i)) \ No newline at end of file diff --git a/state/test/kv/test_kv_memory.py b/state/test/kv/test_kv_memory.py new file mode 100644 index 0000000000..be34dbb061 --- /dev/null +++ b/state/test/kv/test_kv_memory.py @@ -0,0 +1,126 @@ +import pytest +from state.kv.kv_in_memory import KeyValueStorageInMemory +from state.kv.kv_store_leveldb import KeyValueStorageLeveldb + + +@pytest.yield_fixture(scope="function") +def kv() -> KeyValueStorageLeveldb: + kv = KeyValueStorageInMemory() + yield kv + kv.close() + + +def test_reopen(kv): + kv.put('k1', 'v1') + v1 = kv.get('k1') + kv.close() + + kv.open() + v2 = kv.get('k1') + + assert b'v1' == v1 + assert b'v1' == v2 + + +def test_drop(kv): + kv.put('k1', 'v1') + hasKeyBeforeDrop = 
kv.has_key('k1') + kv.close() + kv.drop() + + kv.open() + hasKeyAfterDrop = kv.has_key('k1') + + assert hasKeyBeforeDrop + assert not hasKeyAfterDrop + +def test_put_none(kv): + kv.put('k1', None) + +def test_put_string(kv): + kv.put('k1', 'v1') + v1 = kv.get('k1') + + kv.put('k2', 'v2') + v2 = kv.get('k2') + + kv.put('k1', 'v3') + v3 = kv.get('k1') + v4 = kv.get('k2') + + assert b'v1' == v1 + assert b'v2' == v2 + assert b'v3' == v3 + assert b'v2' == v4 + + +def test_put_bytes(kv): + kv.put(b'k1', b'v1') + v1 = kv.get(b'k1') + + kv.put(b'k2', b'v2') + v2 = kv.get(b'k2') + + kv.put(b'k1', b'v3') + v3 = kv.get(b'k1') + v4 = kv.get(b'k2') + + assert b'v1' == v1 + assert b'v2' == v2 + assert b'v3' == v3 + assert b'v2' == v4 + + +def test_put_string_and_bytes(kv): + kv.put(b'k1', 'v1') + v1 = kv.get('k1') + + kv.put('k2', b'v2') + v2 = kv.get(b'k2') + + kv.put('k1', b'v3') + v3 = kv.get('k1') + v4 = kv.get('k2') + + assert b'v1' == v1 + assert b'v2' == v2 + assert b'v3' == v3 + assert b'v2' == v4 + + +def test_remove_string(kv): + kv.put('k1', 'v1') + hasKeyBeforeRemove = kv.has_key('k1') + kv.remove('k1') + hasKeyAfterRemove = kv.has_key('k1') + + assert hasKeyBeforeRemove + assert not hasKeyAfterRemove + + +def test_remove_bytes(kv): + kv.put(b'k1', b'v1') + hasKeyBeforeRemove = kv.has_key(b'k1') + kv.remove(b'k1') + hasKeyAfterRemove = kv.has_key(b'k1') + + assert hasKeyBeforeRemove + assert not hasKeyAfterRemove + + +def test_batch_string(kv): + batch = [('k'.format(i), 'v'.format(i)) + for i in range(5)] + kv.setBatch(batch) + + for i in range(5): + assert 'v'.format(i).encode() == kv.get('k'.format(i)) + + +def test_batch_bytes(kv): + batch = [('k'.format(i).encode(), 'v'.format(i).encode()) + for i in range(5)] + kv.setBatch(batch) + + for i in range(5): + assert 'v'.format(i).encode() == kv.get('k'.format(i)) diff --git a/state/test/test_state_in_memory.py b/state/test/test_state_in_memory.py new file mode 100644 index 0000000000..eacd89e45d --- /dev/null +++ 
b/state/test/test_state_in_memory.py @@ -0,0 +1,226 @@ +import copy + +import pytest +from state.kv.kv_in_memory import KeyValueStorageInMemory +from state.pruning_state import PruningState +from state.state import State +from state.trie.pruning_trie import BLANK_ROOT, BLANK_NODE + + +@pytest.yield_fixture(scope="function") +def state() -> State: + state = PruningState(KeyValueStorageInMemory()) + yield state + state.close() + +@pytest.yield_fixture(scope="function") +def state2() -> State: + state = PruningState(KeyValueStorageInMemory()) + yield state + state.close() + +def test_set(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + assert b'v1' == state.get(b'k1') + + state.set(b'k2', b'v2') + state.commit(state.headHash) + assert b'v2' == state.get(b'k2') + +def test_set_same_key(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + assert b'v1' == state.get(b'k1') + + state.set(b'k1', b'v2') + state.commit(state.headHash) + assert b'v2' == state.get(b'k1') + +def test_get(state): + state.set(b'k1', b'v1') + assert b'v1' == state.get(b'k1', isCommitted=False) + assert None == state.get(b'k1', isCommitted=True) + + state.commit(state.headHash) + assert b'v1' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + state.set(b'k2', b'v2') + assert b'v2' == state.get(b'k2', isCommitted=False) + assert None == state.get(b'k2', isCommitted=True) + assert b'v1' == state.get(b'k1', isCommitted=True) + + state.set(b'k1', b'v3') + assert b'v3' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + +def test_remove_uncommitted(state): + state.set(b'k1', b'v1') + assert b'v1' == state.get(b'k1', isCommitted=False) + assert None == state.get(b'k1', isCommitted=True) + + state.remove(b'k1') + assert None == state.get(b'k1', isCommitted=False) + assert None == state.get(b'k1', isCommitted=True) + +def test_remove_committed(state): + state.set(b'k1', b'v1') + 
state.commit(state.headHash) + assert b'v1' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + state.remove(b'k1') + # do not remove committed + assert None == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + +def test_revert_to_last_committed_head(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + state.set(b'k1', b'v2') + assert b'v2' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + state.revertToHead(state.committedHead) + assert b'v1' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + +def test_revert_to_old_head(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + head1 = state.committedHeadHash + state.set(b'k1', b'v2') + state.commit(state.headHash) + state.set(b'k1', b'v3') + state.commit(state.headHash) + assert b'v3' == state.get(b'k1', isCommitted=False) + assert b'v3' == state.get(b'k1', isCommitted=True) + + state.revertToHead(head1) + assert b'v1' == state.get(b'k1', isCommitted=False) + # do not revert committed + assert b'v3' == state.get(b'k1', isCommitted=True) + + +def test_head_initially(state): + assert BLANK_NODE == state.head + assert BLANK_ROOT == state.headHash + + +def test_state_head_after_updates(state, state2): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + state.set(b'k1', b'v1a') + state.set(b'k3', b'v3') + state.remove(b'k2') + + state2.set(b'k1', b'v1a') + state2.set(b'k3', b'v3') + + assert state.headHash == state2.headHash + assert state.head == state2.head + + +def test_committed_head_initially(state): + assert BLANK_NODE == state.committedHead + assert BLANK_ROOT == state.committedHeadHash + + +def test_committed_state_head_after_updates(state, state2): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + state.commit(state.headHash) + state.set(b'k1', b'v1a') + state.set(b'k3', b'v3') + + state2.set(b'k1', b'v1') 
+ state2.set(b'k2', b'v2') + state2.commit(state2.headHash) + + assert state.committedHead == state2.committedHead + assert state.committedHeadHash == state2.committedHeadHash + + +def test_commit_current(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = state.head + headHash = state.headHash + state.commit() + + assert head == state.committedHead + assert headHash == state.committedHeadHash + +def test_commit_multiple_times(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = state.head + headHash = state.headHash + state.commit() + state.commit() + state.commit() + state.commit() + state.commit() + + assert head == state.committedHead + assert headHash == state.committedHeadHash + + +def test_commit_to_current_head_hash(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = state.head + headHash = state.headHash + state.commit(headHash) + + assert head == state.committedHead + assert headHash == state.committedHeadHash + + +def test_commit_to_old_head_hash(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + headHash = state.headHash + state.set(b'k3', b'v3') + state.set(b'k4', b'v4') + state.commit(headHash) + + assert headHash == state.committedHeadHash + + +def test_commit_to_current_head(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = state.head + headHash = state.headHash + state.commit(rootNode=head) + + assert head == state.committedHead + assert headHash == state.committedHeadHash + + +def test_commit_to_old_head(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = copy.deepcopy(state.head) + headHash = state.headHash + state.set(b'k3', b'v3') + state.set(b'k4', b'v4') + state.commit(rootNode=head) + + assert head == state.committedHead + assert headHash == state.committedHeadHash + +def testStateData(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + state.set(b'k3', b'v3') + + data = {k: v for k, v in state.as_dict.items()} + assert data == {b'k1': 
b'v1', b'k2': b'v2', b'k3': b'v3'} + + + diff --git a/state/test/test_state_leveldb.py b/state/test/test_state_leveldb.py new file mode 100644 index 0000000000..b8d664db66 --- /dev/null +++ b/state/test/test_state_leveldb.py @@ -0,0 +1,239 @@ +import copy +import os + +import pytest +from state.kv.kv_store_leveldb import KeyValueStorageLeveldb +from state.pruning_state import PruningState +from state.state import State +from state.trie.pruning_trie import BLANK_NODE, BLANK_ROOT + +i = 0 + +#TODO: combine with in-memory tests + + +@pytest.yield_fixture(scope="function") +def state(tempdir) -> State: + global i + state = PruningState( + KeyValueStorageLeveldb(os.path.join(tempdir, 'kv{}'.format(i)))) + yield state + state.close() + + +@pytest.yield_fixture(scope="function") +def state2(tempdir) -> State: + global i + state = PruningState( + KeyValueStorageLeveldb(os.path.join(tempdir, 'kv2{}'.format(i)))) + yield state + state.close() + + +def test_set(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + assert b'v1' == state.get(b'k1') + + state.set(b'k2', b'v2') + state.commit(state.headHash) + assert b'v2' == state.get(b'k2') + + +def test_set_same_key(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + assert b'v1' == state.get(b'k1') + + state.set(b'k1', b'v2') + state.commit(state.headHash) + assert b'v2' == state.get(b'k1') + + +def test_get(state): + state.set(b'k1', b'v1') + assert b'v1' == state.get(b'k1', isCommitted=False) + assert None == state.get(b'k1', isCommitted=True) + + state.commit(state.headHash) + assert b'v1' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + state.set(b'k2', b'v2') + assert b'v2' == state.get(b'k2', isCommitted=False) + assert None == state.get(b'k2', isCommitted=True) + assert b'v1' == state.get(b'k1', isCommitted=True) + + state.set(b'k1', b'v3') + assert b'v3' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) 
+ + +def test_remove_uncommitted(state): + state.set(b'k1', b'v1') + assert b'v1' == state.get(b'k1', isCommitted=False) + assert None == state.get(b'k1', isCommitted=True) + + state.remove(b'k1') + assert None == state.get(b'k1', isCommitted=False) + assert None == state.get(b'k1', isCommitted=True) + + +def test_remove_committed(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + assert b'v1' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + state.remove(b'k1') + # do not remove committed + assert None == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + +def test_revert_to_last_committed_head(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + state.set(b'k1', b'v2') + assert b'v2' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + state.revertToHead(state.committedHead) + assert b'v1' == state.get(b'k1', isCommitted=False) + assert b'v1' == state.get(b'k1', isCommitted=True) + + +def test_revert_to_old_head(state): + state.set(b'k1', b'v1') + state.commit(state.headHash) + head1 = state.committedHeadHash + state.set(b'k1', b'v2') + state.commit(state.headHash) + state.set(b'k1', b'v3') + state.commit(state.headHash) + assert b'v3' == state.get(b'k1', isCommitted=False) + assert b'v3' == state.get(b'k1', isCommitted=True) + + state.revertToHead(head1) + assert b'v1' == state.get(b'k1', isCommitted=False) + # do not revert committed + assert b'v3' == state.get(b'k1', isCommitted=True) + + +def test_head_initially(state): + assert BLANK_NODE == state.head + assert BLANK_ROOT == state.headHash + + +def test_state_head_after_updates(state, state2): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + state.set(b'k1', b'v1a') + state.set(b'k3', b'v3') + state.remove(b'k2') + + state2.set(b'k1', b'v1a') + state2.set(b'k3', b'v3') + + assert state.headHash == state2.headHash + assert state.head == 
state2.head + + +def test_committed_head_initially(state): + assert BLANK_NODE == state.committedHead + assert BLANK_ROOT == state.committedHeadHash + + +def test_committed_state_head_after_updates(state, state2): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + state.commit(state.headHash) + state.set(b'k1', b'v1a') + state.set(b'k3', b'v3') + + state2.set(b'k1', b'v1') + state2.set(b'k2', b'v2') + state2.commit(state2.headHash) + + assert state.committedHead == state2.committedHead + assert state.committedHeadHash == state2.committedHeadHash + + +def test_commit_current(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = state.head + headHash = state.headHash + state.commit() + + assert head == state.committedHead + assert headHash == state.committedHeadHash + + +def test_commit_multiple_times(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = state.head + headHash = state.headHash + state.commit() + state.commit() + state.commit() + state.commit() + state.commit() + + assert head == state.committedHead + assert headHash == state.committedHeadHash + + +def test_commit_to_current_head_hash(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = state.head + headHash = state.headHash + state.commit(headHash) + + assert head == state.committedHead + assert headHash == state.committedHeadHash + + +def test_commit_to_old_head_hash(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + headHash = state.headHash + state.set(b'k3', b'v3') + state.set(b'k4', b'v4') + state.commit(headHash) + + assert headHash == state.committedHeadHash + + +def test_commit_to_current_head(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = state.head + headHash = state.headHash + state.commit(rootNode=head) + + assert head == state.committedHead + assert headHash == state.committedHeadHash + + +def test_commit_to_old_head(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + head = 
copy.deepcopy(state.head) + headHash = state.headHash + state.set(b'k3', b'v3') + state.set(b'k4', b'v4') + state.commit(rootNode=head) + + assert head == state.committedHead + assert headHash == state.committedHeadHash + +def testStateData(state): + state.set(b'k1', b'v1') + state.set(b'k2', b'v2') + state.set(b'k3', b'v3') + + data = {k: v for k, v in state.as_dict.items()} + assert data == {b'k1': b'v1', b'k2': b'v2', b'k3': b'v3'} diff --git a/state/test/trie/__init__.py b/state/test/trie/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/state/test/trie/test_trie_values_at_different_roots.py b/state/test/trie/test_trie_values_at_different_roots.py new file mode 100644 index 0000000000..46388f6f2d --- /dev/null +++ b/state/test/trie/test_trie_values_at_different_roots.py @@ -0,0 +1,80 @@ +from state.db.persistent_db import PersistentDB +from state.kv.kv_in_memory import KeyValueStorageInMemory +from state.trie.pruning_trie import BLANK_NODE, Trie +from state.util.fast_rlp import encode_optimized as rlp_encode, \ + decode_optimized as rlp_decode + +def test_get_values_at_roots_in_memory(): + # Update key with different values but preserve root after each update + # Check values of keys with different previous roots and check that they + # are correct + trie = Trie(PersistentDB(KeyValueStorageInMemory())) + + trie.update('k1'.encode(), rlp_encode(['v1'])) + # print state.root_hash.encode('hex') + # print state.root_node + + val = trie.get('k1') + print(rlp_decode(val)) + oldroot1 = trie.root_node + old_root1_hash = trie.root_hash + assert trie._decode_to_node(old_root1_hash) == oldroot1 + trie.update('k1'.encode(), rlp_encode(['v1a'])) + val = trie.get('k1') + assert rlp_decode(val) == [b'v1a', ] + + # Already saved roots help in getting previous values + oldval = trie.get_at(oldroot1, 'k1') + assert rlp_decode(oldval) == [b'v1', ] + oldroot1a = trie.root_node + + trie.update('k1'.encode(), rlp_encode([b'v1b'])) + val = trie.get('k1') + 
assert rlp_decode(val) == [b'v1b'] + + oldval = trie.get_at(oldroot1a, 'k1') + assert rlp_decode(oldval) == [b'v1a', ] + + oldval = trie.get_at(oldroot1, 'k1') + assert rlp_decode(oldval) == [b'v1', ] + + oldroot1b = trie.root_node + + trie.update('k1'.encode(), rlp_encode([b'v1c'])) + val = trie.get('k1') + assert rlp_decode(val) == [b'v1c', ] + + oldval = trie.get_at(oldroot1b, 'k1') + assert rlp_decode(oldval) == [b'v1b', ] + + oldval = trie.get_at(oldroot1a, 'k1') + assert rlp_decode(oldval) == [b'v1a', ] + + oldval = trie.get_at(oldroot1, 'k1') + assert rlp_decode(oldval) == [b'v1', ] + + oldroot1c = trie.root_node + + trie.delete('k1'.encode()) + assert trie.get('k1') == BLANK_NODE + + oldval = trie.get_at(oldroot1c, 'k1') + assert rlp_decode(oldval) == [b'v1c', ] + + oldval = trie.get_at(oldroot1b, 'k1') + assert rlp_decode(oldval) == [b'v1b', ] + + oldval = trie.get_at(oldroot1a, 'k1') + assert rlp_decode(oldval) == [b'v1a', ] + + oldval = trie.get_at(oldroot1, 'k1') + assert rlp_decode(oldval) == [b'v1', ] + + trie.root_node = oldroot1c + val = trie.get('k1') + assert rlp_decode(val) == [b'v1c', ] + + trie.root_node = oldroot1 + val = trie.get('k1') + assert rlp_decode(val) == [b'v1', ] + diff --git a/state/trie/__init__.py b/state/trie/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/state/trie/pruning_trie.py b/state/trie/pruning_trie.py new file mode 100644 index 0000000000..ef39c7edd5 --- /dev/null +++ b/state/trie/pruning_trie.py @@ -0,0 +1,955 @@ +#!/usr/bin/env python + +import copy +import sys + +import rlp +from rlp.utils import decode_hex, encode_hex, ascii_chr, str_to_bytes +from state.db.db import BaseDB +from state.kv.kv_in_memory import KeyValueStorageInMemory +from state.util.fast_rlp import encode_optimized +from state.util.utils import is_string, to_string, sha3, sha3rlp, encode_int + +rlp_encode = encode_optimized + +bin_to_nibbles_cache = {} + +hti = {} +for i, c in enumerate('0123456789abcdef'): + hti[c] = i + 
+ +def bin_to_nibbles(s): + """convert string s to nibbles (half-bytes) + + >>> bin_to_nibbles("") + [] + >>> bin_to_nibbles("h") + [6, 8] + >>> bin_to_nibbles("he") + [6, 8, 6, 5] + >>> bin_to_nibbles("hello") + [6, 8, 6, 5, 6, 12, 6, 12, 6, 15] + """ + return [hti[c] for c in encode_hex(s)] + + +def nibbles_to_bin(nibbles): + if any(x > 15 or x < 0 for x in nibbles): + raise Exception("nibbles can only be [0,..15]") + + if len(nibbles) % 2: + raise Exception("nibbles must be of even numbers") + + res = b'' + for i in range(0, len(nibbles), 2): + res += ascii_chr(16 * nibbles[i] + nibbles[i + 1]) + return res + + +NIBBLE_TERMINATOR = 16 +RECORDING = 1 +NONE = 0 +VERIFYING = -1 +ZERO_ENCODED = encode_int(0) + +proving = False + + +class ProofConstructor: + + def __init__(self): + self.mode = [] + self.nodes = [] + self.exempt = [] + + def push(self, mode, nodes=[]): + global proving + proving = True + self.mode.append(mode) + self.exempt.append(set()) + if mode == VERIFYING: + self.nodes.append(set([rlp_encode(x) for x in nodes])) + else: + self.nodes.append(set()) + + def pop(self): + global proving + self.mode.pop() + self.nodes.pop() + self.exempt.pop() + if not self.mode: + proving = False + + def get_nodelist(self): + return list(map(rlp.decode, list(self.nodes[-1]))) + + def get_nodes(self): + return self.nodes[-1] + + def add_node(self, node): + node = rlp_encode(node) + if node not in self.exempt[-1]: + self.nodes[-1].add(node) + + def add_exempt(self, node): + self.exempt[-1].add(rlp_encode(node)) + + def get_mode(self): + return self.mode[-1] + +proof = ProofConstructor() + + +class InvalidSPVProof(Exception): + pass + + +def with_terminator(nibbles): + nibbles = nibbles[:] + if not nibbles or nibbles[-1] != NIBBLE_TERMINATOR: + nibbles.append(NIBBLE_TERMINATOR) + return nibbles + + +def without_terminator(nibbles): + nibbles = nibbles[:] + if nibbles and nibbles[-1] == NIBBLE_TERMINATOR: + del nibbles[-1] + return nibbles + + +def 
adapt_terminator(nibbles, has_terminator): + if has_terminator: + return with_terminator(nibbles) + else: + return without_terminator(nibbles) + + +def pack_nibbles(nibbles): + """pack nibbles to binary + + :param nibbles: a nibbles sequence. may have a terminator + """ + + if nibbles[-1:] == [NIBBLE_TERMINATOR]: + flags = 2 + nibbles = nibbles[:-1] + else: + flags = 0 + + oddlen = len(nibbles) % 2 + flags |= oddlen # set lowest bit if odd number of nibbles + if oddlen: + nibbles = [flags] + nibbles + else: + nibbles = [flags, 0] + nibbles + o = b'' + for i in range(0, len(nibbles), 2): + o += ascii_chr(16 * nibbles[i] + nibbles[i + 1]) + return o + + +def unpack_to_nibbles(bindata): + """unpack packed binary data to nibbles + + :param bindata: binary packed from nibbles + :return: nibbles sequence, may have a terminator + """ + o = bin_to_nibbles(bindata) + flags = o[0] + if flags & 2: + o.append(NIBBLE_TERMINATOR) + if flags & 1 == 1: + o = o[1:] + else: + o = o[2:] + return o + + +def starts_with(full, part): + ''' test whether the items in the part is + the leading items of the full + ''' + if len(full) < len(part): + return False + return full[:len(part)] == part + + +( + NODE_TYPE_BLANK, + NODE_TYPE_LEAF, + NODE_TYPE_EXTENSION, + NODE_TYPE_BRANCH +) = tuple(range(4)) + + +def is_key_value_type(node_type): + return node_type in [NODE_TYPE_LEAF, + NODE_TYPE_EXTENSION] + +BLANK_NODE = b'' +BLANK_ROOT = sha3rlp(BLANK_NODE) +DEATH_ROW_OFFSET = 2**62 + + +def transient_trie_exception(*args): + raise Exception("Transient trie") + + +class Trie: + + def __init__(self, db: BaseDB, root_hash=BLANK_ROOT, transient=False): + '''it also present a dictionary like interface + + :param db key value database + :root: blank or trie node in form of [key, value] or [v0,v1..v15,v] + ''' + self._db = db # Pass in a database object directly + self.transient = transient + if self.transient: + self.update = self.get = self.delete = transient_trie_exception + 
self.set_root_hash(root_hash) + self.death_row_timeout = 5000 + self.nodes_for_death_row = [] + self.journal = [] + + # For SPV proof production/verification purposes + def spv_grabbing(self, node): + global proving + if not proving: + pass + elif proof.get_mode() == RECORDING: + proof.add_node(copy.copy(node)) + # print('recording %s' % encode_hex(utils.sha3(rlp_encode(node)))) + elif proof.get_mode() == VERIFYING: + # print('verifying %s' % encode_hex(utils.sha3(rlp_encode(node)))) + if rlp_encode(node) not in proof.get_nodes(): + raise InvalidSPVProof("Proof invalid!") + + def spv_storing(self, node): + global proving + if not proving: + pass + elif proof.get_mode() == RECORDING: + proof.add_exempt(copy.copy(node)) + elif proof.get_mode() == VERIFYING: + proof.add_node(copy.copy(node)) + + @property + def root_hash(self): + '''always empty or a 32 bytes string + ''' + return self.get_root_hash() + + def get_root_hash(self): + if self.transient: + return self.transient_root_hash + if self.root_node == BLANK_NODE: + return BLANK_ROOT + assert isinstance(self.root_node, list) + val = rlp_encode(self.root_node) + key = sha3(val) + self.spv_grabbing(self.root_node) + return key + + def replace_root_hash(self, old_node, new_node): + # sys.stderr.write('rrh %r %r\n' % (old_node, new_node)) + self._delete_node_storage(old_node, is_root=True) + self._encode_node(new_node, is_root=True) + self.root_node = new_node + # sys.stderr.write('nrh: %s\n' % encode_hex(self.root_hash)) + + @root_hash.setter + def root_hash(self, value): + self.set_root_hash(value) + + def set_root_hash(self, root_hash): + assert is_string(root_hash) + assert len(root_hash) in [0, 32] + if self.transient: + self.transient_root_hash = root_hash + return + if root_hash == BLANK_ROOT: + self.root_node = BLANK_NODE + return + # print(repr(root_hash)) + self.root_node = self._decode_to_node(root_hash) + # dummy to increase reference count + # self._encode_node(self.root_node) + + def all_nodes(self, 
node=None): + proof.push(RECORDING) + self.get_root_hash() + self.to_dict() + o = proof.get_nodelist() + proof.pop() + return list(o) + # if node is None: + # node = self.root_node + # node_type = self._get_node_type(node) + # o = 1 if len(rlp_encode(node)) >= 32 else 0 + # if node_type == NODE_TYPE_BRANCH: + # for item in node[:16]: + # o += self.total_node_count(self._decode_to_node(item)) + # elif is_key_value_type(node_type): + # if node_type == NODE_TYPE_EXTENSION: + # o += self.total_node_count(self._decode_to_node(node[1])) + # return o + + def clear(self): + ''' clear all tree data + ''' + self._delete_child_storage(self.root_node) + self._delete_node_storage(self.root_node) + self.root_node = BLANK_NODE + + def _delete_child_storage(self, node): + node_type = self._get_node_type(node) + if node_type == NODE_TYPE_BRANCH: + for item in node[:16]: + self._delete_child_storage(self._decode_to_node(item)) + elif is_key_value_type(node_type): + node_type = self._get_node_type(node) + if node_type == NODE_TYPE_EXTENSION: + self._delete_child_storage(self._decode_to_node(node[1])) + + def _encode_node(self, node, is_root=False): + if node == BLANK_NODE: + return BLANK_NODE + # assert isinstance(node, list) + rlpnode = rlp_encode(node) + if len(rlpnode) < 32 and not is_root: + return node + + hashkey = sha3(rlpnode) + self._db.inc_refcount(hashkey, rlpnode) + return hashkey + + def _decode_to_node(self, encoded): + if encoded == BLANK_NODE: + return BLANK_NODE + if isinstance(encoded, list): + return encoded + o = rlp.decode(self._db.get(encoded)) + self.spv_grabbing(o) + return o + + def _get_node_type(self, node): + ''' get node type and content + + :param node: node in form of list, or BLANK_NODE + :return: node type + ''' + if node == BLANK_NODE: + return NODE_TYPE_BLANK + + if len(node) == 2: + nibbles = unpack_to_nibbles(node[0]) + has_terminator = (nibbles and nibbles[-1] == NIBBLE_TERMINATOR) + return NODE_TYPE_LEAF if has_terminator\ + else 
NODE_TYPE_EXTENSION + if len(node) == 17: + return NODE_TYPE_BRANCH + + def _get(self, node, key): + """ get value inside a node + + :param node: node in form of list, or BLANK_NODE + :param key: nibble list without terminator + :return: + BLANK_NODE if does not exist, otherwise value or hash + """ + node_type = self._get_node_type(node) + + if node_type == NODE_TYPE_BLANK: + return BLANK_NODE + + if node_type == NODE_TYPE_BRANCH: + # already reach the expected node + if not key: + return node[-1] + sub_node = self._decode_to_node(node[key[0]]) + return self._get(sub_node, key[1:]) + + # key value node + curr_key = without_terminator(unpack_to_nibbles(node[0])) + if node_type == NODE_TYPE_LEAF: + return node[1] if key == curr_key else BLANK_NODE + + if node_type == NODE_TYPE_EXTENSION: + # traverse child nodes + if starts_with(key, curr_key): + sub_node = self._decode_to_node(node[1]) + return self._get(sub_node, key[len(curr_key):]) + else: + return BLANK_NODE + + def _update(self, node, key, value): + # sys.stderr.write('u\n') + """ update item inside a node + + :param node: node in form of list, or BLANK_NODE + :param key: nibble list without terminator + .. 
note:: key may be [] + :param value: value string + :return: new node + + if this node is changed to a new node, it's parent will take the + responsibility to *store* the new node storage, and delete the old + node storage + """ + node_type = self._get_node_type(node) + + if node_type == NODE_TYPE_BLANK: + o = [pack_nibbles(with_terminator(key)), value] + self._encode_node(o) + return o + + elif node_type == NODE_TYPE_BRANCH: + if not key: + node[-1] = value + else: + new_node = self._update_and_delete_storage( + self._decode_to_node(node[key[0]]), + key[1:], value) + node[key[0]] = self._encode_node(new_node) + self._delete_node_storage(new_node) + self._encode_node(node) + return node + + elif is_key_value_type(node_type): + return self._update_kv_node(node, key, value) + + def _update_and_delete_storage(self, node, key, value): + # sys.stderr.write('uds_start %r\n' % node) + old_node = copy.deepcopy(node) + new_node = self._update(node, key, value) + # sys.stderr.write('uds_mid %r\n' % old_node) + self._delete_node_storage(old_node) + # sys.stderr.write('uds_end %r\n' % old_node) + return new_node + + def _update_kv_node(self, node, key, value): + node_type = self._get_node_type(node) + curr_key = without_terminator(unpack_to_nibbles(node[0])) + is_inner = node_type == NODE_TYPE_EXTENSION + # sys.stderr.write('ukv %r %r\n' % (key, value)) + + # find longest common prefix + prefix_length = 0 + for i in range(min(len(curr_key), len(key))): + if key[i] != curr_key[i]: + break + prefix_length = i + 1 + + # sys.stderr.write('pl: %d\n' % prefix_length) + + remain_key = key[prefix_length:] + remain_curr_key = curr_key[prefix_length:] + new_node_encoded = False + + if remain_key == [] == remain_curr_key: + # sys.stderr.write('1111\n') + if not is_inner: + o = [node[0], value] + self._encode_node(o) + return o + new_node = self._update_and_delete_storage( + self._decode_to_node(node[1]), remain_key, value) + new_node_encoded = True + + elif remain_curr_key == []: + if 
is_inner: + # sys.stderr.write('22221\n') + new_node = self._update_and_delete_storage( + self._decode_to_node(node[1]), remain_key, value) + new_node_encoded = True + # sys.stderr.write('22221e\n') + else: + # sys.stderr.write('22222\n') + new_node = [BLANK_NODE] * 17 + new_node[-1] = node[1] + new_node[remain_key[0]] = self._encode_node([ + pack_nibbles(with_terminator(remain_key[1:])), + value + ]) + else: + # sys.stderr.write('3333\n') + new_node = [BLANK_NODE] * 17 + if len(remain_curr_key) == 1 and is_inner: + new_node[remain_curr_key[0]] = node[1] + else: + new_node[remain_curr_key[0]] = self._encode_node([ + pack_nibbles( + adapt_terminator(remain_curr_key[1:], not is_inner) + ), + node[1] + ]) + + if remain_key == []: + new_node[-1] = value + else: + new_node[remain_key[0]] = self._encode_node([ + pack_nibbles(with_terminator(remain_key[1:])), value + ]) + + if prefix_length: + # sys.stderr.write('444441: %d\n' % prefix_length) + # create node for key prefix + o = [pack_nibbles(curr_key[:prefix_length]), + self._encode_node(new_node)] + if new_node_encoded: + self._delete_node_storage(new_node) + self._encode_node(o) + return o + else: + # sys.stderr.write('444442: %d\n' % prefix_length) + if not new_node_encoded: + self._encode_node(new_node) + return new_node + + def _getany(self, node, reverse=False, path=[]): + node_type = self._get_node_type(node) + if node_type == NODE_TYPE_BLANK: + return None + if node_type == NODE_TYPE_BRANCH: + if node[16]: + return [16] + scan_range = list(range(16)) + if reverse: + scan_range.reverse() + for i in scan_range: + o = self._getany(self._decode_to_node(node[i]), path=path + [i]) + if o: + return [i] + o + return None + curr_key = without_terminator(unpack_to_nibbles(node[0])) + if node_type == NODE_TYPE_LEAF: + return curr_key + + if node_type == NODE_TYPE_EXTENSION: + curr_key = without_terminator(unpack_to_nibbles(node[0])) + sub_node = self._decode_to_node(node[1]) + return self._getany(sub_node, path=path + 
curr_key) + + def _iter(self, node, key, reverse=False, path=[]): + node_type = self._get_node_type(node) + + if node_type == NODE_TYPE_BLANK: + return None + + elif node_type == NODE_TYPE_BRANCH: + if len(key): + sub_node = self._decode_to_node(node[key[0]]) + o = self._iter(sub_node, key[1:], reverse, path + [key[0]]) + if o: + return [key[0]] + o + if reverse: + scan_range = list(range(key[0] if len(key) else 0)) + else: + scan_range = list(range(key[0] + 1 if len(key) else 0, 16)) + for i in scan_range: + sub_node = self._decode_to_node(node[i]) + o = self._getany(sub_node, reverse, path + [i]) + if o: + return [i] + o + if reverse and node[16]: + return [16] + return None + + descend_key = without_terminator(unpack_to_nibbles(node[0])) + if node_type == NODE_TYPE_LEAF: + if reverse: + return descend_key if descend_key < key else None + else: + return descend_key if descend_key > key else None + + if node_type == NODE_TYPE_EXTENSION: + # traverse child nodes + sub_node = self._decode_to_node(node[1]) + sub_key = key[len(descend_key):] + if starts_with(key, descend_key): + o = self._iter(sub_node, sub_key, reverse, path + descend_key) + elif descend_key > key[:len(descend_key)] and not reverse: + o = self._getany(sub_node, sub_key, False, path + descend_key) + elif descend_key < key[:len(descend_key)] and reverse: + o = self._getany(sub_node, sub_key, True, path + descend_key) + else: + o = None + return descend_key + o if o else None + + def next(self, key): + key = bin_to_nibbles(key) + o = self._iter(self.root_node, key) + return nibbles_to_bin(o) if o else None + + def prev(self, key): + key = bin_to_nibbles(key) + o = self._iter(self.root_node, key, reverse=True) + return nibbles_to_bin(o) if o else None + + def _delete_node_storage(self, node, is_root=False): + '''delete storage + :param node: node in form of list, or BLANK_NODE + ''' + if node == BLANK_NODE: + return + # assert isinstance(node, list) + encoded = rlp_encode(node) + if len(encoded) < 32 and 
not is_root: + return + """ + ===== FIXME ==== + in the current trie implementation two nodes can share identical subtrees + thus we can not safely delete nodes for now + """ + hashkey = sha3(encoded) + self._db.dec_refcount(hashkey) + + def _delete(self, node, key): + """ update item inside a node + + :param node: node in form of list, or BLANK_NODE + :param key: nibble list without terminator + .. note:: key may be [] + :return: new node + + if this node is changed to a new node, it's parent will take the + responsibility to *store* the new node storage, and delete the old + node storage + """ + # sys.stderr.write('del\n') + node_type = self._get_node_type(node) + if node_type == NODE_TYPE_BLANK: + return BLANK_NODE + + if node_type == NODE_TYPE_BRANCH: + return self._delete_branch_node(node, key) + + if is_key_value_type(node_type): + return self._delete_kv_node(node, key) + + def _normalize_branch_node(self, node): + # sys.stderr.write('nbn\n') + '''node should have only one item changed + ''' + not_blank_items_count = sum(1 for x in range(17) if node[x]) + assert not_blank_items_count >= 1 + + if not_blank_items_count > 1: + self._encode_node(node) + return node + + # now only one item is not blank + not_blank_index = [i for i, item in enumerate(node) if item][0] + + # the value item is not blank + if not_blank_index == 16: + o = [pack_nibbles(with_terminator([])), node[16]] + self._encode_node(o) + return o + + # normal item is not blank + sub_node = self._decode_to_node(node[not_blank_index]) + sub_node_type = self._get_node_type(sub_node) + + if is_key_value_type(sub_node_type): + # collape subnode to this node, not this node will have same + # terminator with the new sub node, and value does not change + self._delete_node_storage(sub_node) + new_key = [not_blank_index] + \ + unpack_to_nibbles(sub_node[0]) + o = [pack_nibbles(new_key), sub_node[1]] + self._encode_node(o) + return o + if sub_node_type == NODE_TYPE_BRANCH: + o = 
[pack_nibbles([not_blank_index]), + node[not_blank_index]] + self._encode_node(o) + return o + assert False + + def _delete_and_delete_storage(self, node, key): + # sys.stderr.write('dds_start %r\n' % node) + old_node = copy.deepcopy(node) + new_node = self._delete(node, key) + # sys.stderr.write('dds_mid %r\n' % old_node) + self._delete_node_storage(old_node) + # sys.stderr.write('dds_end %r %r\n' % (old_node, new_node)) + return new_node + + def _delete_branch_node(self, node, key): + # sys.stderr.write('dbn\n') + # already reach the expected node + if not key: + node[-1] = BLANK_NODE + return self._normalize_branch_node(node) + + o = self._delete_and_delete_storage( + self._decode_to_node(node[key[0]]), key[1:]) + + encoded_new_sub_node = self._encode_node(o) + self._delete_node_storage(o) + # sys.stderr.write('dbn2\n') + + # if encoded_new_sub_nod == node[key[0]]: + # return node + + node[key[0]] = encoded_new_sub_node + if encoded_new_sub_node == BLANK_NODE: + return self._normalize_branch_node(node) + self._encode_node(node) + + return node + + def _delete_kv_node(self, node, key): + # sys.stderr.write('dkv\n') + node_type = self._get_node_type(node) + assert is_key_value_type(node_type) + curr_key = without_terminator(unpack_to_nibbles(node[0])) + + if not starts_with(key, curr_key): + # key not found + self._encode_node(node) + return node + + if node_type == NODE_TYPE_LEAF: + if key == curr_key: + return BLANK_NODE + else: + self._encode_node(node) + return node + + # for inner key value type + new_sub_node = self._delete_and_delete_storage( + self._decode_to_node(node[1]), key[len(curr_key):]) + # sys.stderr.write('nsn: %r %r\n' % (node, new_sub_node)) + + # if self._encode_node(new_sub_node) == node[1]: + # return node + + # new sub node is BLANK_NODE + if new_sub_node == BLANK_NODE: + return BLANK_NODE + + assert isinstance(new_sub_node, list) + + # new sub node not blank, not value and has changed + new_sub_node_type = self._get_node_type(new_sub_node) 
+ + if is_key_value_type(new_sub_node_type): + # sys.stderr.write('nsn1\n') + # collape subnode to this node, not this node will have same + # terminator with the new sub node, and value does not change + new_key = curr_key + unpack_to_nibbles(new_sub_node[0]) + o = [pack_nibbles(new_key), new_sub_node[1]] + self._delete_node_storage(new_sub_node) + self._encode_node(o) + return o + + if new_sub_node_type == NODE_TYPE_BRANCH: + # sys.stderr.write('nsn2\n') + o = [pack_nibbles(curr_key), self._encode_node(new_sub_node)] + self._delete_node_storage(new_sub_node) + self._encode_node(o) + return o + + # should be no more cases + assert False + + def delete(self, key): + ''' + :param key: a string with length of [0, 32] + ''' + if not is_string(key): + raise Exception("Key must be string") + + if len(key) > 32: + raise Exception("Max key length is 32") + + old_root = copy.deepcopy(self.root_node) + self.root_node = self._delete_and_delete_storage( + self.root_node, + bin_to_nibbles(to_string(key))) + self.replace_root_hash(old_root, self.root_node) + + def clear_all(self, node=None): + if node is None: + node = self.root_node + self._delete_node_storage(node) + if node == BLANK_NODE: + return + + node_type = self._get_node_type(node) + + self._delete_node_storage(node) + + if is_key_value_type(node_type): + value_is_node = node_type == NODE_TYPE_EXTENSION + if value_is_node: + self.clear_all(self._decode_to_node(node[1])) + + elif node_type == NODE_TYPE_BRANCH: + for i in range(16): + self.clear_all(self._decode_to_node(node[i])) + + def _get_size(self, node): + '''Get counts of (key, value) stored in this and the descendant nodes + + :param node: node in form of list, or BLANK_NODE + ''' + if node == BLANK_NODE: + return 0 + + node_type = self._get_node_type(node) + + if is_key_value_type(node_type): + value_is_node = node_type == NODE_TYPE_EXTENSION + if value_is_node: + return self._get_size(self._decode_to_node(node[1])) + else: + return 1 + elif node_type == 
NODE_TYPE_BRANCH: + sizes = [self._get_size(self._decode_to_node(node[x])) + for x in range(16)] + sizes = sizes + [1 if node[-1] else 0] + return sum(sizes) + + def _to_dict(self, node): + '''convert (key, value) stored in this and the descendant nodes + to dict items. + + :param node: node in form of list, or BLANK_NODE + + .. note:: + + Here key is in full form, rather than key of the individual node + ''' + if node == BLANK_NODE: + return {} + + node_type = self._get_node_type(node) + + if is_key_value_type(node_type): + nibbles = without_terminator(unpack_to_nibbles(node[0])) + key = b'+'.join([to_string(x) for x in nibbles]) + if node_type == NODE_TYPE_EXTENSION: + sub_dict = self._to_dict(self._decode_to_node(node[1])) + else: + sub_dict = {to_string(NIBBLE_TERMINATOR): node[1]} + + # prepend key of this node to the keys of children + res = {} + for sub_key, sub_value in sub_dict.items(): + full_key = (key + b'+' + sub_key).strip(b'+') + res[full_key] = sub_value + return res + + elif node_type == NODE_TYPE_BRANCH: + res = {} + for i in range(16): + sub_dict = self._to_dict(self._decode_to_node(node[i])) + + for sub_key, sub_value in sub_dict.items(): + full_key = (str_to_bytes(str(i)) + b'+' + sub_key).strip(b'+') + res[full_key] = sub_value + + if node[16]: + res[to_string(NIBBLE_TERMINATOR)] = node[-1] + return res + + def to_dict(self): + d = self._to_dict(self.root_node) + res = {} + for key_str, value in d.items(): + if key_str: + nibbles = [int(x) for x in key_str.split(b'+')] + else: + nibbles = [] + key = nibbles_to_bin(without_terminator(nibbles)) + res[key] = value + return res + + def get(self, key): + return self._get(self.root_node, bin_to_nibbles(to_string(key))) + + def __len__(self): + return self._get_size(self.root_node) + + def __getitem__(self, key): + return self.get(key) + + def __setitem__(self, key, value): + return self.update(key, value) + + def __delitem__(self, key): + return self.delete(key) + + def __iter__(self): + return 
iter(self.to_dict()) + + def __contains__(self, key): + return self.get(key) != BLANK_NODE + + def update(self, key, value): + ''' + :param key: a string + :value: a string + ''' + if not is_string(key): + raise Exception("Key must be string") + + # if len(key) > 32: + # raise Exception("Max key length is 32") + + if not is_string(value): + raise Exception("Value must be string") + + # if value == '': + # return self.delete(key) + old_root = copy.deepcopy(self.root_node) + self.root_node = self._update_and_delete_storage( + self.root_node, + bin_to_nibbles(to_string(key)), + to_string(value)) + self.replace_root_hash(old_root, self.root_node) + + def root_hash_valid(self): + if self.root_hash == BLANK_ROOT: + return True + return self.root_hash in self._db + + def produce_spv_proof(self, key): + proof.push(RECORDING) + self.get(key) + o = proof.get_nodelist() + proof.pop() + return o + + def get_at(self, root_node, key): + """ + Get value of a key when the root node was `root_node` + :param root_node: + :param key: + :return: + """ + return self._get(root_node, bin_to_nibbles(to_string(key))) + + +if __name__ == "__main__": + + def encode_node(nd): + if is_string(nd): + return encode_hex(nd) + else: + return encode_hex(rlp_encode(nd)) + + if len(sys.argv) >= 2: + if sys.argv[1] == 'insert': + t = Trie(KeyValueStorageInMemory(), decode_hex(sys.argv[3])) + t.update(sys.argv[4], sys.argv[5]) + print(encode_node(t.root_hash)) + elif sys.argv[1] == 'get': + t = Trie(KeyValueStorageInMemory(), decode_hex(sys.argv[3])) + print(t.get(sys.argv[4])) diff --git a/state/util/__init__.py b/state/util/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/state/util/fast_rlp.py b/state/util/fast_rlp.py new file mode 100644 index 0000000000..5fe45c7a97 --- /dev/null +++ b/state/util/fast_rlp.py @@ -0,0 +1,112 @@ +import sys + +import rlp +from state.kv.kv_in_memory import KeyValueStorageInMemory +from state.util.utils import int_to_big_endian, 
big_endian_to_int, safe_ord + + +def _encode_optimized(item): + """RLP encode (a nested sequence of) bytes""" + if isinstance(item, bytes): + if len(item) == 1 and ord(item) < 128: + return item + prefix = length_prefix(len(item), 128) + else: + item = b''.join([_encode_optimized(x) for x in item]) + prefix = length_prefix(len(item), 192) + return prefix + item + + +def length_prefix(length, offset): + """Construct the prefix to lists or strings denoting their length. + + :param length: the length of the item in bytes + :param offset: ``0x80`` when encoding raw bytes, ``0xc0`` when encoding a + list + """ + if length < 56: + return chr(offset + length) + else: + length_string = int_to_big_endian(length) + return chr(offset + 56 - 1 + len(length_string)) + length_string + + +def _decode_optimized(rlp): + o = [] + pos = 0 + _typ, _len, pos = consume_length_prefix(rlp, pos) + if _typ != list: + return rlp[pos: pos + _len] + while pos < len(rlp): + _, _l, _p = consume_length_prefix(rlp, pos) + o.append(_decode_optimized(rlp[pos: _l + _p])) + pos = _l + _p + return o + + +def consume_length_prefix(rlp, start): + """Read a length prefix from an RLP string. 
+ + :param rlp: the rlp string to read from + :param start: the position at which to start reading + :returns: a tuple ``(type, length, end)``, where ``type`` is either ``str`` + or ``list`` depending on the type of the following payload, + ``length`` is the length of the payload in bytes, and ``end`` is + the position of the first payload byte in the rlp string + """ + b0 = safe_ord(rlp[start]) + if b0 < 128: # single byte + return (str, 1, start) + elif b0 < 128 + 56: # short string + return (str, b0 - 128, start + 1) + elif b0 < 192: # long string + ll = b0 - 128 - 56 + 1 + l = big_endian_to_int(rlp[start + 1:start + 1 + ll]) + return (str, l, start + 1 + ll) + elif b0 < 192 + 56: # short list + return (list, b0 - 192, start + 1) + else: # long list + ll = b0 - 192 - 56 + 1 + l = big_endian_to_int(rlp[start + 1:start + 1 + ll]) + return (list, l, start + 1 + ll) + +# +if sys.version_info.major == 2: + encode_optimized = _encode_optimized + decode_optimized = _decode_optimized +else: + encode_optimized = rlp.codec.encode_raw + # rlp does not implement a decode_raw function. 
+ # decode_optimized = rlp.codec.decode_raw + decode_optimized = _decode_optimized + + +def main(): + import time + import state.trie.pruning_trie as trie + + def run(): + st = time.time() + x = trie.Trie(KeyValueStorageInMemory()) + for i in range(10000): + x.update(str(i), str(i**3)) + print('elapsed', time.time() - st) + return x.root_hash + + trie.rlp_encode = _encode_optimized + print('trie.rlp_encode = encode_optimized') + r3 = run() + + trie.rlp_encode = rlp.codec.encode_raw + print('trie.rlp_encode = rlp.codec.encode_raw') + r2 = run() + assert r2 == r3 + + trie.rlp_encode = rlp.encode + print('trie.rlp_encode = rlp.encode') + r = run() + assert r == r2 + + +if __name__ == '__main__': + main() diff --git a/state/util/utils.py b/state/util/utils.py new file mode 100644 index 0000000000..61a248b4d3 --- /dev/null +++ b/state/util/utils.py @@ -0,0 +1,522 @@ +import os +import string + +try: + from crypto.Hash import keccak + sha3_256 = lambda x: keccak.new(digest_bits=256, data=x).digest() +except: + import sha3 as _sha3 + sha3_256 = lambda x: _sha3.sha3_256(x).digest() + +# from bitcoin import privtopub +import sys +import rlp +from rlp.sedes import big_endian_int, BigEndianInt, Binary +from rlp.utils import decode_hex, encode_hex, ascii_chr, str_to_bytes +import random + +big_endian_to_int = lambda x: big_endian_int.deserialize(str_to_bytes(x).lstrip(b'\x00')) +int_to_big_endian = lambda x: big_endian_int.serialize(x) + + +TT256 = 2 ** 256 +TT256M1 = 2 ** 256 - 1 +TT255 = 2 ** 255 + +if sys.version_info.major == 2: + is_numeric = lambda x: isinstance(x, (int, long)) + is_string = lambda x: isinstance(x, (str, unicode)) + + def to_string(value): + return str(value) + + def int_to_bytes(value): + if isinstance(value, str): + return value + return int_to_big_endian(value) + + def to_string_for_regexp(value): + return str(value) + unicode = unicode + + def bytearray_to_bytestr(value): + return bytes(''.join(chr(c) for c in value)) + +else: + is_numeric = lambda 
x: isinstance(x, int) + is_string = lambda x: isinstance(x, bytes) + + def to_string(value): + if isinstance(value, bytes): + return value + if isinstance(value, str): + return bytes(value, 'utf-8') + if isinstance(value, int): + return bytes(str(value), 'utf-8') + + def int_to_bytes(value): + if isinstance(value, bytes): + return value + return int_to_big_endian(value) + + def to_string_for_regexp(value): + return str(to_string(value), 'utf-8') + unicode = str + + def bytearray_to_bytestr(value): + return bytes(value) + +isnumeric = is_numeric + +def removeLockFiles(dbPath): + if os.path.isdir(dbPath): + lockFilePath = os.path.join(dbPath, 'LOCK') + if os.path.isfile(lockFilePath): + os.remove(lockFilePath) + +def mk_contract_address(sender, nonce): + return sha3(rlp.encode([normalize_address(sender), nonce]))[12:] + + +def mk_metropolis_contract_address(sender, initcode): + return sha3(normalize_address(sender) + initcode)[12:] + + +def safe_ord(value): + if isinstance(value, int): + return value + else: + return ord(value) + +def isHex(val: str) -> bool: + """ + Return whether the given str represents a hex value or not + + :param val: the string to check + :return: whether the given str represents a hex value + """ + if isinstance(val, bytes): + # only decodes utf-8 string + try: + val = val.decode() + except ValueError: + return False + return isinstance(val, str) and all(c in string.hexdigits for c in val) + +# decorator + + +def debug(label): + def deb(f): + def inner(*args, **kwargs): + i = random.randrange(1000000) + print(label, i, 'start', args) + x = f(*args, **kwargs) + print(label, i, 'end', x) + return x + return inner + return deb + + +def flatten(li): + o = [] + for l in li: + o.extend(l) + return o + + +def bytearray_to_int(arr): + o = 0 + for a in arr: + o = (o << 8) + a + return o + + +def int_to_32bytearray(i): + o = [0] * 32 + for x in range(32): + o[31 - x] = i & 0xff + i >>= 8 + return o + +sha3_count = [0] + + +def sha3(seed): + 
sha3_count[0] += 1 + return sha3_256(to_string(seed)) + +# assert encode_hex(sha3(b'')) == b'c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470' + + +# def privtoaddr(x, extended=False): +# if len(x) > 32: +# x = decode_hex(x) +# o = sha3(privtopub(x)[1:])[12:] +# return add_checksum(o) if extended else o + + +def add_checksum(x): + if len(x) in (40, 48): + x = decode_hex(x) + if len(x) == 24: + return x + return x + sha3(x)[:4] + + +def add_cool_checksum(addr): + addr = normalize_address(addr) + addr_hex = encode_hex(addr) + + o = '' + h = encode_hex(sha3(addr_hex)) + if not isinstance(addr_hex, str): + # py3 bytes sequence + addr_hex = list(chr(c) for c in addr_hex) + h = list(chr(c) for c in h) + + for i, c in enumerate(addr_hex): + if c in '0123456789': + o += c + else: + o += c.lower() if h[i] in '01234567' else c.upper() + return '0x' + o + + +def check_and_strip_checksum(x): + if len(x) in (40, 48): + x = decode_hex(x) + assert len(x) == 24 and sha3(x[:20])[:4] == x[-4:] + return x[:20] + + +def check_and_strip_cool_checksum(addr): + assert add_cool_checksum(addr.lower()) == addr + return normalize_address(addr) + + +def normalize_address(x, allow_blank=False): + if is_numeric(x): + return int_to_addr(x) + if allow_blank and x in {'', b''}: + return b'' + if len(x) in (42, 50) and x[:2] in {'0x', b'0x'}: + x = x[2:] + if len(x) in (40, 48): + x = decode_hex(x) + if len(x) == 24: + assert len(x) == 24 and sha3(x[:20])[:4] == x[-4:] + x = x[:20] + if len(x) != 20: + raise Exception("Invalid address format: %r" % x) + return x + + +def zpad(x, l): + """ Left zero pad value `x` at least to length `l`. + + >>> zpad('', 1) + '\x00' + >>> zpad('\xca\xfe', 4) + '\x00\x00\xca\xfe' + >>> zpad('\xff', 1) + '\xff' + >>> zpad('\xca\xfe', 2) + '\xca\xfe' + """ + return b'\x00' * max(0, l - len(x)) + x + + +def rzpad(value, total_length): + """ Right zero pad value `x` at least to length `l`. 
+ + >>> zpad('', 1) + '\x00' + >>> zpad('\xca\xfe', 4) + '\xca\xfe\x00\x00' + >>> zpad('\xff', 1) + '\xff' + >>> zpad('\xca\xfe', 2) + '\xca\xfe' + """ + return value + b'\x00' * max(0, total_length - len(value)) + + +def zunpad(x): + i = 0 + while i < len(x) and (x[i] == 0 or x[i] == b'\x00'): + i += 1 + return x[i:] + + +def int_to_addr(x): + o = [b''] * 20 + for i in range(20): + o[19 - i] = ascii_chr(x & 0xff) + x >>= 8 + return b''.join(o) + + +def coerce_addr_to_bin(x): + if is_numeric(x): + return encode_hex(zpad(big_endian_int.serialize(x), 20)) + elif len(x) == 40 or len(x) == 0: + return decode_hex(x) + else: + return zpad(x, 20)[-20:] + + +def coerce_addr_to_hex(x): + if is_numeric(x): + return encode_hex(zpad(big_endian_int.serialize(x), 20)) + elif len(x) == 40 or len(x) == 0: + return x + else: + return encode_hex(zpad(x, 20)[-20:]) + + +def coerce_to_int(x): + if is_numeric(x): + return x + elif len(x) == 40: + return big_endian_to_int(decode_hex(x)) + else: + return big_endian_to_int(x) + + +def coerce_to_bytes(x): + if is_numeric(x): + return big_endian_int.serialize(x) + elif len(x) == 40: + return decode_hex(x) + else: + return x + + +def parse_int_or_hex(s): + if is_numeric(s): + return s + elif s[:2] in (b'0x', '0x'): + s = to_string(s) + tail = (b'0' if len(s) % 2 else b'') + s[2:] + return big_endian_to_int(decode_hex(tail)) + else: + return int(s) + + +def ceil32(x): + return x if x % 32 == 0 else x + 32 - (x % 32) + + +def to_signed(i): + return i if i < TT255 else i - TT256 + + +def sha3rlp(x): + return sha3(rlp.encode(x)) + + +# Format encoders/decoders for bin, addr, int + + +def decode_bin(v): + '''decodes a bytearray from serialization''' + if not is_string(v): + raise Exception("Value must be binary, not RLP array") + return v + + +def decode_addr(v): + '''decodes an address from serialization''' + if len(v) not in [0, 20]: + raise Exception("Serialized addresses must be empty or 20 bytes long!") + return encode_hex(v) + + +def 
decode_int(v): + '''decodes and integer from serialization''' + if len(v) > 0 and (v[0] == b'\x00' or v[0] == 0): + raise Exception("No leading zero bytes allowed for integers") + return big_endian_to_int(v) + + +def decode_int256(v): + return big_endian_to_int(v) + + +def encode_bin(v): + '''encodes a bytearray into serialization''' + return v + + +def encode_root(v): + '''encodes a trie root into serialization''' + return v + + +def encode_int(v): + '''encodes an integer into serialization''' + if not is_numeric(v) or v < 0 or v >= TT256: + raise Exception("Integer invalid or out of range: %r" % v) + return int_to_big_endian(v) + + +def encode_int256(v): + return zpad(int_to_big_endian(v), 256) + + +def scan_bin(v): + if v[:2] in ('0x', b'0x'): + return decode_hex(v[2:]) + else: + return decode_hex(v) + + +def scan_int(v): + if v[:2] in ('0x', b'0x'): + return big_endian_to_int(decode_hex(v[2:])) + else: + return int(v) + + +# Decoding from RLP serialization +decoders = { + "bin": decode_bin, + "addr": decode_addr, + "int": decode_int, + "int256b": decode_int256, +} + +# Encoding to RLP serialization +encoders = { + "bin": encode_bin, + "int": encode_int, + "trie_root": encode_root, + "int256b": encode_int256, +} + +# Encoding to printable format +printers = { + "bin": lambda v: b'0x' + encode_hex(v), + "addr": lambda v: v, + "int": lambda v: to_string(v), + "trie_root": lambda v: encode_hex(v), + "int256b": lambda x: encode_hex(zpad(encode_int256(x), 256)) +} + +# Decoding from printable format +scanners = { + "bin": scan_bin, + "addr": lambda x: x[2:] if x[:2] == b'0x' else x, + "int": scan_int, + "trie_root": lambda x: scan_bin, + "int256b": lambda x: big_endian_to_int(decode_hex(x)) +} + + +def int_to_hex(x): + o = encode_hex(encode_int(x)) + return b'0x' + (o[1:] if (len(o) > 0 and o[0] == b'0') else o) + + +def remove_0x_head(s): + return s[2:] if s[:2] == b'0x' else s + + +def print_func_call(ignore_first_arg=False, max_call_number=100): + ''' utility 
function to facilitate debug, it will print input args before + function call, and print return value after function call + + usage: + + @print_func_call + def some_func_to_be_debu(): + pass + + :param ignore_first_arg: whether print the first arg or not. + useful when ignore the `self` parameter of an object method call + ''' + from functools import wraps + + def display(x): + x = to_string(x) + try: + x.decode('ascii') + except: + return 'NON_PRINTABLE' + return x + + local = {'call_number': 0} + + def inner(f): + + @wraps(f) + def wrapper(*args, **kwargs): + local['call_number'] += 1 + tmp_args = args[1:] if ignore_first_arg and len(args) else args + this_call_number = local['call_number'] + print(('{0}#{1} args: {2}, {3}'.format( + f.__name__, + this_call_number, + ', '.join([display(x) for x in tmp_args]), + ', '.join(display(key) + '=' + to_string(value) + for key, value in kwargs.items()) + ))) + res = f(*args, **kwargs) + print(('{0}#{1} return: {2}'.format( + f.__name__, + this_call_number, + display(res)))) + + if local['call_number'] > 100: + raise Exception("Touch max call number!") + return res + return wrapper + return inner + + +def dump_state(trie): + res = '' + for k, v in list(trie.to_dict().items()): + res += '%r:%r\n' % (encode_hex(k), encode_hex(v)) + return res + + +class Denoms(): + + def __init__(self): + self.wei = 1 + self.babbage = 10 ** 3 + self.lovelace = 10 ** 6 + self.shannon = 10 ** 9 + self.szabo = 10 ** 12 + self.finney = 10 ** 15 + self.ether = 10 ** 18 + self.turing = 2 ** 256 + +denoms = Denoms() + + +address = Binary.fixed_length(20, allow_empty=True) +int20 = BigEndianInt(20) +int32 = BigEndianInt(32) +int256 = BigEndianInt(256) +hash32 = Binary.fixed_length(32) +trie_root = Binary.fixed_length(32, allow_empty=True) + + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[91m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + + +# def DEBUG(msg, 
*args, **kwargs): +# from ethereum import slogging +# +# slogging.DEBUG(msg, *args, **kwargs) diff --git a/stp_core/__init__.py b/stp_core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/stp_core/common/__init__.py b/stp_core/common/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/stp_core/common/config/__init__.py b/stp_core/common/config/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/stp_core/common/config/util.py b/stp_core/common/config/util.py new file mode 100644 index 0000000000..1d0ab89e28 --- /dev/null +++ b/stp_core/common/config/util.py @@ -0,0 +1,22 @@ +import os +from importlib import import_module +from importlib.util import module_from_spec, spec_from_file_location + + +# TODO: this is a stub, remove it when new +# configuration management tool is used + +CONFIG = None + + +def getConfig(homeDir=None): + """ + Reads a file called config.py in the project directory + + :raises: FileNotFoundError + :return: the configuration as a python object + """ + global CONFIG + if not CONFIG: + CONFIG = import_module("stp_core.config") + return CONFIG diff --git a/stp_core/common/error.py b/stp_core/common/error.py new file mode 100644 index 0000000000..ae7f95fa98 --- /dev/null +++ b/stp_core/common/error.py @@ -0,0 +1,16 @@ +# TODO: move it to plenum-util repo + + +def fault(ex: Exception, msg: str): + from stp_core.common.log import getlogger + getlogger().error(msg, exc_info=ex) + + +def error(msg: str) -> Exception: + """ + Wrapper to get around Python's distinction between statements and expressions + Can be used in lambdas and expressions such as: a if b else error(c) + + :param msg: error message + """ + raise Exception(msg) diff --git a/stp_core/common/log.py b/stp_core/common/log.py new file mode 100644 index 0000000000..2175257d2f --- /dev/null +++ b/stp_core/common/log.py @@ -0,0 +1,162 @@ +import inspect +import logging +import os +import sys +from ioflo.base.consoling 
import getConsole, Console +from stp_core.common.logging.TimeAndSizeRotatingFileHandler import TimeAndSizeRotatingFileHandler +from stp_core.common.util import Singleton +from stp_core.common.logging.handlers import CliHandler +from stp_core.common.config.util import getConfig + +TRACE_LOG_LEVEL = 5 +DISPLAY_LOG_LEVEL = 25 + +# TODO: move it to plenum-utils + + +class CustomAdapter(logging.LoggerAdapter): + def trace(self, msg, *args, **kwargs): + self.log(TRACE_LOG_LEVEL, msg, *args, **kwargs) + + def display(self, msg, *args, **kwargs): + self.log(DISPLAY_LOG_LEVEL, msg, *args, **kwargs) + + +def getlogger(name: object = None) -> object: + return Logger().getlogger(name) + + +class Logger(metaclass=Singleton): + def __init__(self, config=None): + + # TODO: This should take directory + self._config = config or getConfig() + self._addTraceToLogging() + self._addDisplayToLogging() + + self._handlers = {} + self._format = logging.Formatter(fmt=self._config.logFormat, + style=self._config.logFormatStyle) + + self._default_raet_verbosity = \ + getRAETLogLevelFromConfig("RAETLogLevel", + Console.Wordage.terse, + self._config) + + self._default_raet_log_file = \ + getRAETLogFilePath("RAETLogFilePath", self._config) + + if self._config.enableStdOutLogging: + self.enableStdLogging() + + logLevel = logging.INFO + if hasattr(self._config, "logLevel"): + logLevel = self._config.logLevel + self.setLogLevel(logLevel) + + @staticmethod + def getlogger(name=None): + if not name: + curframe = inspect.currentframe() + calframe = inspect.getouterframes(curframe, 2) + name = inspect.getmodule(calframe[1][0]).__name__ + logger = logging.getLogger(name) + return logger + + @staticmethod + def setLogLevel(log_level): + logging.root.setLevel(log_level) + + def setupRaet(self, raet_log_level=None, raet_log_file=None): + console = getConsole() + + verbosity = raet_log_level \ + if raet_log_level is not None \ + else self._default_raet_verbosity + file = raet_log_file or 
self._default_raet_log_file + + logging.info("Setting RAET log level {}".format(verbosity), + extra={"cli": False}) + + console.reinit(verbosity=verbosity, path=file, flushy=True) + + def enableStdLogging(self): + # only enable if CLI is not + if 'cli' in self._handlers: + raise RuntimeError('cannot configure STD logging ' + 'when CLI logging is enabled') + new = logging.StreamHandler(sys.stdout) + self._setHandler('std', new) + + def enableCliLogging(self, callback, override_tags=None): + h = CliHandler(callback, override_tags) + self._setHandler('cli', h) + # assumption is there's never a need to have std logging when in CLI + self._clearHandler('std') + + def enableFileLogging(self, filename): + d = os.path.dirname(filename) + if not os.path.exists(d): + os.makedirs(d) + new = TimeAndSizeRotatingFileHandler( + filename, + when=self._config.logRotationWhen, + interval=self._config.logRotationInterval, + backupCount=self._config.logRotationBackupCount, + utc=True, + maxBytes=self._config.logRotationMaxBytes) + self._setHandler('file', new) + + def _setHandler(self, typ: str, new_handler): + if new_handler.formatter is None: + new_handler.setFormatter(self._format) + + # assuming indempotence and removing old one first + self._clearHandler(typ) + + self._handlers[typ] = new_handler + logging.root.addHandler(new_handler) + + def _clearHandler(self, typ: str): + old = self._handlers.get(typ) + if old: + logging.root.removeHandler(old) + + @staticmethod + def _addTraceToLogging(): + logging.addLevelName(TRACE_LOG_LEVEL, "TRACE") + + def trace(self, message, *args, **kwargs): + if self.isEnabledFor(TRACE_LOG_LEVEL): + self._log(TRACE_LOG_LEVEL, message, args, **kwargs) + + logging.Logger.trace = trace + + @staticmethod + def _addDisplayToLogging(): + logging.addLevelName(DISPLAY_LOG_LEVEL, "DISPLAY") + + def display(self, message, *args, **kwargs): + if self.isEnabledFor(DISPLAY_LOG_LEVEL): + self._log(DISPLAY_LOG_LEVEL, message, args, **kwargs) + + 
logging.Logger.display = display + + +def getRAETLogLevelFromConfig(paramName, defaultValue, config): + try: + defaultVerbosity = config.__getattribute__(paramName) + defaultVerbosity = Console.Wordage.__getattribute__(defaultVerbosity) + except AttributeError: + defaultVerbosity = defaultValue + logging.debug("Ignoring RAET log level {} from config and using {} " + "instead".format(paramName, defaultValue)) + return defaultVerbosity + + +def getRAETLogFilePath(paramName, config): + try: + filePath = config.__getattribute__(paramName) + except AttributeError: + filePath = None + return filePath diff --git a/stp_core/common/logging/TimeAndSizeRotatingFileHandler.py b/stp_core/common/logging/TimeAndSizeRotatingFileHandler.py new file mode 100644 index 0000000000..f4b57d317c --- /dev/null +++ b/stp_core/common/logging/TimeAndSizeRotatingFileHandler.py @@ -0,0 +1,40 @@ +import os +from logging.handlers import TimedRotatingFileHandler +from logging.handlers import RotatingFileHandler + + +class TimeAndSizeRotatingFileHandler(TimedRotatingFileHandler, RotatingFileHandler): + + def __init__(self, filename, when = 'h', interval = 1, backupCount = 0, + encoding = None, delay = False, utc = False, atTime = None, + maxBytes=0): + + TimedRotatingFileHandler.__init__(self, filename, when, interval, + backupCount, encoding, delay, + utc, atTime) + self.maxBytes = maxBytes + + def shouldRollover(self, record): + return bool(TimedRotatingFileHandler.shouldRollover(self, record)) or \ + bool(RotatingFileHandler.shouldRollover(self, record)) + + def rotation_filename(self, default_name: str): + + if not os.path.exists(default_name): + return default_name + + dir = os.path.dirname(default_name) + defaultFileName = os.path.basename(default_name) + fileNames = os.listdir(dir) + + maxIndex = -1 + for fileName in fileNames: + if fileName.startswith(defaultFileName): + split = fileName.split(".") + try: + index = int(split[-1] if len(split) > 0 else 0) + except ValueError: + index = 0 + 
if index > maxIndex: + maxIndex = index + return "{}.{}".format(default_name, maxIndex + 1) diff --git a/stp_core/common/logging/__init__.py b/stp_core/common/logging/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/stp_core/common/logging/handlers.py b/stp_core/common/logging/handlers.py new file mode 100644 index 0000000000..9fe2742ca8 --- /dev/null +++ b/stp_core/common/logging/handlers.py @@ -0,0 +1,76 @@ +import logging + + +class CallbackHandler(logging.Handler): + def __init__(self, typestr, default_tags, callback, override_tags): + """ + Initialize the handler. + """ + super().__init__() + self.callback = callback + self.tags = default_tags + self.update_tags(override_tags or {}) + self.typestr = typestr + + def update_tags(self, override_tags): + self.tags.update(override_tags) + + def emit(self, record): + """ + Passes the log record back to the CLI for rendering + """ + should_cb = None + attr_val = None + if hasattr(record, self.typestr): + attr_val = getattr(record, self.typestr) + should_cb = bool(attr_val) + if should_cb is None and record.levelno >= logging.INFO: + should_cb = True + if hasattr(record, 'tags'): + for t in record.tags: + if t in self.tags: + if self.tags[t]: + should_cb = True + continue + else: + should_cb = False + break + if should_cb: + self.callback(record, attr_val) + + +class CliHandler(CallbackHandler): + def __init__(self, callback, override_tags=None): + default_tags = { + "add_replica": True + } + super().__init__(typestr="cli", + default_tags=default_tags, + callback=callback, + override_tags=override_tags) + + +class DemoHandler(CallbackHandler): + def __init__(self, callback, override_tags=None): + default_tags = { + "add_replica": True + } + super().__init__(typestr="demo", + default_tags=default_tags, + callback=callback, + override_tags=override_tags) + + +class TestingHandler(logging.Handler): + def __init__(self, tester): + """ + Initialize the handler. 
class adict(dict):
    """
    Dict with attr access to keys.

    * Reading a missing key (by item or attribute access) creates, stores and
      returns a new empty ``adict``, so nested levels can be assigned in one
      expression: ``a.b.c = 1``.
    * Plain ``dict`` values are converted to ``adict`` when they are stored
      through item or attribute assignment.
    * Attribute names of the form ``__name__`` are never treated as keys;
      looking up a missing one raises ``AttributeError`` so that protocol
      probes (``hasattr``, ``copy.deepcopy``) see a normal object.
    """
    # Sentinel distinguishing "key absent" from a stored ``None``.
    marker = object()

    def __init__(self, **kwargs):
        super().__init__()
        for key, value in kwargs.items():
            self[key] = value

    @staticmethod
    def __wrap(mapping):
        # Build an adict from any mapping. Unlike ``adict(**mapping)`` this
        # also works when keys are not strings.
        wrapped = adict()
        for key, value in mapping.items():
            wrapped[key] = value
        return wrapped

    def __setitem__(self, key, value):
        if isinstance(value, dict) and not isinstance(value, adict):
            value = adict.__wrap(value)
        super(adict, self).__setitem__(key, value)

    def __getitem__(self, key):
        found = self.get(key, adict.marker)
        if found is adict.marker:
            # Auto-create the missing level and remember it.
            found = adict()
            super(adict, self).__setitem__(key, found)
        return found

    def copy(self):
        """Return a shallow copy that is itself an ``adict``."""
        return self.__copy__()

    def __copy__(self):
        return adict.__wrap(self)

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails. Dunder names are
        # protocol hooks, not data keys: auto-creating them would hand a
        # non-callable adict to whoever probed for the hook.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return self[name]

    __setattr__ = __setitem__
encode(self, encoder=RawEncoder): + return encoder.encode(bytes(self)) diff --git a/stp_core/crypto/nacl_wrappers.py b/stp_core/crypto/nacl_wrappers.py new file mode 100644 index 0000000000..6ec71344de --- /dev/null +++ b/stp_core/crypto/nacl_wrappers.py @@ -0,0 +1,514 @@ +import libnacl + +from stp_core.crypto import encoding + + +class SignedMessage(bytes): + """ + A bytes subclass that holds a messaged that has been signed by a + :class:`SigningKey`. + """ + + @classmethod + def _from_parts(cls, signature, message, combined): + obj = cls(combined) + obj._signature = signature + obj._message = message + return obj + + @property + def signature(self): + """ + The signature contained within the :class:`SignedMessage`. + """ + return self._signature + + @property + def message(self): + """ + The message contained within the :class:`SignedMessage`. + """ + return self._message + + +class EncryptedMessage(bytes): + """ + A bytes subclass that holds a messaged that has been encrypted by a + :class:`SecretBox`. + """ + + @classmethod + def _from_parts(cls, nonce, ciphertext, combined): + obj = cls(combined) + obj._nonce = nonce + obj._ciphertext = ciphertext + return obj + + @property + def nonce(self): + """ + The nonce used during the encryption of the :class:`EncryptedMessage`. + """ + return self._nonce + + @property + def ciphertext(self): + """ + The ciphertext contained within the :class:`EncryptedMessage`. + """ + return self._ciphertext + + +class VerifyKey(encoding.Encodable): + """ + The public key counterpart to an Ed25519 SigningKey for producing digital + signatures. 
+ + :param key: [:class:`bytes`] Serialized Ed25519 public key + :param encoder: A class that is able to decode the `key` + """ + + def __init__(self, key, encoder=encoding.RawEncoder): + # Decode the key + key = encoder.decode(key) + + if len(key) != libnacl.crypto_sign_PUBLICKEYBYTES: + raise ValueError( + "The key must be exactly %s bytes long" % + libnacl.crypto_sign_PUBLICKEYBYTES, + ) + + self._key = key + + def __bytes__(self): + return self._key + + def verify(self, smessage, signature=None, encoder=encoding.RawEncoder): + """ + Verifies the signature of a signed message, returning the message + if it has not been tampered with else raising + :class:`~ValueError`. + + :param smessage: [:class:`bytes`] Either the original messaged or a + signature and message concated together. + :param signature: [:class:`bytes`] If an unsigned message is given for + smessage then the detached signature must be provded. + :param encoder: A class that is able to decode the secret message and + signature. + :rtype: :class:`bytes` + """ + if signature is not None: + # If we were given the message and signature separately, combine + # them. + smessage = signature + smessage + + # Decode the signed message + smessage = encoder.decode(smessage) + + return libnacl.crypto_sign_open(smessage, self._key) + + +class SigningKey(encoding.Encodable): + """ + Private key for producing digital signatures using the Ed25519 algorithm. + + Signing keys are produced from a 32-byte (256-bit) random seed value. This + value can be passed into the :class:`~SigningKey` as a + :func:`bytes` whose length is 32. + + .. warning:: This **must** be protected and remain secret. Anyone who knows + the value of your :class:`~SigningKey` or it's seed can + masquerade as you. + + :param seed: [:class:`bytes`] Random 32-byte value (i.e. private key) + :param encoder: A class that is able to decode the seed + + :ivar: verify_key: [:class:`~VerifyKey`] The verify + (i.e. 
public) key that corresponds with this signing key. + """ + + def __init__(self, seed, encoder=encoding.RawEncoder): + # Decode the seed + seed = encoder.decode(seed) + + # Verify that our seed is the proper size + if len(seed) != libnacl.crypto_sign_SEEDBYTES: + raise ValueError( + "The seed must be exactly %d bytes long" % + libnacl.crypto_sign_SEEDBYTES + ) + + public_key, secret_key = libnacl.crypto_sign_seed_keypair(seed) + + self._seed = seed + self._signing_key = secret_key + self.verify_key = VerifyKey(public_key) + + def __bytes__(self): + return self._seed + + @classmethod + def generate(cls): + """ + Generates a random :class:`~SigningKey` object. + + :rtype: :class:`~SigningKey` + """ + return cls( + libnacl.randombytes(libnacl.crypto_sign_SEEDBYTES), + encoder=encoding.RawEncoder, + ) + + def sign(self, message, encoder=encoding.RawEncoder): + """ + Sign a message using this key. + + :param message: [:class:`bytes`] The data to be signed. + :param encoder: A class that is used to encode the signed message. 
+ :rtype: :class:`~SignedMessage` + """ + raw_signed = libnacl.crypto_sign(message, self._signing_key) + + signature = encoder.encode(raw_signed[:libnacl.crypto_sign_BYTES]) + message = encoder.encode(raw_signed[libnacl.crypto_sign_BYTES:]) + signed = encoder.encode(raw_signed) + + return SignedMessage._from_parts(signature, message, signed) + + +class Signer: + ''' + Used to sign messages with nacl digital signature + ''' + def __init__(self, key=None): + if key: + if not isinstance(key, SigningKey): # not key so seed to regenerate + if len(key) == 32: + key = SigningKey(seed=key, encoder=encoding.RawEncoder) + else: + key = SigningKey(seed=key, encoder=encoding.HexEncoder) + else: + key = SigningKey.generate() + self.key = key + self.keyhex = self.key.encode(encoding.HexEncoder) # seed + self.keyraw = self.key.encode(encoding.RawEncoder) # seed + self.verhex = self.key.verify_key.encode(encoding.HexEncoder) + self.verraw = self.key.verify_key.encode(encoding.RawEncoder) + + def sign(self, msg): + ''' + Sign the message + ''' + return self.key.sign(msg) + + def signature(self, msg): + ''' + Return only the signature string resulting from signing the message + ''' + return self.key.sign(msg).signature + + +class Verifier: + ''' + Used to verify messages with nacl digital signature + ''' + def __init__(self, key=None): + if key: + if not isinstance(key, VerifyKey): + if len(key) == 32: + key = VerifyKey(key, encoding.RawEncoder) + else: + key = VerifyKey(key, encoding.HexEncoder) + self.key = key + if isinstance(self.key, VerifyKey): + self.keyhex = self.key.encode(encoding.HexEncoder) + self.keyraw = self.key.encode(encoding.RawEncoder) + else: + self.keyhex = '' + self.keyraw = '' + + def verify(self, signature, msg): + ''' + Verify the message + ''' + if not self.key: + return False + try: + self.key.verify(signature + msg) + except ValueError: + return False + return True + + +class PublicKey(encoding.Encodable): + """ + The public key counterpart to an 
Curve25519 :class:`PrivateKey` + for encrypting messages. + + :param public_key: [:class:`bytes`] Encoded Curve25519 public key + :param encoder: A class that is able to decode the `public_key` + + :cvar SIZE: The size that the public key is required to be + """ + + SIZE = libnacl.crypto_box_PUBLICKEYBYTES + + def __init__(self, public_key, encoder=encoding.RawEncoder): + self._public_key = encoder.decode(public_key) + + if len(self._public_key) != self.SIZE: + raise ValueError("The public key must be exactly %s bytes long" % + self.SIZE) + + def __bytes__(self): + return self._public_key + + +class PrivateKey(encoding.Encodable): + """ + Private key for decrypting messages using the Curve25519 algorithm. + + .. warning:: This **must** be protected and remain secret. Anyone who + knows the value of your :class:`~PrivateKey` can decrypt + any message encrypted by the corresponding + :class:`~PublicKey` + + :param private_key: The private key used to decrypt messages + :param encoder: The encoder class used to decode the given keys + + :cvar SIZE: The size that the private key is required to be + """ + + SIZE = libnacl.crypto_box_SECRETKEYBYTES + + def __init__(self, private_key, encoder=encoding.RawEncoder): + # Decode the secret_key + private_key = encoder.decode(private_key) + + # Verify that our seed is the proper size + if len(private_key) != self.SIZE: + raise ValueError( + "The secret key must be exactly %d bytes long" % self.SIZE) + + raw_public_key = libnacl.crypto_scalarmult_base(private_key) + + self._private_key = private_key + self.public_key = PublicKey(raw_public_key) + + def __bytes__(self): + return self._private_key + + @classmethod + def generate(cls): + """ + Generates a random :class:`~PrivateKey` object + + :rtype: :class:`~PrivateKey` + """ + return cls(libnacl.randombytes(PrivateKey.SIZE), encoder=encoding.RawEncoder) + + +class Box(encoding.Encodable): + """ + The Box class boxes and unboxes messages between a pair of keys + + The ciphertexts 
generated by :class:`~Box` include a 16 + byte authenticator which is checked as part of the decryption. An invalid + authenticator will cause the decrypt function to raise an exception. The + authenticator is not a signature. Once you've decrypted the message you've + demonstrated the ability to create arbitrary valid message, so messages you + send are repudiable. For non-repudiable messages, sign them after + encryption. + + :param private_key: :class:`~PrivateKey` used to encrypt and + decrypt messages + :param public_key: :class:`~PublicKey` used to encrypt and + decrypt messages + + :cvar NONCE_SIZE: The size that the nonce is required to be. + """ + + NONCE_SIZE = libnacl.crypto_box_NONCEBYTES + + def __init__(self, private_key, public_key): + if private_key and public_key: + self._shared_key = libnacl.crypto_box_beforenm( + public_key.encode(encoder=encoding.RawEncoder), + private_key.encode(encoder=encoding.RawEncoder), + ) + else: + self._shared_key = None + + def __bytes__(self): + return self._shared_key + + @classmethod + def decode(cls, encoded, encoder=encoding.RawEncoder): + # Create an empty box + box = cls(None, None) + + # Assign our decoded value to the shared key of the box + box._shared_key = encoder.decode(encoded) + + return box + + def encrypt(self, plaintext, nonce, encoder=encoding.RawEncoder): + """ + Encrypts the plaintext message using the given `nonce` and returns + the ciphertext encoded with the encoder. + + .. warning:: It is **VITALLY** important that the nonce is a nonce, + i.e. it is a number used only once for any given key. If you fail + to do this, you compromise the privacy of the messages encrypted. 
+ + :param plaintext: [:class:`bytes`] The plaintext message to encrypt + :param nonce: [:class:`bytes`] The nonce to use in the encryption + :param encoder: The encoder to use to encode the ciphertext + :rtype: [:class:`nacl.utils.EncryptedMessage`] + """ + if len(nonce) != self.NONCE_SIZE: + raise ValueError("The nonce must be exactly %s bytes long" % + self.NONCE_SIZE) + + ciphertext = libnacl.crypto_box_afternm( + plaintext, + nonce, + self._shared_key, + ) + + encoded_nonce = encoder.encode(nonce) + encoded_ciphertext = encoder.encode(ciphertext) + + return EncryptedMessage._from_parts( + encoded_nonce, + encoded_ciphertext, + encoder.encode(nonce + ciphertext), + ) + + def decrypt(self, ciphertext, nonce=None, encoder=encoding.RawEncoder): + """ + Decrypts the ciphertext using the given nonce and returns the + plaintext message. + + :param ciphertext: [:class:`bytes`] The encrypted message to decrypt + :param nonce: [:class:`bytes`] The nonce used when encrypting the + ciphertext + :param encoder: The encoder used to decode the ciphertext. + :rtype: [:class:`bytes`] + """ + # Decode our ciphertext + ciphertext = encoder.decode(ciphertext) + + if nonce is None: + # If we were given the nonce and ciphertext combined, split them. 
+ nonce = ciphertext[:self.NONCE_SIZE] + ciphertext = ciphertext[self.NONCE_SIZE:] + + if len(nonce) != self.NONCE_SIZE: + raise ValueError("The nonce must be exactly %s bytes long" % + self.NONCE_SIZE) + + plaintext = libnacl.crypto_box_open_afternm( + ciphertext, + nonce, + self._shared_key, + ) + + return plaintext + + +class Publican: + ''' + Container to manage remote nacl public key + .key is the public key + Intelligently converts hex encoded to object + ''' + def __init__(self, key=None): + if key: + if not isinstance(key, PublicKey): + if len(key) == 32: + key = PublicKey(key, encoding.RawEncoder) + else: + key = PublicKey(key, encoding.HexEncoder) + self.key = key + if isinstance(self.key, PublicKey): + self.keyhex = self.key.encode(encoding.HexEncoder) + self.keyraw = self.key.encode(encoding.RawEncoder) + else: + self.keyhex = '' + self.keyraw = '' + + +class Privateer: + ''' + Container for local nacl key pair + .key is the private key + ''' + def __init__(self, key=None): + if key: + if not isinstance(key, PrivateKey): + if len(key) == 32: + key = PrivateKey(key, encoding.RawEncoder) + else: + key = PrivateKey(key, encoding.HexEncoder) + else: + key = PrivateKey.generate() + self.key = key + self.keyhex = self.key.encode(encoding.HexEncoder) + self.keyraw = self.key.encode(encoding.RawEncoder) + self.pubhex = self.key.public_key.encode(encoding.HexEncoder) + self.pubraw = self.key.public_key.encode(encoding.RawEncoder) + + def nonce(self): + ''' + Generate a safe nonce value (safe assuming only this method is used to + create nonce values) + ''' + return libnacl.randombytes(Box.NONCE_SIZE) + + def encrypt(self, msg, pubkey, enhex=False): + ''' + Return duple of (cyphertext, nonce) resulting from encrypting the message + using shared key generated from the .key and the pubkey + If pubkey is hex encoded it is converted first + If enhex is True then use HexEncoder otherwise use RawEncoder + + Intended for the owner of the passed in public key + + msg is 
string + pub is Publican instance + ''' + if not isinstance(pubkey, PublicKey): + if len(pubkey) == 32: + pubkey = PublicKey(pubkey, encoding.RawEncoder) + else: + pubkey = PublicKey(pubkey, encoding.HexEncoder) + box = Box(self.key, pubkey) + nonce = self.nonce() + encoder = encoding.HexEncoder if enhex else encoding.RawEncoder + encrypted = box.encrypt(msg, nonce, encoder) + return (encrypted.ciphertext, encrypted.nonce) + + def decrypt(self, cipher, nonce, pubkey, dehex=False): + ''' + Return decrypted msg contained in cypher using nonce and shared key + generated from .key and pubkey. + If pubkey is hex encoded it is converted first + If dehex is True then use HexEncoder otherwise use RawEncoder + + Intended for the owner of .key + + cypher is string + nonce is string + pub is Publican instance + ''' + if not isinstance(pubkey, PublicKey): + if len(pubkey) == 32: + pubkey = PublicKey(pubkey, encoding.RawEncoder) + else: + pubkey = PublicKey(pubkey, encoding.HexEncoder) + box = Box(self.key, pubkey) + decoder = encoding.HexEncoder if dehex else encoding.RawEncoder + if dehex and len(nonce) != box.NONCE_SIZE: + nonce = decoder.decode(nonce) + return box.decrypt(cipher, nonce, decoder) \ No newline at end of file diff --git a/stp_core/crypto/signer.py b/stp_core/crypto/signer.py new file mode 100644 index 0000000000..49cdccb54d --- /dev/null +++ b/stp_core/crypto/signer.py @@ -0,0 +1,27 @@ +from abc import abstractmethod +from typing import Dict + +from stp_core.types import Identifier + +# TODO: move it to crypto repo + + +class Signer: + """ + Interface that defines a sign method. 
# TODO this behavior is non-obvious; refactor
def seedFromHex(seed):
    """
    Decode a 64-character hex seed into its 32 raw bytes.

    :param seed: ``str`` or ``bytes`` candidate seed
    :return: the decoded bytes when ``seed`` has length 64 and is valid hex,
        otherwise ``None``
    """
    if len(seed) != 64:
        return None
    try:
        return unhexlify(seed)
    except (ValueError, TypeError):
        # binascii.Error (non-hex digits) is a ValueError subclass; non-ASCII
        # text also raises ValueError and unsupported types raise TypeError.
        # Anything else (e.g. KeyboardInterrupt) must propagate.
        return None
def randomSeed(size=32):
    """
    Return ``size`` random ASCII hex-digit characters as ``bytes``.

    Characters are drawn from ``string.hexdigits`` (both letter cases), so
    the result is text-like seed material, not raw binary.

    The OS entropy source (``random.SystemRandom``) is used rather than the
    module-level Mersenne Twister generator: seeds feed key generation and
    must not be predictable or reproducible via ``random.seed()``.

    :param size: number of characters to generate
    :return: ``bytes`` of length ``size``
    """
    rng = random.SystemRandom()
    return ''.join(rng.choice(string.hexdigits)
                   for _ in range(size)).encode()
1024 * 1024) + cpuCount = psutil.cpu_count() + # we can have a 8 cpu but 100Mb free RAM and the tests will be slow + return memAvailableGb <= 1.5 # and cpuCount == 1 + + +# increase this number to allow eventually to change timeouts proportionatly +def getSlowFactor(): + if isMinimalConfiguration(): + return 1.5 + else: + return 1 + +slowFactor = getSlowFactor() + + +async def eventuallySoon(coroFunc: FlexFunc, *args): + return await eventually(coroFunc, *args, + retryWait=0.1, + timeout=3, + ratchetSteps=10) + + +async def eventuallyAll(*coroFuncs: FlexFunc, # (use functools.partials if needed) + totalTimeout: float, + retryWait: float=0.1, + acceptableExceptions=None, + acceptableFails: int=0, + override_timeout_limit=False): + # TODO: Bug when `acceptableFails` > 0 if the first check fails, it will + # exhaust the entire timeout. + """ + :param coroFuncs: iterable of no-arg functions + :param totalTimeout: + :param retryWait: + :param acceptableExceptions: + :param acceptableFails: how many of the passed in coroutines can + ultimately fail and still be ok + :return: + """ + start = time.perf_counter() + + def remaining(): + return totalTimeout + start - time.perf_counter() + + funcNames = [] + others = 0 + fails = 0 + rem = None + for cf in coroFuncs: + if len(funcNames) < 2: + funcNames.append(getFuncName(cf)) + else: + others += 1 + # noinspection PyBroadException + try: + rem = remaining() + if rem <= 0: + break + await eventually(cf, + retryWait=retryWait, + timeout=rem, + acceptableExceptions=acceptableExceptions, + verbose=True, + override_timeout_limit=override_timeout_limit) + except Exception: + fails += 1 + logger.debug("a coro {} with args {} timed out without succeeding; fail count: " + "{}, acceptable: {}". 
+ format(getFuncName(cf), get_func_args(cf), fails, acceptableFails)) + if fails > acceptableFails: + raise + + if rem is not None and rem <= 0: + fails += 1 + if fails > acceptableFails: + err= 'All checks could not complete successfully since total timeout ' \ + 'expired {} sec ago'.format(-1*rem if rem<0 else 0) + raise Exception(err) + + if others: + funcNames.append("and {} others".format(others)) + desc = ", ".join(funcNames) + logger.debug("{} succeeded with {:.2f} seconds to spare". + format(desc, remaining())) + + +def getFuncName(f): + if hasattr(f, "__name__"): + return f.__name__ + elif hasattr(f, "func"): + return "partial({})".format(getFuncName(f.func)) + else: + return "" + + +def get_func_args(f): + if hasattr(f, 'args'): + return f.args + else: + return list(inspect.signature(f).parameters) + + +def recordFail(fname, timeout): + pass + + +def recordSuccess(fname, timeout, param, remain): + pass + + +async def eventually(coroFunc: FlexFunc, + *args, + retryWait: float=0.1, + timeout: float=5, + ratchetSteps: Optional[int]=None, + acceptableExceptions=None, + verbose=True, + override_timeout_limit=False) -> T: + assert timeout > 0, 'Need a timeout value of greater than 0 but got {} instead'.format(timeout) + if not override_timeout_limit: + assert timeout < 240, '`eventually` timeout ({:.2f} sec) is huge. 
' \ + 'Is it expected?'.format(timeout) + else: + logger.debug('Overriding timeout limit to {} for evaluating {}' + .format(timeout, coroFunc)) + if acceptableExceptions and not isinstance(acceptableExceptions, Iterable): + acceptableExceptions = [acceptableExceptions] + start = time.perf_counter() + + ratchet = Ratchet.fromGoalDuration(retryWait*slowFactor, + ratchetSteps, + timeout*slowFactor).gen() \ + if ratchetSteps else None + + fname = getFuncName(coroFunc) + while True: + remain = 0 + try: + remain = start + timeout*slowFactor - time.perf_counter() + if remain < 0: + # this provides a convenient breakpoint for a debugger + logger.warning("{} last try...".format(fname), + extra={"cli": False}) + # noinspection PyCallingNonCallable + res = coroFunc(*args) + + if isawaitable(res): + result = await res + else: + result = res + + if verbose: + recordSuccess(fname, timeout, timeout*slowFactor, remain) + + logger.debug("{} succeeded with {:.2f} seconds to spare". + format(fname, remain)) + return result + except Exception as ex: + if acceptableExceptions and type(ex) not in acceptableExceptions: + raise + if remain >= 0: + sleep_dur = next(ratchet) if ratchet else retryWait + if verbose: + logger.trace("{} not succeeded yet, {:.2f} seconds " + "remaining..., will sleep for {}".format(fname, remain, sleep_dur)) + await asyncio.sleep(sleep_dur) + else: + recordFail(fname, timeout) + logger.error("{} failed; not trying any more because {} " + "seconds have passed; args were {}". 
+ format(fname, timeout, args)) + raise ex diff --git a/stp_core/loop/exceptions.py b/stp_core/loop/exceptions.py new file mode 100644 index 0000000000..8b95e1cea3 --- /dev/null +++ b/stp_core/loop/exceptions.py @@ -0,0 +1,7 @@ +# TODO: move it to plenum-util repo + + +class ProdableAlreadyAdded(Exception): + pass + + diff --git a/stp_core/loop/looper.py b/stp_core/loop/looper.py new file mode 100644 index 0000000000..d80200b97a --- /dev/null +++ b/stp_core/loop/looper.py @@ -0,0 +1,305 @@ +import asyncio +import inspect +import signal +import sys +import time +from asyncio import Task +from asyncio.coroutines import CoroWrapper +from typing import List, Optional + +# import uvloop +from stp_core.common.log import getlogger +from stp_core.common.util import lxor +from stp_core.loop.exceptions import ProdableAlreadyAdded +from stp_core.loop.startable import Status + +logger = getlogger() + +# TODO: move it to plenum-util repo + +class Prodable: + """ + An interface for declaring classes that can be started and prodded. When an + object is prodded, it just means that the event loop is giving it a chance + to do something. + """ + + def name(self): + raise NotImplementedError("subclass {} should implement this method" + .format(self)) + + async def prod(self, limit) -> int: + """ + Action to be performed each time the Prodable object gets processor + resources. + + :param limit: the number of messages to be processed + """ + raise NotImplementedError("subclass {} should implement this method" + .format(self)) + + def start(self, loop): + """ + Actions to be performed when the Prodable is starting up. + """ + raise NotImplementedError("subclass {} should implement this method" + .format(self)) + + def stop(self): + """ + Actions to be performed when the Prodable is starting up. 
+ """ + raise NotImplementedError("subclass {} should implement this method" + .format(self)) + + def get_status(self) -> Status: + """ + Get the current status of this Prodable + """ + raise NotImplementedError("subclass {} should implement this method" + .format(self)) + + +class Looper: + """ + A helper class for asyncio's event_loop + """ + + def __init__(self, + prodables: List[Prodable]=None, + loop=None, + debug=False, + autoStart=True): + """ + Initialize looper with an event loop. + + :param prodables: a list of prodables that this event loop will execute + :param loop: the event loop to use + :param debug: set_debug on event loop will be set to this value + :param autoStart: start immediately? + """ + self.prodables = list(prodables) if prodables is not None \ + else [] # type: List[Prodable] + + # if sys.platform == 'linux': + # asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) + + if loop: + self.loop = loop + else: + try: + #if sys.platform == 'win32': + # loop = asyncio.ProactorEventLoop() + # asyncio.set_event_loop(loop) + l = asyncio.get_event_loop() + if l.is_closed(): + raise RuntimeError("event loop was closed") + except Exception as ex: + logger.warning("Looper could not get default event loop; " + "creating a new one: {}".format(ex)) + # Trying out uvloop for linux + l = asyncio.new_event_loop() + asyncio.set_event_loop(l) + self.loop = l + + self.runFut = self.loop.create_task(self.runForever()) # type: Task + self.running = True # type: bool + self.loop.set_debug(debug) + + # TODO: uncomment this when python bug fixed (not just closed, but solved!) 
+ # https://bugs.python.org/issue23548 + # + # signals = [item for item in dir(signal) + # if item.startswith("SIG") and item[3] != "_"] + + signals = ["SIGINT"] + + setSignal = \ + signal.signal if sys.platform == 'win32' \ + else self.loop.add_signal_handler + + for sigName in signals: + try: + logger.debug("Setting handler for {}".format(sigName)) + sigNum = getattr(signal, sigName) + setSignal(sigNum, self.handleSignal) + except RuntimeError as e: + logger.debug("Cannot set handler for {} because {}".format(sigName, e)) + + self.autoStart = autoStart # type: bool + if self.autoStart: + self.startall() + + # @staticmethod + # def new_event_loop(): + # eventLib = asyncio if sys.platform == 'win32' else uvloop + # return eventLib.new_event_loop() + + async def prodAllOnce(self): + """ + Call `prod` once for each Prodable in this Looper + + :return: the sum of the number of events executed successfully + """ + limit = None + s = 0 + for n in self.prodables: + s += await n.prod(limit) + return s + + def add(self, prodable: Prodable) -> None: + """ + Add one Prodable object to this Looper's list of Prodables + + :param prodable: the Prodable object to add + """ + if prodable.name in [p.name for p in self.prodables]: + raise ProdableAlreadyAdded("Prodable {} already added.". 
+ format(prodable.name)) + self.prodables.append(prodable) + if self.autoStart: + prodable.start(self.loop) + + def removeProdable(self, prodable: Prodable=None, name: str=None) -> Optional[Prodable]: + """ + Remove the specified Prodable object from this Looper's list of Prodables + + :param prodable: the Prodable to remove + """ + if prodable: + self.prodables.remove(prodable) + return prodable + elif name: + for p in self.prodables: + if hasattr(p, "name") and getattr(p, "name") == name: + prodable = p + break + if prodable: + self.prodables.remove(prodable) + return prodable + else: + logger.warning("Trying to remove a prodable {} which is not present" + .format(prodable)) + else: + logger.error("Provide a prodable object or a prodable name") + + def hasProdable(self, prodable: Prodable=None, name: str=None) -> bool: + assert lxor(prodable, name), \ + "One and only one of prodable or name must be provided" + + for p in self.prodables: + if (prodable and p == prodable) or (name and name == p.name): + return True + + return False + + async def runOnceNicely(self): + """ + Execute `runOnce` with a small tolerance of 0.01 seconds so that the Prodables + can complete their other asynchronous tasks not running on the event-loop. + """ + start = time.perf_counter() + msgsProcessed = await self.prodAllOnce() + if msgsProcessed == 0: + await asyncio.sleep(0.01, loop=self.loop) # if no let other stuff run + dur = time.perf_counter() - start + if dur >= 0.5: + logger.info("it took {:.3f} seconds to run once nicely". + format(dur), extra={"cli": False}) + + def runFor(self, timeout): + self.run(asyncio.sleep(timeout)) + + async def runForever(self): + """ + Keep calling `runOnceNicely` in an infinite loop. + """ + while self.running: + await self.runOnceNicely() + + def run(self, *coros: CoroWrapper): + """ + Runs an arbitrary list of coroutines in order and then quits the loop, + if not running as a context manager. 
def handleSignal(self, sig=None):
    """
    Ask the looper to stop by clearing its `running` flag.

    :param sig: the signal that triggered the call; optional because
        asyncio invokes the handler without passing the signal
    """
    notice = "Signal {} received, stopping looper...".format(sig)
    logger.info(notice)
    self.running = False
def stopall(self):
    """
    Call `stop` on each Prodable held by this Looper, in list order.
    """
    for prodable in self.prodables:
        prodable.stop()
@unique
class Status(IntEnum):
    """
    Lifecycle status of a node.

    Members (the number of each entry is the member's value):

    1. stopped: instance stopped
    2. starting: looking for enough other nodes to start the instance
    3. started_hungry: instance started, but still looking for more nodes
    4. started: instance started, no longer seeking more instance nodes
    5. stopping: instance stopping
    """
    stopped = 1
    starting = 2
    started_hungry = 3
    started = 4
    stopping = 5

    @classmethod
    def going(cls):
        """
        :return: tuple of starting, started_hungry and started
        """
        return (cls.starting,) + cls.ready()

    @classmethod
    def hungry(cls):
        """
        :return: tuple of starting and started_hungry
        """
        return (cls.starting, cls.started_hungry)

    @classmethod
    def ready(cls):
        """
        :return: tuple of started_hungry and started
        """
        return (cls.started_hungry, cls.started)
class VerKeyNotFoundOnDisk(Exception):
    """
    Raised when a stack cannot get a remote's verification key from disk.

    :param stackName: name of the stack that looked for the key
    :param remoteName: name of the remote whose key is missing
    """
    def __init__(self, stackName, remoteName):
        self.stackName = stackName
        # Kept so handlers can tell which remote's key is missing without
        # having to parse the message.
        self.remoteName = remoteName
        super().__init__("{} could not get {}'s verification key from disk. Make sure the keys are initialized for this remote or provided explicitly."
                         .format(stackName, remoteName))
def findInNodeRegByHA(self, remoteHa):
    """
    Look up the registry entry whose address is the same as `remoteHa`,
    as judged by `sameAddr`.

    :param remoteHa: the host address to look for
    :return: the registry name for that address, or None if there is none
    :raises RuntimeError: if more than one registry entry has that address
    """
    matches = []
    for nodeName, nodeHa in self.registry.items():
        if self.sameAddr(nodeHa, remoteHa):
            matches.append(nodeName)
    if len(matches) > 1:
        raise RuntimeError("more than one node registry entry with the "
                           "same ha {}: {}".format(remoteHa, matches))
    if not matches:
        return None
    return matches[0]
+ + :param remote: the remote object + """ + if remote.name not in self.registry: + find = [name for name, ha in self.registry.items() + if ha == remote.ha] + assert len(find) == 1 + return find[0] + return remote.name + + @property + def notConnectedNodes(self) -> Set[str]: + """ + Returns the names of nodes in the registry this node is NOT connected + to. + """ + return set(self.registry.keys()) - self.conns diff --git a/stp_core/network/network_interface.py b/stp_core/network/network_interface.py new file mode 100644 index 0000000000..5784c79a2b --- /dev/null +++ b/stp_core/network/network_interface.py @@ -0,0 +1,216 @@ +import time +from abc import abstractmethod, ABCMeta +from typing import Set + +from stp_core.common.log import getlogger +from stp_core.network.exceptions import RemoteNotFound, DuplicateRemotes +from stp_core.types import HA + +logger = getlogger() + + +# TODO: There a number of methods related to keys management, they can be moved to some class like KeysManager +class NetworkInterface(metaclass=ABCMeta): + localips = ['127.0.0.1', '0.0.0.0'] + + @property + @abstractmethod + def remotes(self): + """ + Return all remote nodes (both connected and not) + """ + pass + + @property + @abstractmethod + def created(self): + pass + + @property + @abstractmethod + def name(self): + pass + + @staticmethod + @abstractmethod + def isRemoteConnected(r) -> bool: + """ + A node is considered to be connected if it is joined, allowed and alive + + :param r: the remote to check + """ + pass + + @staticmethod + @abstractmethod + def initLocalKeys(name, baseDir, sigseed, override=False): + pass + + @staticmethod + @abstractmethod + def initRemoteKeys(name, remoteName, baseDir, verkey, override=False): + pass + + @abstractmethod + def onHostAddressChanged(self): + pass + + @staticmethod + @abstractmethod + def areKeysSetup(name, baseDir): + pass + + @staticmethod + @abstractmethod + def learnKeysFromOthers(baseDir, name, others): + pass + + @abstractmethod + def 
def connectIfNotConnected(self, name=None, remoteId=None, ha=None,
                          verKeyRaw=None, publicKeyRaw=None):
    """
    Call `connect` with the given arguments unless `isConnectedTo` reports
    that a connection to the remote already exists.

    :param name: name of the remote
    :param remoteId: passed through to `connect`
    :param ha: host address of the remote
    :param verKeyRaw: passed through to `connect`
    :param publicKeyRaw: passed through to `connect`
    """
    if self.isConnectedTo(name=name, ha=ha):
        logger.debug('{} already connected {}'.format(self.name, ha))
        return
    self.connect(name=name, remoteId=remoteId, ha=ha,
                 verKeyRaw=verKeyRaw, publicKeyRaw=publicKeyRaw)
+ + :param name: the name of the remote to find + :param ha: host address pair the remote to find + :raises: RemoteNotFound + """ + return self.findInRemotesByName(name) if name else \ + self.findInRemotesByHA(ha) + + def findInRemotesByHA(self, remoteHa: HA): + remotes = [r for r in self.remotes.values() + if r.ha == remoteHa] + if len(remotes) > 1: + raise DuplicateRemotes(remotes) + if not remotes: + raise RemoteNotFound(remoteHa) + return remotes[0] + + def findInRemotesByName(self, name: str): + """ + Find the remote by name. + + :param name: the name of the remote to find + :raises: RemoteNotFound + """ + remotes = [r for r in self.remotes.values() + if r.name == name] + if len(remotes) > 1: + raise DuplicateRemotes(remotes) + if not remotes: + raise RemoteNotFound(name) + return remotes[0] + + def hasRemote(self, name): + try: + self.getRemote(name=name) + return True + except RemoteNotFound: + return False + + def removeRemoteByName(self, name: str) -> int: + """ + Remove the remote by name. 
def sameAddr(self, ha, ha2) -> bool:
    """
    Tell whether two host addresses refer to the same endpoint.

    Equal addresses are the same. Otherwise they are the same only when
    the ports match and both hosts are listed in `localips`.

    :param ha: first (host, port) pair
    :param ha2: second (host, port) pair
    """
    if ha == ha2:
        return True
    if ha[1] != ha2[1]:
        # different ports can never be the same address
        return False
    return all(host in self.localips for host in (ha[0], ha2[0]))
def get(self, count: int=1, readOnly: bool=False, recurlvl=0):
    """
    Dispense `count` ports, advancing the counter stored in `self.FILE`.

    The file is opened and an exclusive lock is requested on it before the
    counter is read. Each candidate is the stored number plus one, wrapping
    to `minPort` once it exceeds `maxPort`; the candidate is written back
    before it is checked with `checkPortAvailable`.

    :param count: how many ports to dispense
    :param readOnly: when True, return the stored number (an int) without
        changing the file
    :param recurlvl: number of failed availability checks already counted
        against `maxportretries`
    :return: list of dispensed ports (or a single int when `readOnly`)
    :raises: whatever `checkPortAvailable` raised, once more than
        `maxportretries` candidates have failed
    """
    with open(self.FILE, "r+") as file:
        portalocker.lock(file, portalocker.LOCK_EX)
        ports = []
        while len(ports) < count:
            file.seek(0)
            port = int(file.readline())
            if readOnly:
                return port
            port += 1
            if port > self.maxPort:
                port = self.minPort
            file.seek(0)
            file.write(str(port))
            # Drop digits left over from a longer previous value; without
            # this, writing "80" over "9999" would be read back as 8099.
            file.truncate()
            try:
                checkPortAvailable(("", port))
                ports.append(port)
                self.logger.debug("new port dispensed: {}".format(port))
            except Exception:
                # `Exception` rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are never swallowed by a retry.
                if recurlvl < self.maxportretries:
                    self.logger.debug("port {} unavailable, trying again...".
                                      format(port))
                    recurlvl += 1
                else:
                    self.logger.debug("port {} unavailable, max retries {} "
                                      "reached".
                                      format(port, self.maxportretries))
                    raise
        return ports
def distributedConnectionMap(names: List[str]) -> OrderedDict:
    """
    Create a map where every node is connected every other node.
    Assume each key in the returned dictionary to be connected to each item in
    its value(list).

    The names are processed in sorted order; the list passed in is left
    untouched. An empty list yields an empty map.

    :param names: a list of node names
    :return: a dictionary of name -> list(name).
    """
    # Sort a copy: sorting in place would reorder the caller's list.
    ordered = sorted(names)
    connmap = OrderedDict((n, []) for n in ordered)
    if not ordered:
        # Nothing to connect, and the per-node quota below would divide by 0.
        return connmap
    combos = list(itertools.combinations(ordered, 2))
    # Per-node quota of pairs recorded under the first name of the pair.
    maxPer = math.ceil(len(combos) / len(ordered))
    for a, b in combos:
        if len(connmap[a]) < maxPer:
            connmap[a].append(b)
        else:
            connmap[b].append(a)
    return connmap
def get(self, iteration: int):
    """
    Evaluate the curve `a * e^(b * iteration) + c` and clamp the result.

    :param iteration: the point on the curve to evaluate
    :return: the curve value, raised to `base` when a base is set and
        lowered to `peak` when a peak is set
    """
    v = (self.a * exp(self.b * iteration)) + self.c
    # Compare against None: a bound of 0 is a legitimate limit and must not
    # be mistaken for "no bound".
    if self.base is not None:
        v = max(self.base, v)
    if self.peak is not None:
        v = min(self.peak, v)
    return v
tmpdir_factory.mktemp('').strpath + + +@pytest.fixture(scope='function') +def tdir_for_func(tmpdir_factory): + return tmpdir_factory.mktemp('').strpath + diff --git a/stp_core/test/helper.py b/stp_core/test/helper.py new file mode 100644 index 0000000000..de5cde947c --- /dev/null +++ b/stp_core/test/helper.py @@ -0,0 +1,136 @@ +import os + +import logging + +import json +from stp_core.crypto.util import randomSeed + +from stp_core.loop.motor import Motor +from stp_core.network.keep_in_touch import KITNetworkInterface + +logger = logging.getLogger() + + +def createTempDir(tmpdir_factory, counter): + tempdir = os.path.join(tmpdir_factory.getbasetemp().strpath, + str(next(counter))) + logger.debug("module-level temporary directory: {}".format(tempdir)) + return tempdir + + +class Printer: + def __init__(self, name): + self.name = name + self.printeds = [] + + def print(self, m): + print('{} printing... {}'.format(self.name, m)) + self.printeds.append(m) + + def reset(self): + self.printeds = [] + + +def chkPrinted(p, m): + assert m in [_[0] for _ in p.printeds] + + +class CollectingMsgsHandler: + def __init__(self): + self.receivedMessages = [] + + def handler(self, m): + msg, sender = m + self.receivedMessages.append(msg) + # print("Got message", msg) + + +class CounterMsgsHandler: + def __init__(self): + self.receivedMsgCount = 0 + + def handler(self, m): + msg, sender = m + self.receivedMsgCount += 1 + + +class SMotor(Motor): + def __init__(self, stack): + Motor.__init__(self) + self.stack = stack + + async def prod(self, limit) -> int: + c = await self.stack.service(limit) + if isinstance(self.stack, KITNetworkInterface): + self.stack.serviceLifecycle() + return c + + def start(self, loop): + self.stack.start() + + def stop(self): + self.stack.stop() + + +def prepStacks(looper, *stacks, connect=True, useKeys=True): + motors = [] + for stack in stacks: + motor = SMotor(stack) + looper.add(motor) + motors.append(motor) + if connect: + connectStacks(stacks, useKeys) 
def connectStacks(stacks, useKeys=True):
    """
    Make every stack call `connect` towards each of the other stacks.

    :param stacks: the stacks to connect to one another
    :param useKeys: when True pass each peer's `verKeyRaw` and
        `publicKeyRaw` to `connect`, otherwise pass None for both
    """
    for current in stacks:
        for peer in stacks:
            if not (current != peer):
                # a stack does not connect to itself
                continue
            connect = current.connect
            params = {"name": peer.name, "ha": peer.ha}
            params["verKeyRaw"] = peer.verKeyRaw if useKeys else None
            params["publicKeyRaw"] = peer.publicKeyRaw if useKeys else None
            connect(**params)
typing import Dict + +from raet.raeting import AutoMode, TrnsKind +from raet.road.estating import RemoteEstate +from raet.road.keeping import RoadKeep +from raet.road.stacking import RoadStack +from raet.road.transacting import Joiner, Allower, Messenger +from stp_core.common.error import error +from stp_core.common.log import getlogger +from stp_core.crypto.nacl_wrappers import Signer + +from stp_core.crypto.util import ed25519SkToCurve25519, \ + getEd25519AndCurve25519Keys, ed25519PkToCurve25519 +from stp_core.network.auth_mode import AuthMode +from stp_core.network.keep_in_touch import KITNetworkInterface +from stp_core.network.network_interface import NetworkInterface +from stp_core.network.util import checkPortAvailable, distributedConnectionMap +from stp_core.types import HA +from stp_raet.util import getLocalKeep, getLocalEstateData + +logger = getlogger() + +# this overrides the defaults +Joiner.RedoTimeoutMin = 1.0 +Joiner.RedoTimeoutMax = 10.0 + +Allower.RedoTimeoutMin = 1.0 +Allower.RedoTimeoutMax = 10.0 + +Messenger.RedoTimeoutMin = 1.0 +Messenger.RedoTimeoutMax = 10.0 + + +class RStack(NetworkInterface): + def __init__(self, *args, **kwargs): + checkPortAvailable(kwargs['ha']) + basedirpath = kwargs.get('basedirpath') + + authMode = kwargs.pop('auth_mode', None) + kwargs['auto'] = self._getAuto(authMode) + + keep = RoadKeep(basedirpath=basedirpath, + stackname=kwargs['name'], + auto=kwargs.get('auto'), + baseroledirpath=basedirpath) # type: RoadKeep + kwargs['keep'] = keep + localRoleData = keep.loadLocalRoleData() + + sighex = kwargs.pop('sighex', None) or localRoleData['sighex'] + if not sighex: + (sighex, _), (prihex, _) = getEd25519AndCurve25519Keys() + else: + prihex = ed25519SkToCurve25519(sighex, toHex=True) + kwargs['sigkey'] = sighex + kwargs['prikey'] = prihex + + self.msgHandler = kwargs.pop('msgHandler', None) # type: Callable + # if no timeout is set then message will never timeout + self.messageTimeout = kwargs.pop('messageTimeout', 0) + 
+ self.raetStack = RoadStack(*args, **kwargs) + + if self.ha[1] != kwargs['ha'].port: + error("the stack port number has changed, likely due to " + "information in the keep. {} passed {}, actual {}". + format(kwargs['name'], kwargs['ha'].port, self.ha[1])) + self._created = time.perf_counter() + self.coro = None + + self._conns = set() # type: Set[str] + + def _getAuto(self, authMode): + if authMode == AuthMode.ALLOW_ANY.value: + return AutoMode.always + if authMode == AuthMode.RESTRICTED.value: + return AutoMode.never + return None + + def __repr__(self): + return self.name + + @property + def name(self): + return self.raetStack.name + + @property + def remotes(self): + return self.raetStack.remotes + + @property + def created(self): + return self._created + + @property + def rxMsgs(self): + return self.raetStack.rxMsgs + + @staticmethod + def isRemoteConnected(r) -> bool: + """ + A node is considered to be connected if it is joined, allowed and alived. + + :param r: the remote to check + """ + return r.joined and r.allowed and r.alived + + @staticmethod + def initLocalKeys(name, baseDir, sigseed, override=False): + """ + Initialize RAET local keep. Write local role data to file. 
+ + :param name: name of the node + :param baseDir: base directory + :param pkseed: seed to generate public and private key pair + :param sigseed: seed to generate signing and verification key pair + :param override: overwrite the local role.json file if already exists + :return: tuple(public key, verification key) + """ + rolePath = os.path.join(baseDir, name, "role", "local", "role.json") + if os.path.isfile(rolePath): + if not override: + raise FileExistsError("Keys exists for local role {}".format(name)) + + if sigseed and not isinstance(sigseed, bytes): + sigseed = sigseed.encode() + + signer = Signer(sigseed) + keep = RoadKeep(stackname=name, baseroledirpath=baseDir) + sigkey, verkey = signer.keyhex, signer.verhex + prikey, pubkey = ed25519SkToCurve25519(sigkey, toHex=True), \ + ed25519PkToCurve25519(verkey, toHex=True) + data = OrderedDict([ + ("role", name), + ("prihex", prikey), + ("sighex", sigkey) + ]) + keep.dumpLocalRoleData(data) + return pubkey.decode(), verkey.decode() + + @staticmethod + def initRemoteKeys(name, remoteName, baseDir, verkey, override=False): + """ + Initialize RAET remote keep + + :param name: name of the node + :param remoteName: name of the remote to store keys for + :param baseDir: base directory + :param pubkey: public key of the remote + :param verkey: verification key of the remote + :param override: overwrite the role.remoteName.json file if it already + exists. + """ + rolePath = os.path.join(baseDir, name, "role", "remote", "role.{}.json". + format(remoteName)) + if os.path.isfile(rolePath): + if not override: + raise FileExistsError("Keys exists for remote role {}".
+ format(remoteName)) + + keep = RoadKeep(stackname=name, baseroledirpath=baseDir) + data = OrderedDict([ + ('role', remoteName), + ('acceptance', 1), + ('pubhex', ed25519PkToCurve25519(verkey, toHex=True)), + ('verhex', verkey) + ]) + keep.dumpRemoteRoleData(data, role=remoteName) + + def onHostAddressChanged(self): + logger.debug("{} clearing local data in keep as host address changed". + format(self.name)) + self.raetStack.keep.clearLocalData() + + @staticmethod + def areKeysSetup(name, baseDir): + """ + Check that the local RAET keep has the values of role, sighex and prihex + populated for the given node + + :param name: the name of the node to check the keys for + :param baseDir: base directory of Plenum + :return: whether the keys are setup + """ + localRoleData = getLocalKeep(name=name, baseDir=baseDir) + + for key in ['role', 'sighex', 'prihex']: + if localRoleData.get(key) is None: + return False + return True + + @staticmethod + def learnKeysFromOthers(baseDir, name, others): + pass + + @staticmethod + def getHaFromLocal(name, basedirpath): + localEstate = getLocalEstateData(name, basedirpath) + if localEstate: + return localEstate.get("ha") + + def tellKeysToOthers(self, others): + pass + + def getRemote(self, name: str = None, ha: HA = None): + """ + Find the remote by name or ha. + + :param name: the name of the remote to find + :param ha: host address pair the remote to find + :raises: RemoteNotFound + """ + return self.findInRemotesByHA(ha) if ha else \ + self.findInRemotesByName(name) + + def connect(self, name=None, remoteId=None, ha=None, verKeyRaw=None, publicKeyRaw=None): + """ + Connect to the node specified by name. + + :param name: name of the node to connect to + :type name: str or (HA, tuple) + :return: the uid of the remote estate, or None if a connect is not + attempted + """ + # if not self.isKeySharing: + # logger.debug("{} skipping join with {} because not key sharing". 
+ # format(self, name)) + # return None + if not remoteId and not ha: + raise ValueError('Either Host Address or Remote ID must be provided to connect to a node in Raet stack') + + if remoteId: + remote = self.remotes[remoteId] + return self._doConnectRemote(remote, name) + else: + return self._doConnectByHA(ha, name) + + + def _doConnectByHA(self, ha, name=None): + remote = RemoteEstate(stack=self.raetStack, + ha=ha) + self.raetStack.addRemote(remote) + return self._doConnectRemote(remote, name) + + def _doConnectRemote(self, remote, name=None): + # updates the store time so the join timer is accurate + self.updateStamp() + self.raetStack.join(uid=remote.uid, cascade=True, timeout=30) + logger.info("{} looking for {} at {}:{}". + format(self, name or remote.name, *remote.ha), + extra={"cli": "PLAIN", "tags": ["node-looking"]}) + return remote.uid + + def removeRemote(self, r): + self.raetStack.removeRemote(r) + + def transmit(self, msg, uid, timeout=None): + self.raetStack.transmit(msg, uid, timeout=timeout) + + @property + def ha(self): + return self.raetStack.ha + + def start(self): + if not self.opened: + self.open() + logger.info("stack {} starting at {} in {} mode" + .format(self, self.ha, self.raetStack.keep.auto), + extra={"cli": False}) + # self.coro = self._raetcoro() + self.coro = self._raetcoro + + def stop(self): + if self.opened: + self.close() + self.coro = None + logger.info("stack {} stopped".format(self.name), extra={"cli": False}) + + async def service(self, limit=None) -> int: + """ + Service `limit` number of received messages in this stack. + + :param limit: the maximum number of messages to be processed. If None, + processes all of the messages in rxMsgs. + :return: the number of messages processed. 
+ """ + pracLimit = limit if limit else sys.maxsize + if self.coro: + # x = next(self.coro) + x = await self.coro() + if x > 0: + for x in range(pracLimit): + try: + self.msgHandler(self.raetStack.rxMsgs.popleft()) + except IndexError: + break + return x + else: + logger.debug("{} is stopped".format(self)) + return 0 + + # def _raetcoro(self): + # """ + # Generator to service all messages. + # Yields the length of rxMsgs queue of this stack. + # """ + # while True: + # try: + # self._serviceStack(self.age) + # l = len(self.rxMsgs) + # except Exception as ex: + # if isinstance(ex, OSError) and \ + # len(ex.args) > 0 and \ + # ex.args[0] == 22: + # logger.error("Error servicing stack {}: {}. This could be " + # "due to binding to an internal network " + # "and trying to route to an external one.". + # format(self.name, ex), extra={'cli': 'WARNING'}) + # else: + # logger.error("Error servicing stack {}: {} {}". + # format(self.name, ex, ex.args), + # extra={'cli': 'WARNING'}) + # + # l = 0 + # yield l + + async def _raetcoro(self): + try: + await self._serviceStack(self.age) + l = len(self.raetStack.rxMsgs) + except Exception as ex: + if isinstance(ex, OSError) and \ + len(ex.args) > 0 and \ + ex.args[0] == 22: + logger.error("Error servicing stack {}: {}. This could be " + "due to binding to an internal network " + "and trying to route to an external one.". + format(self.name, ex), extra={'cli': 'WARNING'}) + else: + logger.error("Error servicing stack {}: {} {}". + format(self.name, ex, ex.args), + extra={'cli': 'WARNING'}) + + l = 0 + return l + + async def _serviceStack(self, age): + """ + Update stacks clock and service all tx and rx messages. + + :param age: update timestamp of this RoadStack to this value + """ + self.updateStamp(age) + self.raetStack.serviceAll() + + def updateStamp(self, age=None): + """ + Change the timestamp of this stack's test store. 
+ + :param age: the timestamp will be set to this value + """ + self.raetStack.store.changeStamp(age if age else self.age) + + @property + def opened(self): + return self.raetStack.server.opened + + def open(self): + """ + Open the UDP socket of this stack's server. + """ + self.raetStack.server.open() # open the UDP socket + + def close(self): + """ + Close the UDP socket of this stack's server. + """ + self.raetStack.server.close() # close the UDP socket + + @property + def isKeySharing(self): + return self.raetStack.keep.auto != AutoMode.never + + @property + def verhex(self): + return self.raetStack.local.signer.verhex + + @property + def keyhex(self): + return self.raetStack.local.signer.keyhex + + @property + def pubhex(self): + return self.raetStack.local.priver.pubhex + + @property + def prihex(self): + return self.raetStack.local.priver.keyhex + + def send(self, msg: Any, remoteName: str, ha=None): + """ + Transmit the specified message to the remote specified by `remoteName`. + + :param msg: a message + :param remoteName: the name of the remote + """ + rid = self.getRemote(remoteName, ha).uid + # Use the configured messageTimeout (defaults to 0, i.e. never expire) + self.raetStack.transmit(msg, rid, timeout=self.messageTimeout) + + +class SimpleRStack(RStack): + def __init__(self, stackParams: Dict, msgHandler: Callable, sighex: str=None): + self.stackParams = stackParams + self.msgHandler = msgHandler + super().__init__(**stackParams, msgHandler=self.msgHandler, sighex=sighex) + + def start(self): + super().start() + + + +class KITRStack(SimpleRStack, KITNetworkInterface): + # Keep In Touch RStack.
RStack which maintains connections mentioned in + # its registry + def __init__(self, stackParams: dict, msgHandler: Callable, + registry: Dict[str, HA], sighex: str=None): + SimpleRStack.__init__(self, stackParams, msgHandler, sighex) + KITNetworkInterface.__init__(self, registry=registry) + + # courteous bi-directional joins + self.connectNicelyUntil = None + + self.reconnectToMissingIn = 6 + self.reconnectToDisconnectedIn = 6 + + def maintainConnections(self, force=False): + """ + Ensure appropriate connections. + + """ + cur = time.perf_counter() + if cur > self.nextCheck or force: + + self.nextCheck = cur + (6 if self.isKeySharing else 15) + # check again in 15 seconds, + # unless sooner because of retries below + + conns, disconns = self.remotesByConnected() + + for disconn in disconns: + self.handleDisconnectedRemote(cur, disconn) + + # remove items that have been connected + for connected in conns: + self.lastcheck.pop(connected.uid, None) + + self.connectToMissing(cur) + + logger.debug("{} next check for retries in {:.2f} seconds". + format(self, self.nextCheck - cur)) + return True + return False + + def reconcileNodeReg(self): + """ + Handle remotes missing from the node registry and clean up old remotes + no longer in this node's registry. + + 1. nice bootstrap + 2. force bootstrap + 3. retry connections + + 1. not in remotes + 2. in remotes, not joined, not allowed, not join in process + 3. in remotes, not joined, not allowed, join in process + 4. in remotes, joined, not allowed, not allow in process + 5. in remotes, joined, not allowed, allow in process + 6. in remotes, joined, allowed, + + :return: the missing remotes + """ + matches = set() # good matches found in nodestack remotes + legacy = set() # old remotes that are no longer in registry + conflicts = set() # matches found, but the ha conflicts + logger.debug("{} nodereg is {}". + format(self, self.registry.items())) + logger.debug("{} remotes are {}". 
+ format(self, [r.name for r in self.remotes.values()])) + + for r in self.remotes.values(): + if r.name in self.registry: + if self.sameAddr(r.ha, self.registry[r.name]): + matches.add(r.name) + logger.debug("{} matched remote is {} {}". + format(self, r.uid, r.ha)) + else: + conflicts.add((r.name, r.ha)) + # error("{} ha for {} doesn't match. ha of remote is {} but " + # "should be {}". + # format(self, r.name, r.ha, self.registry[r.name])) + logger.error("{} ha for {} doesn't match. ha of remote is {} but " + "should be {}". + format(self, r.name, r.ha, self.registry[r.name])) + else: + regName = self.findInNodeRegByHA(r.ha) + + # This change fixes test + # `testNodeConnectionAfterKeysharingRestarted` in + # `test_node_connection` + # regName = [nm for nm, ha in self.nodeReg.items() if ha == + # r.ha and (r.joined or r.joinInProcess())] + logger.debug("{} unmatched remote is {} {}". + format(self, r.uid, r.ha)) + if regName: + logger.debug("{} forgiving name mismatch for {} with same " + "ha {} using another name {}". + format(self, regName, r.ha, r.name)) + matches.add(regName) + else: + logger.debug("{} found a legacy remote {} " + "without a matching ha {}". + format(self, r.name, r.ha)) + logger.info(str(self.registry)) + legacy.add(r) + + # missing from remotes... need to connect + missing = set(self.registry.keys()) - matches + + if len(missing) + len(matches) + len(conflicts) != len(self.registry): + logger.error("Error reconciling nodeReg with remotes") + logger.error("missing: {}".format(missing)) + logger.error("matches: {}".format(matches)) + logger.error("conflicts: {}".format(conflicts)) + logger.error("nodeReg: {}".format(self.registry.keys())) + logger.error("Error reconciling nodeReg with remotes; see logs") + + if conflicts: + logger.error("found conflicting address information {} in registry" + .format(conflicts)) + if legacy: + for l in legacy: + logger.error("{} found legacy entry [{}, {}] in remotes, " + "that were not in registry". 
+ format(self, l.name, l.ha)) + self.removeRemote(l) + return missing + + def start(self): + super().start() + if self.name in self.registry: + # remove this node's registration from the Registry + # (no need to connect to itself) + del self.registry[self.name] + + + def addRemote(self, remote, dump=False): + if not self.findInNodeRegByHA(remote.ha): + logger.debug('Remote {} with HA {} not added -> not found in registry'.format(remote.name, remote.ha)) + return + return self.raetStack.addRemote(remote, dump) + + def createRemote(self, ha): + if ha and not self.findInNodeRegByHA(ha): + logger.debug('Remote with HA {} not added -> not found in registry'.format(ha)) + return + return super(KITRStack, self).createRemote(ha) + + def processRx(self, packet): + # Override to add check that in case of join new remote is in registry. This is done to avoid creation + # of unnecessary JSON files for remotes + tk = packet.data['tk'] + + if tk in [TrnsKind.join]: # join transaction + sha = (packet.data['sh'], packet.data['sp']) + if not self.findInNodeRegByHA(sha): + return self.handleJoinFromUnregisteredRemote(sha) + + return super(KITRStack, self).processRx(packet) + + def handleJoinFromUnregisteredRemote(self, sha): + logger.debug('Remote with HA {} not added -> not found in registry'.format(sha)) + return None + + + def connectToMissing(self, currentTime): + """ + Try to connect to the missing node within the time specified by + `reconnectToMissingIn` + + :param currentTime: the current time + """ + missing = self.reconcileNodeReg() + if missing: + logger.debug("{} found the following missing connections: {}". 
+ format(self, ", ".join(missing))) + if self.connectNicelyUntil is None: + self.connectNicelyUntil = \ + currentTime + self.reconnectToMissingIn + if currentTime <= self.connectNicelyUntil: + names = list(self.registry.keys()) + names.append(self.name) + nices = set(distributedConnectionMap(names)[self.name]) + for name in nices: + logger.debug("{} being nice and waiting for {} to join". + format(self, name)) + missing = missing.difference(nices) + + for name in missing: + self.connect(name) + + def handleDisconnectedRemote(self, cur, disconn): + """ + + :param disconn: disconnected remote + """ + + # if disconn.main: + # logger.trace("{} remote {} is main, so skipping". + # format(self, disconn.uid)) + # return + + logger.trace("{} handling disconnected remote {}".format(self, disconn)) + + if disconn.joinInProcess(): + logger.trace("{} join already in process, so " + "waiting to check for reconnects". + format(self)) + self.nextCheck = min(self.nextCheck, + cur + self.reconnectToDisconnectedIn) + return + + if disconn.allowInProcess(): + logger.trace("{} allow already in process, so " + "waiting to check for reconnects". + format(self)) + self.nextCheck = min(self.nextCheck, + cur + self.reconnectToDisconnectedIn) + return + + if disconn.name not in self.registry: + # TODO this is almost identical to line 615; make sure we refactor + regName = self.findInNodeRegByHA(disconn.ha) + if regName: + logger.debug("{} forgiving name mismatch for {} with same " + "ha {} using another name {}". + format(self, regName, disconn.ha, disconn.name)) + else: + logger.debug("{} skipping reconnect on {} because " + "it's not found in the registry". 
+ format(self, disconn.name)) + return + count, last = self.lastcheck.get(disconn.uid, (0, 0)) + dname = self.getRemoteName(disconn) + # TODO come back to ratcheting retries + # secsSinceLastCheck = cur - last + # secsToWait = self.ratchet.get(count) + # secsToWaitNext = self.ratchet.get(count + 1) + # if secsSinceLastCheck > secsToWait: + # extra = "" if not last else "; needed to wait at least {} and " \ + # "waited {} (next try will be {} " \ + # "seconds)".format(round(secsToWait, 2), + # round(secsSinceLastCheck, 2), + # round(secsToWaitNext, 2))) + + logger.debug("{} retrying to connect with {}". + format(self, dname)) + self.lastcheck[disconn.uid] = count + 1, cur + # self.nextCheck = min(self.nextCheck, + # cur + secsToWaitNext) + if disconn.joinInProcess(): + logger.debug("waiting, because join is already in " + "progress") + elif disconn.joined: + self.updateStamp() + self.allow(uid=disconn.uid, cascade=True, timeout=20) + logger.debug("{} disconnected node {} is joined".format( + self, disconn.name), extra={"cli": "STATUS"}) + else: + self.connect(dname, remoteId=disconn.uid) + + + def connect(self, name=None, remoteId=None, ha=None, verKeyRaw=None, publicKeyRaw=None): + """ + Connect to the node specified by name. 
+ + :param name: name of the node to connect to + :type name: str or (HA, tuple) + :return: the uid of the remote estate, or None if a connect is not + attempted + """ + if not remoteId and not ha and not name: + raise ValueError( + 'Either Host Address, Name or Remote ID must be provided to connect to a node in Raet stack') + + if remoteId: + remote = self.remotes[remoteId] + return self._doConnectRemote(remote, name) + elif ha: + return self._doConnectByHA(ha, name) + else: + ha = self.registry[name] + return self._doConnectByHA(ha, name) + diff --git a/stp_raet/test/__init__.py b/stp_raet/test/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/stp_raet/test/conftest.py b/stp_raet/test/conftest.py new file mode 100644 index 0000000000..f7c32fc159 --- /dev/null +++ b/stp_raet/test/conftest.py @@ -0,0 +1,41 @@ +import asyncio + +from stp_core.common.temp_file_util import SafeTemporaryDirectory +from stp_core.loop.looper import Looper +from stp_core.network.port_dispenser import genHa +from stp_core.test.conftest import * + + +@pytest.fixture() +def registry(): + return { + 'Alpha': genHa(), + 'Beta': genHa(), + 'Gamma': genHa(), + 'Delta': genHa() + } + + +@pytest.fixture() +def loop(): + loop = asyncio.get_event_loop() + loop.set_debug(True) + + +@pytest.yield_fixture() +def tdirAndLooper(loop): + asyncio.set_event_loop(loop) + + with SafeTemporaryDirectory() as td: + with Looper(loop=loop, debug=True) as looper: + yield td, looper + + +@pytest.fixture() +def tdir(tdirAndLooper): + return tdirAndLooper[0] + + +@pytest.fixture() +def looper(tdirAndLooper): + return tdirAndLooper[1] \ No newline at end of file diff --git a/stp_raet/test/helper.py b/stp_raet/test/helper.py new file mode 100644 index 0000000000..5733e38b71 --- /dev/null +++ b/stp_raet/test/helper.py @@ -0,0 +1,55 @@ +import time +from typing import NamedTuple +from typing import Optional + +from raet.raeting import TrnsKind, PcktKind + + +RaetDelay = NamedTuple("RaetDelay", [ + 
("tk", Optional[TrnsKind]), + ("pk", Optional[PcktKind]), + ("fromPort", Optional[int])]) + + +def handshake(*stacks): + svc(stacks) + print("Finished Handshake\n") + + +def svc(stacks): + while True: + for stack in stacks: + stack.serviceAll() + stack.store.advanceStamp(0.1) + if all([not stack.transactions for stack in stacks]): + break + time.sleep(.1) + + +def cleanup(*stacks): + for stack in stacks: + stack.server.close() # close the UDP socket + stack.keep.clearAllDir() # clear persisted data + print("Finished\n") + + +def sendMsgs(frm, to, toRemote): + stacks = [frm, to] + msg = {'subject': 'Example message {} to {}'.format(frm.name, to.name), + 'content': 'test'} + frm.transmit(msg, toRemote.uid) + svc(stacks) + rx = to.rxMsgs.popleft() + print("{0}\n".format(rx)) + print("Finished Message {} to {}\n".format(frm.name, to.name)) + msg = {'subject': 'Example message {} to {}'.format(to.name, frm.name), + 'content': 'Another test.'} + to.transmit(msg, toRemote.uid) + svc(stacks) + rx = frm.rxMsgs.popleft() + print("{0}\n".format(rx)) + print("Finished Message {} to {}\n".format(to.name, frm.name)) + + +def getRemote(stack, name): + return next(r for r in stack.remotes.values() if r.name == name) diff --git a/stp_raet/test/stack_message_loss.py b/stp_raet/test/stack_message_loss.py new file mode 100644 index 0000000000..c7097792d5 --- /dev/null +++ b/stp_raet/test/stack_message_loss.py @@ -0,0 +1,83 @@ +import raet +import time +from raet.raeting import AutoMode + + +def example2(): + alpha = raet.road.stacking.RoadStack(name='alpha', + ha=('0.0.0.0', 7531), + auto=AutoMode.always) + + beta = raet.road.stacking.RoadStack(name='beta', + ha=('0.0.0.0', 7532), + main=True, + auto=AutoMode.always) + + remote = raet.road.estating.RemoteEstate(stack=alpha, + ha=beta.ha) + + alpha.addRemote(remote) + + alpha.join(uid=remote.uid, cascade=True) + + stacks = [alpha, beta] + while True: + for stack in stacks: + stack.serviceAll() + stack.store.advanceStamp(0.1) + if 
all([not stack.transactions for stack in stacks]): + break + time.sleep(0.1) + + print("Finished Handshake\n") + + msg = {'subject': 'Example message alpha to beta', + 'content': 'The dict keys in this dict are not special any dict will do.',} + + alpha.transmit(msg, remote.uid) + while True: + for stack in stacks: + stack.serviceAll() + stack.store.advanceStamp(0.1) + if all([not stack.transactions for stack in stacks]): + break + time.sleep(0.1) + + rx = beta.rxMsgs.popleft() + print("{0}\n".format(rx)) + print("Finished Message alpha to beta\n") + + msg = {'subject': 'Example message beta to alpha', + 'content': 'Messages are the core of raet.',} + + beta.transmit(msg, remote.uid) + while True: + for stack in stacks: + stack.serviceAll() + stack.store.advanceStamp(0.1) + if all([not stack.transactions for stack in stacks]): + break + time.sleep(0.1) + + rx = alpha.rxMsgs.popleft() + print("{0}\n".format(rx)) + print("Finished Message beta to alpha\n") + + beta.server.close() + for i in range(10): + alpha.transmit(msg, remote.uid) + time.sleep(3) + for i in range(500): + alpha.serviceAll() + alpha.store.advanceStamp(0.1) + if all([not stack.transactions for stack in stacks]): + break + time.sleep(0.1) + + for stack in stacks: + stack.server.close() # close the UDP socket + stack.keep.clearAllDir() # clear persisted data + + print("Finished\n") + +example2() diff --git a/stp_raet/test/test_communication.py b/stp_raet/test/test_communication.py new file mode 100644 index 0000000000..f6e5c99d71 --- /dev/null +++ b/stp_raet/test/test_communication.py @@ -0,0 +1,197 @@ +from ioflo.base.consoling import getConsole +from stp_core.crypto.nacl_wrappers import Signer as NaclSigner, Privateer +from raet.raeting import AutoMode, Acceptance +from raet.road.estating import RemoteEstate +from raet.road.stacking import RoadStack + +from stp_raet.test.helper import handshake, sendMsgs, cleanup, getRemote +from stp_core.common.log import getlogger +from 
stp_core.network.port_dispenser import genHa + +logger = getlogger() + + +def testPromiscuousConnection(tdir): + alpha = RoadStack(name='alpha', + ha=genHa(), + auto=AutoMode.always, + basedirpath=tdir) + + beta = RoadStack(name='beta', + ha=genHa(), + main=True, + auto=AutoMode.always, + basedirpath=tdir) + + try: + betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) + alpha.addRemote(betaRemote) + + alpha.join(uid=betaRemote.uid, cascade=True) + + handshake(alpha, beta) + + sendMsgs(alpha, beta, betaRemote) + finally: + cleanup(alpha, beta) + + +def testRaetPreSharedKeysPromiscous(tdir): + alphaSigner = NaclSigner() + betaSigner = NaclSigner() + + logger.debug("Alpha's verkey {}".format(alphaSigner.verhex)) + logger.debug("Beta's verkey {}".format(betaSigner.verhex)) + + alpha = RoadStack(name='alpha', + ha=genHa(), + sigkey=alphaSigner.keyhex, + auto=AutoMode.always, + basedirpath=tdir) + + beta = RoadStack(name='beta', + ha=genHa(), + sigkey=betaSigner.keyhex, + main=True, + auto=AutoMode.always, + basedirpath=tdir) + + try: + + betaRemote = RemoteEstate(stack=alpha, ha=beta.ha, + verkey=betaSigner.verhex) + + alpha.addRemote(betaRemote) + + alpha.allow(uid=betaRemote.uid, cascade=True) + + handshake(alpha, beta) + + sendMsgs(alpha, beta, betaRemote) + + finally: + cleanup(alpha, beta) + + +def testRaetPreSharedKeysNonPromiscous(tdir): + alphaSigner = NaclSigner() + betaSigner = NaclSigner() + + alphaPrivateer = Privateer() + betaPrivateer = Privateer() + + logger.debug("Alpha's verkey {}".format(alphaSigner.verhex)) + logger.debug("Beta's verkey {}".format(betaSigner.verhex)) + + alpha = RoadStack(name='alpha', + ha=genHa(), + sigkey=alphaSigner.keyhex, + prikey=alphaPrivateer.keyhex, + auto=AutoMode.never, + basedirpath=tdir) + + beta = RoadStack(name='beta', + ha=genHa(), + sigkey=betaSigner.keyhex, + prikey=betaPrivateer.keyhex, + main=True, + auto=AutoMode.never, + basedirpath=tdir) + + alpha.keep.dumpRemoteRoleData({ + "acceptance": 
Acceptance.accepted.value, + "verhex": betaSigner.verhex, + "pubhex": betaPrivateer.pubhex + }, "beta") + + beta.keep.dumpRemoteRoleData({ + "acceptance": Acceptance.accepted.value, + "verhex": alphaSigner.verhex, + "pubhex": alphaPrivateer.pubhex + }, "alpha") + + try: + + betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) + + alpha.addRemote(betaRemote) + + alpha.allow(uid=betaRemote.uid, cascade=True) + + handshake(alpha, beta) + + sendMsgs(alpha, beta, betaRemote) + finally: + cleanup(alpha, beta) + + +def testConnectionWithHaChanged(tdir): + console = getConsole() + console.reinit(verbosity=console.Wordage.verbose) + + alphaSigner = NaclSigner() + betaSigner = NaclSigner() + + alphaPrivateer = Privateer() + betaPrivateer = Privateer() + + logger.debug("Alpha's verkey {}".format(alphaSigner.verhex)) + logger.debug("Beta's verkey {}".format(betaSigner.verhex)) + + alpha = None + + def setupAlpha(ha): + nonlocal alpha + alpha = RoadStack(name='alpha', + ha=ha, + sigkey=alphaSigner.keyhex, + prikey=alphaPrivateer.keyhex, + auto=AutoMode.never, + basedirpath=tdir) + + alpha.keep.dumpRemoteRoleData({ + "acceptance": Acceptance.accepted.value, + "verhex": betaSigner.verhex, + "pubhex": betaPrivateer.pubhex + }, "beta") + + oldHa = genHa() + setupAlpha(oldHa) + + beta = RoadStack(name='beta', + ha=genHa(), + sigkey=betaSigner.keyhex, + prikey=betaPrivateer.keyhex, + main=True, + auto=AutoMode.never, + basedirpath=tdir, mutable=True) + + beta.keep.dumpRemoteRoleData({ + "acceptance": Acceptance.accepted.value, + "verhex": alphaSigner.verhex, + "pubhex": alphaPrivateer.pubhex + }, "alpha") + + try: + betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) + alpha.addRemote(betaRemote) + alpha.join(uid=betaRemote.uid, cascade=True) + handshake(alpha, beta) + sendMsgs(alpha, beta, betaRemote) + logger.debug("beta knows alpha as {}". 
+ format(getRemote(beta, "alpha").ha)) + cleanup(alpha) + + newHa = genHa() + logger.debug("alpha changing ha to {}".format(newHa)) + + setupAlpha(newHa) + betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) + alpha.addRemote(betaRemote) + alpha.join(uid=betaRemote.uid, cascade=True) + handshake(alpha, beta) + sendMsgs(alpha, beta, betaRemote) + logger.debug("beta knows alpha as {}". + format(getRemote(beta, "alpha").ha)) + finally: + cleanup(alpha, beta) diff --git a/stp_raet/test/test_kitrstack.py b/stp_raet/test/test_kitrstack.py new file mode 100644 index 0000000000..8712199cab --- /dev/null +++ b/stp_raet/test/test_kitrstack.py @@ -0,0 +1,52 @@ +from copy import copy + +import pytest +from stp_core.loop.eventually import eventually +from stp_core.network.auth_mode import AuthMode +from stp_raet.rstack import KITRStack +from stp_core.test.helper import Printer, prepStacks, checkStacksConnected, chkPrinted + +@pytest.fixture() +def printers(registry): + printersDict = {} + for name, ha in registry.items(): + printersDict[name] = Printer(name) + return printersDict + +@pytest.fixture() +def stacks(registry, tdir, looper, printers): + rstacks = [] + for name, ha in registry.items(): + printer = printers[name] + stackParams = { + "name": name, + "ha": ha, + "auth_mode": AuthMode.ALLOW_ANY.value, + "main": True, + "mutable": "mutable", + "messageTimeout": 30, + "basedirpath": tdir + } + reg = copy(registry) + reg.pop(name) + stack = KITRStack(stackParams, printer.print, reg) + rstacks.append(stack) + prepStacks(looper, *rstacks, connect=True, useKeys=False) + return rstacks + + +def testKitRStacksConnected(looper, stacks): + looper.run(eventually(checkStacksConnected, stacks, retryWait=1, + timeout=10)) + + +def testKitRStacksSendMesages(looper, stacks, printers): + looper.run(eventually(checkStacksConnected, stacks, retryWait=1, + timeout=10)) + + stacks[0].send({'greetings': 'hi'}, stacks[1].name) + + looper.run(eventually(chkPrinted, printers[stacks[1].name], 
{'greetings': 'hi'})) + + + diff --git a/stp_raet/test/test_raet_comm_with_one_key.py b/stp_raet/test/test_raet_comm_with_one_key.py new file mode 100644 index 0000000000..4525fb2fb9 --- /dev/null +++ b/stp_raet/test/test_raet_comm_with_one_key.py @@ -0,0 +1,104 @@ +from binascii import hexlify + +import pytest +from raet.raeting import AutoMode, Acceptance +from raet.road.estating import RemoteEstate +from raet.road.stacking import RoadStack + +from stp_raet.test.helper import handshake, sendMsgs, cleanup +from stp_core.crypto.nacl_wrappers import Signer +from stp_core.crypto.util import ed25519SkToCurve25519, ed25519PkToCurve25519 +from stp_core.network.port_dispenser import genHa + + +@pytest.fixture(scope="module") +def keysAndNames(): + alphaSigner = Signer() + betaSigner = Signer() + alphaPrikey = ed25519SkToCurve25519(alphaSigner.keyraw) + betaPrikey = ed25519SkToCurve25519(betaSigner.keyraw) + alphaPubkey = ed25519PkToCurve25519(alphaSigner.verraw) + betaPubkey = ed25519PkToCurve25519(betaSigner.verraw) + alphaName = 'alpha' + betaName = 'beta' + return alphaSigner.keyhex, alphaPrikey, alphaSigner.verhex, alphaPubkey, \ + alphaName, betaSigner.keyhex, betaPrikey, betaSigner.verhex, \ + betaPubkey, betaName + + +def testNonPromiscousConnectionWithOneKey(tdir, keysAndNames): + # Simulating node to node connection + alphaSighex, alphaPrikey, alphaVerhex, alphaPubkey, alphaName, betaSighex,\ + betaPrikey, betaVerhex, betaPubkey, betaName = keysAndNames + alpha = RoadStack(name=alphaName, + ha=genHa(), + sigkey=alphaSighex, + prikey=hexlify(alphaPrikey), + auto=AutoMode.never, + basedirpath=tdir) + + beta = RoadStack(name=betaName, + ha=genHa(), + sigkey=betaSighex, + prikey=hexlify(betaPrikey), + main=True, + auto=AutoMode.never, + basedirpath=tdir) + + alpha.keep.dumpRemoteRoleData({ + "acceptance": Acceptance.accepted.value, + "verhex": betaVerhex, + "pubhex": hexlify(betaPubkey) + }, betaName) + + beta.keep.dumpRemoteRoleData({ + "acceptance": 
Acceptance.accepted.value, + "verhex": alphaVerhex, + "pubhex": hexlify(alphaPubkey) + }, alphaName) + + try: + + betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) + + alpha.addRemote(betaRemote) + + alpha.allow(uid=betaRemote.uid, cascade=True) + + handshake(alpha, beta) + + sendMsgs(alpha, beta, betaRemote) + finally: + cleanup(alpha, beta) + + +def testPromiscuousConnection(tdir, keysAndNames): + # Simulating node to client connection + alphaSighex, alphaPrikey, alphaVerhex, alphaPubkey, alphaName, betaSighex, \ + betaPrikey, betaVerhex, betaPubkey, betaName = keysAndNames + alpha = RoadStack(name=alphaName, + ha=genHa(), + sigkey=alphaSighex, + prikey=hexlify(alphaPrikey), + auto=AutoMode.always, + basedirpath=tdir) + + beta = RoadStack(name=betaName, + ha=genHa(), + main=True, + sigkey=betaSighex, + prikey=hexlify(betaPrikey), + auto=AutoMode.always, + basedirpath=tdir) + + try: + betaRemote = RemoteEstate(stack=alpha, ha=beta.ha) + alpha.addRemote(betaRemote) + + alpha.join(uid=betaRemote.uid, cascade=True) + + handshake(alpha, beta) + + sendMsgs(alpha, beta, betaRemote) + finally: + cleanup(alpha, beta) diff --git a/stp_raet/test/test_rstack.py b/stp_raet/test/test_rstack.py new file mode 100644 index 0000000000..60aa85d37e --- /dev/null +++ b/stp_raet/test/test_rstack.py @@ -0,0 +1,50 @@ +from stp_core.loop.eventually import eventually +from stp_core.network.auth_mode import AuthMode +from stp_core.network.port_dispenser import genHa +from stp_raet.rstack import SimpleRStack +from stp_core.test.helper import Printer, chkPrinted, prepStacks, checkStacksConnected + + +def test2RStackCommunication(tdir, looper): + names = ['Alpha', 'Beta'] + alphaP = Printer(names[0]) + betaP = Printer(names[1]) + + stackParamsAlpha = { + "name": names[0], + "ha": genHa(), + "auth_mode": AuthMode.ALLOW_ANY.value, + "main": True, + "mutable": "mutable", + "messageTimeout": 30, + "basedirpath" : tdir + } + stackParamsBeta = { + "name": names[1], + "ha": genHa(), + "main": True, 
+ "auth_mode": AuthMode.ALLOW_ANY.value, + "mutable": "mutable", + "messageTimeout": 30, + "basedirpath" : tdir + } + + alpha = SimpleRStack(stackParamsAlpha, msgHandler=alphaP.print) + beta = SimpleRStack(stackParamsBeta, msgHandler=betaP.print) + + alpha.connect(ha=beta.ha) + beta.connect(ha=alpha.ha) + + prepStacks(looper, alpha, beta, connect=False, useKeys=False) + + looper.run(eventually(checkStacksConnected, [alpha, beta], retryWait=1, + timeout=10)) + + alpha.send({'greetings': 'hi Beta'}, beta.name) + beta.send({'greetings': 'hi Alpha'}, alpha.name) + + looper.run(eventually(chkPrinted, betaP, {'greetings': 'hi Beta'})) + looper.run(eventually(chkPrinted, alphaP, {'greetings': 'hi Alpha'})) + + + diff --git a/stp_raet/util.py b/stp_raet/util.py new file mode 100644 index 0000000000..09bf889652 --- /dev/null +++ b/stp_raet/util.py @@ -0,0 +1,78 @@ +import json +import os +from collections import OrderedDict + +from stp_core.crypto.nacl_wrappers import Signer, Privateer +from raet.road.keeping import RoadKeep + +from stp_core.crypto.util import ed25519SkToCurve25519, ed25519PkToCurve25519 + + +def getLocalKeep(name, baseDir=None): + keep = RoadKeep(stackname=name, baseroledirpath=baseDir) + localRoleData = keep.loadLocalRoleData() + return localRoleData + + +def getLocalRoleKeyByName(roleName, baseDir, keyName): + localRoleData = getLocalKeep(roleName, baseDir) + keyhex = localRoleData.get(keyName) + keyhex = str(keyhex) if keyhex is not None else None + if keyhex is None: + raise BaseException("Seems {} keypair is not created yet" + .format(roleName)) + return keyhex + + +def getLocalVerKey(roleName, baseDir=None): + sighex = getLocalRoleKeyByName(roleName, baseDir, 'sighex') + signer = Signer(sighex) + return signer.verhex.decode() + + +def getLocalPubKey(roleName, baseDir=None): + prihex = getLocalRoleKeyByName(roleName, baseDir, 'prihex') + privateer = Privateer(prihex) + return privateer.pubhex.decode() + + +def getLocalEstateData(name, baseDir): + 
estatePath = os.path.expanduser(os.path.join(baseDir, name, "local", + "estate.json")) + if os.path.isfile(estatePath): + return json.loads(open(estatePath).read()) + + +def getHaFromLocalEstate(name, basedirpath): + localEstate = getLocalEstateData(name, basedirpath) + if localEstate: + return localEstate.get("ha") + + +def isRaetKeepDir(directory): + if os.path.isdir(os.path.join(directory, 'local')) and \ + os.path.isdir(os.path.join(directory, 'remote')) and \ + os.path.isdir(os.path.join(directory, 'role')): + return True + return False + + +def isPortUsedByRaetRemote(keepDir, port): + """ + Checks if the any local remote present in `keepDir` is bound to the given + port + :param keepDir: + :param port: + :return: + """ + for item in os.listdir(keepDir): + itemDir = os.path.join(keepDir, item) + if os.path.isdir(itemDir) and isRaetKeepDir(itemDir): + try: + localRemoteData = json.load(open(os.path.join(itemDir, 'local', + 'estate.json'))) + if localRemoteData['ha'][1] == port: + return True + except: + continue + return False diff --git a/stp_zmq/__init__.py b/stp_zmq/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/stp_zmq/authenticator.py b/stp_zmq/authenticator.py new file mode 100644 index 0000000000..c9a4d89e3f --- /dev/null +++ b/stp_zmq/authenticator.py @@ -0,0 +1,102 @@ +import sys + +import asyncio +import zmq +import zmq.asyncio +from zmq.auth import Authenticator +from zmq.auth.thread import _inherit_docstrings, ThreadAuthenticator, \ + AuthenticationThread + + +# Copying code from zqm classes since no way to inject these dependencies + + +class MultiZapAuthenticator(Authenticator): + """ + `Authenticator` supports only one ZAP socket in a single process, this lets + you have multiple ZAP sockets + """ + count = 0 + + def __init__(self, context=None, encoding='utf-8', log=None): + MultiZapAuthenticator.count += 1 + super().__init__(context=context, encoding=encoding, log=log) + + def start(self): + """Create and bind the 
ZAP socket""" + self.zap_socket = self.context.socket(zmq.REP) + self.zap_socket.linger = 1 + zapLoc = 'inproc://zeromq.zap.{}'.format(MultiZapAuthenticator.count) + self.zap_socket.bind(zapLoc) + self.log.debug('Starting ZAP at {}'.format(zapLoc)) + + def stop(self): + """Close the ZAP socket""" + if self.zap_socket: + self.log.debug( + 'Stopping ZAP at {}'.format(self.zap_socket.LAST_ENDPOINT)) + super().stop() + + +@_inherit_docstrings +class ThreadMultiZapAuthenticator(ThreadAuthenticator): + def start(self): + """Start the authentication thread""" + # create a socket to communicate with auth thread. + self.pipe = self.context.socket(zmq.PAIR) + self.pipe.linger = 1 + self.pipe.bind(self.pipe_endpoint) + authenticator = MultiZapAuthenticator(self.context, encoding=self.encoding, + log=self.log) + self.thread = AuthenticationThread(self.context, self.pipe_endpoint, + encoding=self.encoding, log=self.log, + authenticator=authenticator) + self.thread.start() + # Event.wait:Changed in version 2.7: Previously, the method always returned None. 
+ if sys.version_info < (2, 7): + self.thread.started.wait(timeout=10) + else: + if not self.thread.started.wait(timeout=10): + raise RuntimeError("Authenticator thread failed to start") + + +class AsyncioAuthenticator(MultiZapAuthenticator): + """ZAP authentication for use in the asyncio IO loop""" + + def __init__(self, context=None, loop=None): + super().__init__(context) + self.loop = loop or asyncio.get_event_loop() + self.__poller = None + self.__task = None + + # TODO: Remove this commented method later + # @asyncio.coroutine + # def __handle_zap(self): + # while True: + # events = yield from self.__poller.poll() + # if self.zap_socket in dict(events): + # msg = yield from self.zap_socket.recv_multipart() + # self.handle_zap_message(msg) + + async def __handle_zap(self): + while True: + events = await self.__poller.poll() + if self.zap_socket in dict(events): + msg = await self.zap_socket.recv_multipart() + self.handle_zap_message(msg) + + def start(self): + """Start ZAP authentication""" + super().start() + self.__poller = zmq.asyncio.Poller() + self.__poller.register(self.zap_socket, zmq.POLLIN) + self.__task = asyncio.ensure_future(self.__handle_zap()) + + def stop(self): + """Stop ZAP authentication""" + if self.__task: + self.__task.cancel() + if self.__poller: + self.__poller.unregister(self.zap_socket) + self.__poller = None + super().stop() diff --git a/stp_zmq/kit_zstack.py b/stp_zmq/kit_zstack.py new file mode 100644 index 0000000000..7f94f21cca --- /dev/null +++ b/stp_zmq/kit_zstack.py @@ -0,0 +1,108 @@ +from stp_core.network.keep_in_touch import KITNetworkInterface +from stp_zmq.simple_zstack import SimpleZStack +from typing import Dict, Mapping, Callable, Tuple, Any, Union +from stp_core.types import HA +import time +from stp_core.common.log import getlogger +logger = getlogger() + + +class KITZStack(SimpleZStack, KITNetworkInterface): + # ZStack which maintains connections mentioned in its registry + + def __init__(self, + stackParams: dict, + 
msgHandler: Callable, + registry: Dict[str, HA], + seed=None, + sighex: str = None, + config=None): + + SimpleZStack.__init__(self, + stackParams, + msgHandler, + seed=seed, + sighex=sighex, + config=config) + + KITNetworkInterface.__init__(self, + registry=registry) + + self._retry_connect = {} + + def maintainConnections(self, force=False): + """ + Ensure appropriate connections. + + """ + now = time.perf_counter() + if now < self.nextCheck and not force: + return False + self.nextCheck = now + (self.config.RETRY_TIMEOUT_NOT_RESTRICTED + if self.isKeySharing + else self.config.RETRY_TIMEOUT_RESTRICTED) + missing = self.connectToMissing() + self.retryDisconnected(exclude=missing) + logger.debug("{} next check for retries in {:.2f} seconds" + .format(self, self.nextCheck - now)) + return True + + def reconcileNodeReg(self) -> set: + """ + Check whether registry contains some addresses + that were never connected to + + :return: + """ + + matches = set() + for name, remote in self.remotes.items(): + if name not in self.registry: + continue + if self.sameAddr(remote.ha, self.registry[name]): + matches.add(name) + logger.debug("{} matched remote {} {}". 
+ format(self, remote.uid, remote.ha)) + return self.registry.keys() - matches - {self.name} + + def retryDisconnected(self, exclude=None): + exclude = exclude or {} + for name, remote in self.remotes.items(): + if name in exclude or remote.isConnected: + continue + + if not name in self._retry_connect: + self._retry_connect[name] = 0 + + if not remote.socket or self._retry_connect[name] >= \ + self.config.MAX_RECONNECT_RETRY_ON_SAME_SOCKET: + self._retry_connect.pop(name, None) + self.reconnectRemote(remote) + else: + self._retry_connect[name] += 1 + self.sendPingPong(remote, is_ping=True) + + def connectToMissing(self) -> set: + """ + Try to connect to the missing nodes + """ + + missing = self.reconcileNodeReg() + if not missing: + return missing + + logger.debug("{} found the following " + "missing connections: {}" + .format(self, ", ".join(missing))) + + for name in missing: + try: + self.connect(name, ha=self.registry[name]) + except ValueError as ex: + logger.error('{} cannot connect to {} due to {}' + .format(self, name, ex)) + return missing + + async def service(self, limit=None): + c = await super().service(limit) + return c diff --git a/stp_zmq/remote.py b/stp_zmq/remote.py new file mode 100644 index 0000000000..437e784d74 --- /dev/null +++ b/stp_zmq/remote.py @@ -0,0 +1,157 @@ +from stp_core.common.config.util import getConfig +import time +import zmq +from stp_core.common.log import getlogger +import sys +from zmq.utils.monitor import recv_monitor_message +from zmq.sugar.socket import Socket + +logger = getlogger() + + +def set_keepalive(socket: Socket, config): + # This assumes the same TCP_KEEPALIVE configuration for all sockets which + # is not ideal but matches what we do in code + socket.setsockopt(zmq.TCP_KEEPALIVE, 1) + socket.setsockopt(zmq.TCP_KEEPALIVE_INTVL, config.KEEPALIVE_INTVL) + socket.setsockopt(zmq.TCP_KEEPALIVE_IDLE, config.KEEPALIVE_IDLE) + socket.setsockopt(zmq.TCP_KEEPALIVE_CNT, config.KEEPALIVE_CNT) + + +def 
set_zmq_internal_queue_length(socket: Socket, config): + # set both ZMQ_RCVHWM and ZMQ_SNDHWM + socket.set_hwm(config.ZMQ_INTERNAL_QUEUE_SIZE) + + +class Remote: + def __init__(self, name, ha, verKey, publicKey, config=None): + # TODO, remove *args, **kwargs after removing test + + # Every remote has a unique name per stack, the name can be the + # public key of the other end + self.name = name + self.ha = ha + # self.publicKey is the public key of the other end of the remote + self.publicKey = publicKey + # self.verKey is the verification key of the other end of the remote + self.verKey = verKey + self.socket = None + # TODO: A stack should have a monitor and it should identify remote + # by endpoint + + self._numOfReconnects = 0 + self._isConnected = False + self._lastConnectedAt = None + self.config = config or getConfig() + + # Currently keeping uid field to resemble RAET RemoteEstate + self.uid = name + + def __repr__(self): + return '{}:{}'.format(self.name, self.ha) + + @property + def isConnected(self): + if not self._isConnected: + return False + lost = self.hasLostConnection + if lost: + self._isConnected = False + return False + return True + + def setConnected(self): + self._numOfReconnects += 1 + self._isConnected = True + self._lastConnectedAt = time.perf_counter() + + def firstConnect(self): + return self._numOfReconnects == 0 + + def connect(self, context, localPubKey, localSecKey, typ=None): + typ = typ or zmq.DEALER + sock = context.socket(typ) + sock.curve_publickey = localPubKey + sock.curve_secretkey = localSecKey + sock.curve_serverkey = self.publicKey + sock.identity = localPubKey + set_keepalive(sock, self.config) + set_zmq_internal_queue_length(sock, self.config) + addr = 'tcp://{}:{}'.format(*self.ha) + sock.connect(addr) + self.socket = sock + logger.trace('connecting socket {} {} to remote {}'. 
+ format(self.socket.FD, self.socket.underlying, self)) + + def disconnect(self): + logger.debug('disconnecting remote {}'.format(self)) + if self.socket: + logger.trace('disconnecting socket {} {}'. + format(self.socket.FD, self.socket.underlying)) + + if self.socket._monitor_socket: + logger.trace('{} closing monitor socket'.format(self)) + self.socket._monitor_socket.linger = 0 + self.socket.monitor(None, 0) + self.socket._monitor_socket = None + # self.socket.disable_monitor() + self.socket.close(linger=0) + self.socket = None + else: + logger.debug('{} close was called on a null socket, maybe close is ' + 'being called twice.'.format(self)) + + self._isConnected = False + + @property + def hasLostConnection(self): + + if self.socket is None: + logger.warning('Remote {} already disconnected'.format(self)) + return False + + events = self._lastSocketEvents() + + if events: + logger.trace('Remote {} has monitor events: {}'. + format(self, events)) + + # noinspection PyUnresolvedReferences + if zmq.EVENT_DISCONNECTED in events or zmq.EVENT_CLOSED in events: + logger.debug('{} found disconnected event on monitor'.format(self)) + + # Reverse events list since list has no builtin to get last index + events.reverse() + + def eventIndex(eventName): + try: + return events.index(eventName) + except ValueError: + return sys.maxsize + + connected = eventIndex(zmq.EVENT_CONNECTED) + delayed = eventIndex(zmq.EVENT_CONNECT_DELAYED) + disconnected = min(eventIndex(zmq.EVENT_DISCONNECTED), + eventIndex(zmq.EVENT_CLOSED)) + if disconnected < connected and disconnected < delayed: + return True + + return False + + def _lastSocketEvents(self, nonBlock=True): + return self._get_monitor_events(self.socket, nonBlock) + + @staticmethod + def _get_monitor_events(socket, non_block=True): + monitor = socket.get_monitor_socket() + events = [] + # noinspection PyUnresolvedReferences + flags = zmq.NOBLOCK if non_block else 0 + while True: + try: + # noinspection PyUnresolvedReferences + 
message = recv_monitor_message(monitor, flags) + events.append(message['event']) + except zmq.Again: + break + return events \ No newline at end of file diff --git a/stp_zmq/simple_zstack.py b/stp_zmq/simple_zstack.py new file mode 100644 index 0000000000..6d3a996160 --- /dev/null +++ b/stp_zmq/simple_zstack.py @@ -0,0 +1,37 @@ +from stp_zmq.zstack import ZStack +from typing import Dict, Mapping, Callable, Tuple, Any, Union +from stp_core.network.auth_mode import AuthMode + + +class SimpleZStack(ZStack): + + def __init__(self, + stackParams: Dict, + msgHandler: Callable, + seed=None, + onlyListener=False, + sighex: str=None, + config=None): + + # TODO: sighex is unused as of now, remove once test is removed or + # maybe use sighex to generate all keys, DECISION DEFERRED + + self.stackParams = stackParams + self.msgHandler = msgHandler + + # TODO: Ignoring `main` param as of now which determines + # if the stack will have a listener socket or not. + name = stackParams['name'] + ha = stackParams['ha'] + basedirpath = stackParams['basedirpath'] + + auto = stackParams.pop('auth_mode', None) + restricted = auto != AuthMode.ALLOW_ANY.value + super().__init__(name, + ha, + basedirpath, + msgHandler=self.msgHandler, + restricted=restricted, + seed=seed, + onlyListener=onlyListener, + config=config) diff --git a/stp_zmq/test/__init__.py b/stp_zmq/test/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/stp_zmq/test/conftest.py b/stp_zmq/test/conftest.py new file mode 100644 index 0000000000..f6dc2e846b --- /dev/null +++ b/stp_zmq/test/conftest.py @@ -0,0 +1,51 @@ +import asyncio + +import pytest +import zmq.asyncio + +from stp_core.common.config.util import getConfig +from stp_core.common.temp_file_util import SafeTemporaryDirectory +from stp_core.loop.looper import Looper + +from stp_core.network.port_dispenser import genHa + + +@pytest.fixture() +def registry(): + return { + 'Alpha': genHa(), + 'Beta': genHa(), + 'Gamma': genHa(), + 'Delta': genHa() 
+ } + + +@pytest.fixture() +def loop(): + loop = zmq.asyncio.ZMQEventLoop() + loop.set_debug(True) + return loop + + +@pytest.yield_fixture() +def tdirAndLooper(loop): + asyncio.set_event_loop(loop) + + with SafeTemporaryDirectory() as td: + with Looper(loop=loop, debug=True) as looper: + yield td, looper + + +@pytest.fixture() +def tdir(tdirAndLooper): + return tdirAndLooper[0] + + +@pytest.fixture() +def looper(tdirAndLooper): + return tdirAndLooper[1] + + +@pytest.fixture() +def tconf(): + return getConfig() diff --git a/stp_zmq/test/helper.py b/stp_zmq/test/helper.py new file mode 100644 index 0000000000..c5a0bd2713 --- /dev/null +++ b/stp_zmq/test/helper.py @@ -0,0 +1,116 @@ +import os +import types +from distutils.dir_util import copy_tree +from stat import ST_MODE + +from copy import deepcopy + +from stp_core.common.util import adict +from stp_core.loop.eventually import eventually +from stp_core.network.port_dispenser import genHa +from stp_core.test.helper import Printer, prepStacks, chkPrinted + +from stp_zmq.util import generate_certificates +from stp_zmq.zstack import ZStack + + +def genKeys(baseDir, names): + generate_certificates(baseDir, *names, clean=True) + for n in names: + d = os.path.join(baseDir, n) + os.makedirs(d, exist_ok=True) + for kd in ZStack.keyDirNames(): + copy_tree(os.path.join(baseDir, kd), os.path.join(d, kd)) + + +def patch_send_ping_counter(stack): + stack.ping_count = 0 + origMethod = stack.sendPingPong + + +def add_counters_to_ping_pong(stack): + stack.sent_ping_count = 0 + stack.sent_pong_count = 0 + stack.recv_ping_count = 0 + stack.recv_pong_count = 0 + orig_send_method = stack.sendPingPong + orig_recv_method = stack.handlePingPong + + def send_ping_pong_counter(self, remote, is_ping=True): + if is_ping: + self.sent_ping_count += 1 + else: + self.sent_pong_count += 1 + + return orig_send_method(remote, is_ping) + + def recv_ping_pong_counter(self, msg, frm, ident): + if msg in (self.pingMessage, self.pongMessage): + if msg 
== self.pingMessage: + self.recv_ping_count += 1 + if msg == self.pongMessage: + self.recv_pong_count += 1 + + return orig_recv_method(msg, frm, ident) + + stack.sendPingPong = types.MethodType(send_ping_pong_counter, stack) + stack.handlePingPong = types.MethodType(recv_ping_pong_counter, stack) + + +def create_and_prep_stacks(names, basedir, looper, conf): + genKeys(basedir, names) + printers = [Printer(n) for n in names] + # adict is used below to copy the config module since one stack might + # have different config from others + stacks = [ZStack(n, ha=genHa(), basedirpath=basedir, + msgHandler=printers[i].print, + restricted=True, config=adict(**conf.__dict__)) + for i, n in enumerate(names)] + prepStacks(looper, *stacks, connect=True, useKeys=True) + return stacks, printers + + +def check_stacks_communicating(looper, stacks, printers): + """ + Check that `stacks` are able to send and receive messages to each other + Assumes for each stack in `stacks`, there is a printer in `printers`, + at the same index + """ + + # Each sends the same message to all other stacks + for idx, stack in enumerate(stacks): + for other_stack in stacks: + if stack != other_stack: + stack.send({'greetings': '{} here'.format(stack.name)}, + other_stack.name) + + # Each stack receives message from others + for idx, printer in enumerate(printers): + for j, stack in enumerate(stacks): + if idx != j: + looper.run(eventually(chkPrinted, printer, + {'greetings': '{} here'.format(stack.name)})) + + +def get_file_permission_mask(file_path): + return oct(os.stat(file_path)[ST_MODE] & 0o777)[-3:] + + +def get_zstack_key_paths(stack_name, common_path): + home_dir = ZStack.homeDirPath(common_path, stack_name) + # secrets + sigDirPath = ZStack.sigDirPath(home_dir) + secretDirPath = ZStack.secretDirPath(home_dir) + # public + verifDirPath = ZStack.verifDirPath(home_dir) + pubDirPath = ZStack.publicDirPath(home_dir) + return dict( + secret=( + os.path.join(sigDirPath, stack_name) + '.key_secret', + 
os.path.join(secretDirPath, stack_name) + '.key_secret' + ), + public=( + os.path.join(verifDirPath, stack_name) + '.key', + os.path.join(pubDirPath, stack_name) + '.key' + ), + ) diff --git a/stp_zmq/test/test_heartbeats.py b/stp_zmq/test/test_heartbeats.py new file mode 100644 index 0000000000..b87ec50a93 --- /dev/null +++ b/stp_zmq/test/test_heartbeats.py @@ -0,0 +1,111 @@ +import pytest + +from stp_zmq.test.helper import create_and_prep_stacks, \ + check_stacks_communicating, add_counters_to_ping_pong + + +def sent_ping_counts(*stacks): + return {s.name: s.sent_ping_count for s in stacks} + + +def sent_pong_counts(*stacks): + return {s.name: s.sent_pong_count for s in stacks} + + +def recv_ping_counts(*stacks): + return {s.name: s.recv_ping_count for s in stacks} + + +def recv_pong_counts(*stacks): + return {s.name: s.recv_pong_count for s in stacks} + + +heartbeat_freq = 2 + + +@pytest.fixture() +def setup(tdir, looper, tconf): + names = ['Alpha', 'Beta', 'Gamma'] + (alpha, beta, gamma), (alphaP, betaP, gammaP) = \ + create_and_prep_stacks(names, tdir, looper, tconf) + check_stacks_communicating(looper, (alpha, beta, gamma), + (alphaP, betaP, gammaP)) + return (alpha, beta, gamma) + + +def test_heartbeats_only_one_stack(tdir, looper, tconf, setup): + """ + Only one of several stacks sends periodic heartbeat messages, other stacks + acknowledge it but do not send heartbeats + """ + (alpha, beta, gamma) = setup + + # Only alpha should send heartbeats + alpha.config.ENABLE_HEARTBEATS = True + alpha.config.HEARTBEAT_FREQ = heartbeat_freq + + for s in (alpha, beta, gamma): + add_counters_to_ping_pong(s) + + sent_pings_before = sent_ping_counts(alpha, beta, gamma) + sent_pongs_before = sent_pong_counts(alpha, beta, gamma) + recv_pings_before = recv_ping_counts(alpha, beta, gamma) + recv_pongs_before = recv_pong_counts(alpha, beta, gamma) + + looper.runFor(6*heartbeat_freq) + + # Only alpha should send pings + assert sent_ping_counts(alpha)[alpha.name] - 
sent_pings_before[alpha.name] >= 5 + assert sent_ping_counts(beta)[beta.name] == sent_pings_before[beta.name] + assert sent_ping_counts(gamma)[gamma.name] == sent_pings_before[gamma.name] + + # All except alpha should receive pings + assert recv_ping_counts(alpha)[alpha.name] == recv_pings_before[alpha.name] + assert recv_ping_counts(beta)[beta.name] - recv_pings_before[beta.name] >= 5 + assert recv_ping_counts(gamma)[gamma.name] - recv_pings_before[gamma.name] >= 5 + + # All except alpha should send pongs + assert sent_pong_counts(alpha)[alpha.name] == sent_pongs_before[alpha.name] + assert sent_pong_counts(beta)[beta.name] - sent_pongs_before[beta.name] >= 5 + assert sent_pong_counts(gamma)[gamma.name] - sent_pongs_before[ + gamma.name] >= 5 + + # Only alpha should receive pongs + assert recv_pong_counts(alpha)[alpha.name] - recv_pongs_before[ + alpha.name] >= 5 + assert recv_pong_counts(beta)[beta.name] == recv_pongs_before[beta.name] + assert recv_pong_counts(gamma)[gamma.name] == recv_pongs_before[gamma.name] + + +def test_heartbeats_all_stacks(tdir, looper, tconf, setup): + """ + All stacks send periodic heartbeat messages and other stacks + acknowledge it + """ + + (alpha, beta, gamma) = setup + + # All stacks should send heartbeats + for stack in (alpha, beta, gamma): + stack.config.ENABLE_HEARTBEATS = True + stack.config.HEARTBEAT_FREQ = heartbeat_freq + + for s in (alpha, beta, gamma): + add_counters_to_ping_pong(s) + + sent_pings_before = sent_ping_counts(alpha, beta, gamma) + sent_pongs_before = sent_pong_counts(alpha, beta, gamma) + recv_pings_before = recv_ping_counts(alpha, beta, gamma) + recv_pongs_before = recv_pong_counts(alpha, beta, gamma) + + looper.runFor(6 * heartbeat_freq) + + for stack in (alpha, beta, gamma): + # All should send pings + assert sent_ping_counts(stack)[stack.name] - sent_pings_before[stack.name] >= 5 + # All should receive pings + assert recv_ping_counts(stack)[stack.name] - recv_pings_before[stack.name] >= 5 + # All except 
alpha should send pongs + assert sent_pong_counts(stack)[stack.name] - sent_pongs_before[stack.name] >= 5 + # All should receive pongs + assert recv_pong_counts(stack)[stack.name] - recv_pongs_before[stack.name] >= 5 diff --git a/stp_zmq/test/test_kitzstack.py b/stp_zmq/test/test_kitzstack.py new file mode 100644 index 0000000000..5f698f7564 --- /dev/null +++ b/stp_zmq/test/test_kitzstack.py @@ -0,0 +1,27 @@ +from copy import copy + +from stp_core.loop.eventually import eventually +from stp_core.network.auth_mode import AuthMode +from stp_core.test.helper import Printer, prepStacks, \ + checkStacksConnected +from stp_zmq.test.helper import genKeys +from stp_zmq.kit_zstack import KITZStack + + +def testKitZStacksConnected(registry, tdir, looper, tconf): + genKeys(tdir, registry.keys()) + stacks = [] + for name, ha in registry.items(): + printer = Printer(name) + stackParams = dict(name=name, ha=ha, basedirpath=tdir, auth_mode=AuthMode.RESTRICTED.value) + reg = copy(registry) + reg.pop(name) + stack = KITZStack(stackParams, printer.print, reg) + stacks.append(stack) + + prepStacks(looper, *stacks, connect=False, useKeys=True) + # TODO: the connection may not be established for the first try because + # some of the stacks may not have had a remote yet (that is they haven't had yet called connect) + timeout = 2*tconf.RETRY_TIMEOUT_RESTRICTED+1 + looper.run(eventually( + checkStacksConnected, stacks, retryWait=1, timeout=timeout)) diff --git a/stp_zmq/test/test_large_messages.py b/stp_zmq/test/test_large_messages.py new file mode 100644 index 0000000000..bb27ffec94 --- /dev/null +++ b/stp_zmq/test/test_large_messages.py @@ -0,0 +1,65 @@ +import json + +import zmq +from stp_core.crypto.util import randomSeed +from stp_core.network.port_dispenser import genHa +from stp_core.test.helper import SMotor +from stp_zmq.test.helper import genKeys +from stp_zmq.simple_zstack import SimpleZStack + + +def testSimpleZStacksMsgs(tdir, looper): + names = ['Alpha', 'Beta'] + 
genKeys(tdir, names) + names = ['Alpha', 'Beta'] + aseed = randomSeed() + bseed = randomSeed() + + size = 100000 + msg = json.dumps({'random': randomSeed(size).decode()}).encode() + + def aHandler(m): + print('{} printing... {}'.format(names[0], m)) + d, _ = m + print('Message size is {}'.format(len(d['random']))) + assert len(d['random']) == size + + def bHandler(m): + print(beta.msgHandler) + a = list(beta.peersWithoutRemotes)[0] + try: + beta.listener.send_multipart([a, msg], + flags=zmq.NOBLOCK) + except zmq.Again: + return False + print('{} printing... {}'.format(names[1], m)) + + stackParams = { + "name": names[0], + "ha": genHa(), + "auto": 2, + "basedirpath": tdir + } + alpha = SimpleZStack(stackParams, aHandler, aseed, False) + + stackParams = { + "name": names[1], + "ha": genHa(), + "auto": 2, + "basedirpath": tdir + } + beta = SimpleZStack(stackParams, bHandler, bseed, True) + + amotor = SMotor(alpha) + looper.add(amotor) + + bmotor = SMotor(beta) + looper.add(bmotor) + + alpha.connect(name=beta.name, ha=beta.ha, + verKeyRaw=beta.verKeyRaw, publicKeyRaw=beta.publicKeyRaw) + + looper.runFor(0.25) + alpha.send({'greetings': 'hi'}, beta.name) + looper.runFor(1) + diff --git a/stp_zmq/test/test_quotas.py b/stp_zmq/test/test_quotas.py new file mode 100644 index 0000000000..eff7b4acb8 --- /dev/null +++ b/stp_zmq/test/test_quotas.py @@ -0,0 +1,70 @@ +import json + +from stp_core.loop.eventually import eventually + +from stp_core.crypto.util import randomSeed +from stp_core.network.port_dispenser import genHa +from stp_core.test.helper import Printer, prepStacks, CollectingMsgsHandler, CounterMsgsHandler, MessageSender +from stp_zmq.test.helper import genKeys +from stp_zmq.zstack import ZStack + + +def testMessageQuota(tdir, looper): + names = ['Alpha', 'Beta'] + genKeys(tdir, names) + alphaP = Printer(names[0]) + betaMsgHandler = CollectingMsgsHandler() + + alpha = ZStack(names[0], ha=genHa(), basedirpath=tdir, msgHandler=alphaP.print, + restricted=True) + beta 
= ZStack(names[1], ha=genHa(), basedirpath=tdir, msgHandler=betaMsgHandler.handler, + restricted=True, onlyListener=True) + + prepStacks(looper, alpha, beta, connect=True, useKeys=True) + + messages = [] + numMessages = 150 * beta.listenerQuota + for i in range(numMessages): + msg = json.dumps({'random': randomSeed().decode()}).encode() + if alpha.send(msg, beta.name): + messages.append(json.loads(msg.decode())) + + def checkAllReceived(): + assert len(messages) == len(betaMsgHandler.receivedMessages) + assert messages == betaMsgHandler.receivedMessages + + looper.run(eventually(checkAllReceived, retryWait=0.5, + timeout=5)) + + +def testManyMessages(tdir, looper): + names = ['Alpha', 'Beta'] + genKeys(tdir, names) + alphaP = Printer(names[0]) + betaMsgHandler = CounterMsgsHandler() + + alpha = ZStack(names[0], + ha=genHa(), + basedirpath=tdir, + msgHandler=alphaP.print, + restricted=True) + beta = ZStack(names[1], + ha=genHa(), + basedirpath=tdir, + msgHandler=betaMsgHandler.handler, + restricted=True) + prepStacks(looper, alpha, beta, connect=True, useKeys=True) + + looper.runFor(1) + + msgNum = 100000 + msgSender = MessageSender(msgNum, alpha, beta.name) + looper.add(msgSender) + + def checkAllReceived(): + assert msgSender.sentMsgCount == msgNum + assert betaMsgHandler.receivedMsgCount == msgNum + + looper.run(eventually(checkAllReceived, + retryWait=1, + timeout=60)) diff --git a/stp_zmq/test/test_reconnect.py b/stp_zmq/test/test_reconnect.py new file mode 100644 index 0000000000..9ec4af0164 --- /dev/null +++ b/stp_zmq/test/test_reconnect.py @@ -0,0 +1,138 @@ +import time +from copy import copy + +import pytest + +from stp_core.loop.eventually import eventually +from stp_core.network.auth_mode import AuthMode +from stp_core.test.helper import Printer, prepStacks, \ + checkStacksConnected, checkStackDisonnected +from stp_zmq.test.helper import genKeys, add_counters_to_ping_pong +from stp_zmq.kit_zstack import KITZStack + + +@pytest.fixture() +def 
connection_timeout(tconf): + # TODO: the connection may not be established for the first try because + # some of the stacks may not have had a remote yet (that is they haven't had yet called connect) + return 2 * tconf.RETRY_TIMEOUT_RESTRICTED + 1 + + +@pytest.fixture() +def connected_stacks(registry, tdir, looper, connection_timeout): + genKeys(tdir, registry.keys()) + stacks = [] + for name, ha in registry.items(): + printer = Printer(name) + stackParams = dict(name=name, ha=ha, basedirpath=tdir, auth_mode=AuthMode.RESTRICTED.value) + reg = copy(registry) + reg.pop(name) + stack = KITZStack(stackParams, printer.print, reg) + stacks.append(stack) + + motors = prepStacks(looper, *stacks, connect=False, useKeys=True) + + looper.run(eventually( + checkStacksConnected, stacks, retryWait=1, timeout=connection_timeout)) + + return stacks, motors + + +@pytest.fixture() +def disconnect_first_stack(looper, connected_stacks, connection_timeout): + stacks, motors = connected_stacks + + disconnected_motor = motors[0] + other_stacks = stacks[1:] + + looper.removeProdable(disconnected_motor) + disconnected_motor.stop() + + return disconnected_motor, other_stacks + + +def disconnect(looper, disconnected_stack, connection_timeout): + disconnected_motor, other_stacks = disconnected_stack + looper.run(eventually( + checkStackDisonnected, disconnected_motor.stack, other_stacks, + retryWait=1, timeout=connection_timeout)) + looper.run(eventually( + checkStacksConnected, other_stacks, retryWait=1, timeout=connection_timeout)) + + +def connect(looper, disconnected_stack): + disconnected_motor, _ = disconnected_stack + looper.add(disconnected_motor) + + +def check_disconnected_for(disconnect_time, looper, connected_stacks, + connection_timeout, disconnect_first_stack): + stacks, motors = connected_stacks + + # DISCONNECT + disconnect(looper, disconnect_first_stack, connection_timeout) + + looper.runFor(disconnect_time) + + # CONNECT + connect(looper, disconnect_first_stack) + 
looper.run(eventually( + checkStacksConnected, stacks, retryWait=1, timeout=2*connection_timeout)) + + +def test_reconnect_short(looper, connected_stacks, connection_timeout, + disconnect_first_stack): + """ + Check that if a stack is kept disconnected for a short time, it is able to reconnect + """ + check_disconnected_for(1, + looper, connected_stacks, connection_timeout, + disconnect_first_stack) + + +def test_reconnect_long(looper, connected_stacks, connection_timeout, + disconnect_first_stack): + """ + Check that if a stack is kept disconnected for a long time, it is able to reconnect + """ + check_disconnected_for(5 * 60, + looper, connected_stacks, connection_timeout, + disconnect_first_stack) + + +def test_recreate_sockets_after_ping_retry(looper, tconf, connected_stacks, + connection_timeout, + disconnect_first_stack): + """ + Check that if a stack tries to send PING on re-connect, but not more than MAX_RECONNECT_RETRY_ON_SAME_SOCKET time. + After this sockets must be re-created. 
+ """ + _, other_stacks = disconnect_first_stack + stack = other_stacks[0] + + disconnect(looper, disconnect_first_stack, connection_timeout) + + # do not do automatic re-connect + for stack in other_stacks: + stack.nextCheck = time.perf_counter() + 10000000 + + add_counters_to_ping_pong(stack) + + # check that sockets are not re-created MAX_RECONNECT_RETRY_ON_SAME_SOCKET times + # and PING is called + for i in range(tconf.MAX_RECONNECT_RETRY_ON_SAME_SOCKET): + sockets_before = [remote.socket for name, remote in stack.remotes.items()] + ping_before = stack.sent_ping_count + + stack.retryDisconnected() + + sockets_after = [remote.socket for name, remote in stack.remotes.items()] + ping_after = stack.sent_ping_count + assert sockets_before == sockets_after + assert ping_before == ping_after - 1 + + # check that sockets are re-created on next re-connect + sockets_before = [remote.socket for name, remote in stack.remotes.items()] + stack.retryDisconnected() + sockets_after = [remote.socket for name, remote in stack.remotes.items()] + assert sockets_before != sockets_after diff --git a/stp_zmq/test/test_utils.py b/stp_zmq/test/test_utils.py new file mode 100644 index 0000000000..a37ea75339 --- /dev/null +++ b/stp_zmq/test/test_utils.py @@ -0,0 +1,8 @@ +from stp_zmq.test.helper import get_file_permission_mask +from stp_zmq.util import createCertsFromKeys + + +def test_create_certs_from_fromkeys_sets_600_for_secret_644_for_pub_keys(tdir): + public_key_file, secret_key_file = createCertsFromKeys(tdir, 'akey', b'0123456789') + assert get_file_permission_mask(secret_key_file) == '600' + assert get_file_permission_mask(public_key_file) == '644' diff --git a/stp_zmq/test/test_zstack.py b/stp_zmq/test/test_zstack.py new file mode 100644 index 0000000000..428fa656b6 --- /dev/null +++ b/stp_zmq/test/test_zstack.py @@ -0,0 +1,167 @@ +import os + +import pytest + +from stp_core.crypto.util import randomSeed +from stp_core.loop.eventually import eventually +from 
stp_core.network.port_dispenser import genHa +from stp_core.test.helper import Printer, prepStacks, chkPrinted +from stp_zmq.test.helper import genKeys, create_and_prep_stacks, \ + check_stacks_communicating, get_file_permission_mask, get_zstack_key_paths +from stp_zmq.zstack import ZStack +import time + + +def testRestricted2ZStackCommunication(tdir, looper, tconf): + """ + Create 2 ZStack and make them send and receive messages. + Both stacks allow communication only when keys are shared + :return: + """ + names = ['Alpha', 'Beta'] + (alpha, beta), (alphaP, betaP) = create_and_prep_stacks(names, tdir, + looper, tconf) + check_stacks_communicating(looper, (alpha, beta), (alphaP, betaP)) + + +def testUnrestricted2ZStackCommunication(tdir, looper, tconf): + """ + Create 2 ZStack and make them send and receive messages. + Both stacks allow communication even when keys are not shared + :return: + """ + names = ['Alpha', 'Beta'] + alphaP = Printer(names[0]) + betaP = Printer(names[1]) + alpha = ZStack(names[0], ha=genHa(), basedirpath=tdir, msgHandler=alphaP.print, + restricted=False, seed=randomSeed(), config=tconf) + beta = ZStack(names[1], ha=genHa(), basedirpath=tdir, msgHandler=betaP.print, + restricted=False, seed=randomSeed(), config=tconf) + + prepStacks(looper, alpha, beta, connect=True, useKeys=True) + alpha.send({'greetings': 'hi'}, beta.name) + beta.send({'greetings': 'hello'}, alpha.name) + + looper.run(eventually(chkPrinted, alphaP, {'greetings': 'hello'})) + looper.run(eventually(chkPrinted, betaP, {'greetings': 'hi'})) + + +def testZStackSendMethodReturnsFalseIfDestinationIsUnknown(tdir, looper, tconf): + """ + Checks: https://evernym.atlassian.net/browse/SOV-971 + 1. Connect two stacks + 2. Disconnect a remote from one side + 3. 
Send a message from disconnected remote + Expected result: the stack's method 'send' should not + fail just return False + """ + names = ['Alpha', 'Beta'] + (alpha, beta), _ = create_and_prep_stacks(names, tdir, looper, tconf) + # disconnect remote + alpha.getRemote(beta.name).disconnect() + # check send message returns False + assert alpha.send({'greetings': 'hello'}, beta.name) is False + + +def test_zstack_non_utf8(tdir, looper, tconf): + """ + ZStack gets a non utf-8 message and does not hand it over to the + processing method + :return: + """ + names = ['Alpha', 'Beta'] + genKeys(tdir, names) + (alpha, beta), (alphaP, betaP) = create_and_prep_stacks(names, tdir, + looper, tconf) + + # Send a utf-8 message and see its received + for uid in alpha.remotes: + alpha.transmit(b'{"k1": "v1"}', uid, serialized=True) + looper.run(eventually(chkPrinted, betaP, {"k1": "v1"})) + + # Send a non utf-8 message and see its not received (by the receiver method) + for uid in alpha.remotes: + alpha.transmit(b'{"k2": "v2\x9c"}', uid, serialized=True) + with pytest.raises(AssertionError): + looper.run(eventually(chkPrinted, betaP, {"k2": "v2\x9c"})) + # TODO: A better test where the output of the parsing method is checked + # requires spyable methods + + # Again send a utf-8 message and see its received (checks if stack is + # functional after receiving a bad message) + for uid in alpha.remotes: + alpha.transmit(b'{"k3": "v3"}', uid, serialized=True) + looper.run(eventually(chkPrinted, betaP, {"k3": "v3"})) + + +def test_zstack_creates_keys_with_secure_permissions(tdir): + any_seed = b'0'*32 + stack_name = 'aStack' + key_paths = get_zstack_key_paths(stack_name, tdir) + + ZStack.initLocalKeys(stack_name, tdir, any_seed) + + for file_path in key_paths['secret']: + assert get_file_permission_mask(file_path) == '600' + + for file_path in key_paths['public']: + assert get_file_permission_mask(file_path) == '644' + + +""" +TODO: +* Create ZKitStack, which should maintain a registry and 
method to check for any +disconnections and do reconnections if found. +* Need a way to run current tests against both stack types, or at least a way to +set a fixture parameter to do so. +* ZNodeStack +* ZClientStack +* test_node_connection needs to work with ZMQ +* test/pool_transactions package + +""" + + +def test_high_load(tdir, looper, tconf): + """ + Checks whether ZStack can cope with high message rate + """ + + letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', + 'O', 'P', 'Q', 'R', 'S', 'T', 'U', + 'V', 'W', 'X', 'Y', 'Z'] + + num_of_senders = 3 + num_of_requests_per_sender = 100000 + + expected_messages = [] + received_messages = [] + + def handler(wrapped_message): + msg, sender = wrapped_message + received_messages.append(msg) + + def create_stack(name, handler=None): + return ZStack(name, ha=genHa(), basedirpath=tdir, + msgHandler=handler, restricted=False, + seed=randomSeed(), config=tconf) + + senders = [create_stack(letter) for letter in letters[:num_of_senders]] + gamma = create_stack("Gamma", handler) + prepStacks(looper, *senders, gamma, connect=True, useKeys=True) + + for i in range(num_of_requests_per_sender): + for sender in senders: + msg = {sender.name: i} + expected_messages.append(msg) + sender.send(msg, gamma.name) + + looper.runFor(5) + + assert len(received_messages) != 0 + assert len(expected_messages) == len(received_messages), \ + "{} != {}, LAST IS {}"\ + .format(len(expected_messages), + len(received_messages), + received_messages[-1]) diff --git a/stp_zmq/test/test_zstack_communication.py b/stp_zmq/test/test_zstack_communication.py new file mode 100644 index 0000000000..14a5d97d78 --- /dev/null +++ b/stp_zmq/test/test_zstack_communication.py @@ -0,0 +1,63 @@ +from copy import copy +from itertools import combinations + +import pytest + +from stp_core.loop.eventually import eventually +from stp_core.network.auth_mode import AuthMode +from stp_core.test.helper import Printer, prepStacks, \ + 
checkStacksConnected, chkPrinted +from stp_zmq.test.helper import genKeys +from stp_zmq.kit_zstack import KITZStack + + +@pytest.mark.skip('Use a packet capture tool') +def testKitZStacksCommunication(registry, tdir, looper): + # TODO: Use a packet capture tool + genKeys(tdir, registry.keys()) + stacks = [] + names = [] + for name, ha in registry.items(): + printer = Printer(name) + stackParams = dict(name=name, ha=ha, basedirpath=tdir, + auth_mode=AuthMode.RESTRICTED.value) + reg = copy(registry) + reg.pop(name) + stack = KITZStack(stackParams, printer.print, reg) + stacks.append(stack) + names.append((name, printer)) + + prepStacks(looper, *stacks, connect=False, useKeys=True) + # TODO: the connection may not be established for the first try because + # some of the stacks may not have had a remote yet (that is they haven't had yet called connect) + timeout = 4*KITZStack.RETRY_TIMEOUT_RESTRICTED+1 + looper.run(eventually( + checkStacksConnected, stacks, retryWait=1, timeout=timeout)) + + def send_recv(): + for i, j in combinations(range(len(stacks)), 2): + alpha = stacks[i] + beta = stacks[j] + + alpha.send({'greetings': 'hi'}, beta.name) + beta.send({'greetings': 'hello'}, alpha.name) + + looper.run( + eventually(chkPrinted, names[i][1], {'greetings': 'hello'}, timeout=15)) + looper.run(eventually(chkPrinted, names[j][1], {'greetings': 'hi'}, timeout=15)) + + names[i][1].reset() + names[j][1].reset() + + def pr(): + for stack in stacks: + for r in stack.remotes.values(): + print(r._lastSocketEvents()) + + for _ in range(100): + # send_recv() + looper.run(eventually( + checkStacksConnected, stacks, retryWait=1, timeout=timeout)) + # pr() + looper.runFor(30) + # pr() diff --git a/stp_zmq/util.py b/stp_zmq/util.py new file mode 100644 index 0000000000..ecd97bf92e --- /dev/null +++ b/stp_zmq/util.py @@ -0,0 +1,149 @@ +# TODO: Contains duplicated code, need to be refactored + + +import datetime +import os +import shutil +from binascii import hexlify, unhexlify + +from 
libnacl import crypto_sign_seed_keypair +from zmq.auth.certs import _write_key_file, _cert_public_banner, \ + _cert_secret_banner +from zmq.utils import z85 + +from stp_core.crypto.util import ed25519PkToCurve25519 as ep2c, \ + ed25519SkToCurve25519 as es2c, isHex, randomSeed + + +def createCertsFromKeys(key_dir, name, public_key, secret_key=None, + metadata=None, pSuffix='key', sSuffix='key_secret'): + public_key_file, secret_key_file = _get_key_files_paths(key_dir, name, + pSuffix, sSuffix) + + _write_secret_public_keys(public_key_file, secret_key_file, + public_key, secret_key, + metadata + ) + return public_key_file, secret_key_file + + +def _get_key_files_paths(key_dir, name, pSuffix, sSuffix): + base_filename = os.path.join(key_dir, name) + secret_key_file = "{}.{}".format(base_filename, sSuffix) + public_key_file = "{}.{}".format(base_filename, pSuffix) + return public_key_file, secret_key_file + + +def _write_secret_public_keys(public_key_file_path, secret_key_file_path, public_key, + secret_key, metadata): + current_time = datetime.datetime.now() + _write_public_key_file(public_key_file_path, current_time, public_key) + _write_secret_key_file(secret_key_file_path, current_time, public_key, + secret_key, metadata) + + +def _write_public_key_file(key_filename, current_time, public_key): + banner = _cert_public_banner.format(current_time) + _create_file_with_mode(key_filename, 0o644) + _write_key_file(key_filename, + banner, + public_key, + secret_key=None, + metadata=None, + encoding='utf-8') + + +def _write_secret_key_file(key_filename, current_time, + public_key, secret_key, metadata): + banner = _cert_secret_banner.format(current_time) + _create_file_with_mode(key_filename, 0o600) + _write_key_file(key_filename, + banner, + public_key, + secret_key=secret_key, + metadata=metadata, + encoding='utf-8') + + +def _create_file_with_mode(path, mode): + open(path, 'a').close() + os.chmod(path, mode) + + +def createEncAndSigKeys(enc_key_dir, sig_key_dir, name, 
seed=None): + seed = seed or randomSeed() + if isinstance(seed, str): + seed = seed.encode() + # ATTENTION: Passing `seed` encoded to bytes or not in + # `crypto_sign_seed_keypair` will generate different keypairs + verif_key, sig_key = crypto_sign_seed_keypair(seed) + createCertsFromKeys(sig_key_dir, name, z85.encode(verif_key), + z85.encode(sig_key[:32])) + public_key, secret_key = ep2c(verif_key), es2c(sig_key) + createCertsFromKeys(enc_key_dir, name, z85.encode(public_key), + z85.encode(secret_key)) + return (public_key, secret_key), (verif_key, sig_key) + + +def moveKeyFilesToCorrectLocations(keys_dir, pkdir, skdir): + for key_file in os.listdir(keys_dir): + if key_file.endswith(".key"): + try: + shutil.move(os.path.join(keys_dir, key_file), + os.path.join(pkdir, key_file)) + except shutil.Error as ex: + # print(ex) + pass + if key_file.endswith(".key_secret"): + try: + shutil.move(os.path.join(keys_dir, key_file), + os.path.join(skdir, key_file)) + except shutil.Error as ex: + # print(ex) + pass + + +def generate_certificates(base_dir, *peer_names, pubKeyDir=None, + secKeyDir=None, sigKeyDir=None, + verkeyDir=None, clean=True): + ''' Generate client and server CURVE certificate files''' + pubKeyDir = pubKeyDir or 'public_keys' + secKeyDir = secKeyDir or 'private_keys' + verkeyDir = verkeyDir or 'verif_keys' + sigKeyDir = sigKeyDir or 'sig_keys' + + # keys_dir = os.path.join(base_dir, 'certificates') + e_keys_dir = os.path.join(base_dir, '_enc') + s_keys_dir = os.path.join(base_dir, '_sig') + + public_keys_dir = os.path.join(base_dir, pubKeyDir) + secret_keys_dir = os.path.join(base_dir, secKeyDir) + ver_keys_dir = os.path.join(base_dir, verkeyDir) + sig_keys_dir = os.path.join(base_dir, sigKeyDir) + + # Create directories for certificates, remove old content if necessary + for d in [e_keys_dir, s_keys_dir, public_keys_dir, secret_keys_dir, + ver_keys_dir, sig_keys_dir]: + if clean and os.path.exists(d): + shutil.rmtree(d) + os.makedirs(d, exist_ok=True) + + # 
create new keys in certificates dir + for peer_name in peer_names: + createEncAndSigKeys(e_keys_dir, s_keys_dir, peer_name) + + # move public keys to appropriate directory + for keys_dir, pkdir, skdir in [ + (e_keys_dir, public_keys_dir, secret_keys_dir), + (s_keys_dir, ver_keys_dir, sig_keys_dir) + ]: + moveKeyFilesToCorrectLocations(keys_dir, pkdir, skdir) + + shutil.rmtree(e_keys_dir) + shutil.rmtree(s_keys_dir) + + print('Public keys in {}'.format(public_keys_dir)) + print('Private keys in {}'.format(secret_keys_dir)) + print('Verification keys in {}'.format(ver_keys_dir)) + print('Signing keys in {}'.format(sig_keys_dir)) + diff --git a/stp_zmq/zstack.py b/stp_zmq/zstack.py new file mode 100644 index 0000000000..bebb95cdf2 --- /dev/null +++ b/stp_zmq/zstack.py @@ -0,0 +1,938 @@ +import inspect + +from stp_core.common.config.util import getConfig + +try: + import ujson as json +except ImportError: + import json + +import os +import shutil +import sys +import time +from binascii import hexlify, unhexlify +from collections import deque +from typing import Dict, Mapping, Tuple, Any, Union +from typing import Set + +# import stp_zmq.asyncio +import zmq.auth +from stp_core.crypto.nacl_wrappers import Signer, Verifier +from stp_core.crypto.util import isHex, ed25519PkToCurve25519 +from stp_core.network.exceptions import PublicKeyNotFoundOnDisk, VerKeyNotFoundOnDisk +from stp_zmq.authenticator import MultiZapAuthenticator +from zmq.utils import z85 + +import zmq +from stp_core.common.log import getlogger +from stp_core.network.network_interface import NetworkInterface +from stp_zmq.util import createEncAndSigKeys, \ + moveKeyFilesToCorrectLocations, createCertsFromKeys +from stp_zmq.remote import Remote, set_keepalive, set_zmq_internal_queue_length + +logger = getlogger() + + +# TODO: Use Async io +# TODO: There a number of methods related to keys management, they can be moved to some class like KeysManager +class ZStack(NetworkInterface): + # Assuming only one 
listener per stack for now. + + PublicKeyDirName = 'public_keys' + PrivateKeyDirName = 'private_keys' + VerifKeyDirName = 'verif_keys' + SigKeyDirName = 'sig_keys' + + sigLen = 64 + pingMessage = 'pi' + pongMessage = 'po' + healthMessages = {pingMessage.encode(), pongMessage.encode()} + + # TODO: This is not implemented, implement this + messageTimeout = 3 + + def __init__(self, name, ha, basedirpath, msgHandler, restricted=True, + seed=None, onlyListener=False, config=None): + self._name = name + self.ha = ha + self.basedirpath = basedirpath + self.msgHandler = msgHandler + self.seed = seed + self.config = config or getConfig() + + self.listenerQuota = self.config.DEFAULT_LISTENER_QUOTA + self.senderQuota = self.config.DEFAULT_SENDER_QUOTA + + self.homeDir = None + # As of now there would be only one file in secretKeysDir and sigKeyDir + self.publicKeysDir = None + self.secretKeysDir = None + self.verifKeyDir = None + self.sigKeyDir = None + + self.signer = None + self.verifiers = {} + + self.setupDirs() + self.setupOwnKeysIfNeeded() + self.setupSigning() + + # self.poller = test.asyncio.Poller() + + self.restricted = restricted + + self.ctx = None # type: Context + self.listener = None + self.auth = None + + # Each remote is identified uniquely by the name + self._remotes = {} # type: Dict[str, Remote] + + self.remotesByKeys = {} + + # Indicates if this stack will maintain any remotes or will + # communicate simply to listeners. 
Used in ClientZStack + self.onlyListener = onlyListener + self.peersWithoutRemotes = set() + + self._conns = set() # type: Set[str] + + self.rxMsgs = deque() + self._created = time.perf_counter() + + self.last_heartbeat_at = None + + @property + def remotes(self): + return self._remotes + + @property + def created(self): + return self._created + + @property + def name(self): + return self._name + + @staticmethod + def isRemoteConnected(r) -> bool: + return r.isConnected + + def removeRemote(self, remote: Remote, clear=True): + """ + Currently not using clear + """ + name = remote.name + pkey = remote.publicKey + vkey = remote.verKey + if name in self.remotes: + self.remotes.pop(name) + self.remotesByKeys.pop(pkey, None) + self.verifiers.pop(vkey, None) + else: + logger.warning('No remote named {} present') + + @staticmethod + def initLocalKeys(name, baseDir, sigseed, override=False): + sDir = os.path.join(baseDir, '__sDir') + eDir = os.path.join(baseDir, '__eDir') + os.makedirs(sDir, exist_ok=True) + os.makedirs(eDir, exist_ok=True) + (public_key, secret_key), (verif_key, sig_key) = createEncAndSigKeys(eDir, + sDir, + name, + seed=sigseed) + + homeDir = ZStack.homeDirPath(baseDir, name) + verifDirPath = ZStack.verifDirPath(homeDir) + sigDirPath = ZStack.sigDirPath(homeDir) + secretDirPath = ZStack.secretDirPath(homeDir) + pubDirPath = ZStack.publicDirPath(homeDir) + for d in (homeDir, verifDirPath, sigDirPath, secretDirPath, pubDirPath): + os.makedirs(d, exist_ok=True) + + moveKeyFilesToCorrectLocations(sDir, verifDirPath, sigDirPath) + moveKeyFilesToCorrectLocations(eDir, pubDirPath, secretDirPath) + + shutil.rmtree(sDir) + shutil.rmtree(eDir) + return hexlify(public_key).decode(), hexlify(verif_key).decode() + + @staticmethod + def initRemoteKeys(name, remoteName, baseDir, verkey, override=False): + homeDir = ZStack.homeDirPath(baseDir, name) + verifDirPath = ZStack.verifDirPath(homeDir) + pubDirPath = ZStack.publicDirPath(homeDir) + for d in (homeDir, 
verifDirPath, pubDirPath): + os.makedirs(d, exist_ok=True) + + if isHex(verkey): + verkey = unhexlify(verkey) + + createCertsFromKeys(verifDirPath, remoteName, z85.encode(verkey)) + public_key = ed25519PkToCurve25519(verkey) + createCertsFromKeys(pubDirPath, remoteName, z85.encode(public_key)) + + def onHostAddressChanged(self): + # we don't store remote data like ip, port, domain name, etc, so + # nothing to do here + pass + + @staticmethod + def areKeysSetup(name, baseDir): + homeDir = ZStack.homeDirPath(baseDir, name) + verifDirPath = ZStack.verifDirPath(homeDir) + pubDirPath = ZStack.publicDirPath(homeDir) + sigDirPath = ZStack.sigDirPath(homeDir) + secretDirPath = ZStack.secretDirPath(homeDir) + for d in (verifDirPath, pubDirPath): + if not os.path.isfile(os.path.join(d, '{}.key'.format(name))): + return False + for d in (sigDirPath, secretDirPath): + if not os.path.isfile(os.path.join(d, '{}.key_secret'.format(name))): + return False + return True + + @staticmethod + def keyDirNames(): + return ZStack.PublicKeyDirName, ZStack.PrivateKeyDirName, \ + ZStack.VerifKeyDirName, ZStack.SigKeyDirName + + @staticmethod + def getHaFromLocal(name, basedirpath): + return None + + def __repr__(self): + return self.name + + @staticmethod + def homeDirPath(baseDirPath, name): + return os.path.join(baseDirPath, name) + + @staticmethod + def publicDirPath(homeDirPath): + return os.path.join(homeDirPath, ZStack.PublicKeyDirName) + + @staticmethod + def secretDirPath(homeDirPath): + return os.path.join(homeDirPath, ZStack.PrivateKeyDirName) + + @staticmethod + def verifDirPath(homeDirPath): + return os.path.join(homeDirPath, ZStack.VerifKeyDirName) + + @staticmethod + def sigDirPath(homeDirPath): + return os.path.join(homeDirPath, ZStack.SigKeyDirName) + + @staticmethod + def learnKeysFromOthers(baseDir, name, others): + homeDir = ZStack.homeDirPath(baseDir, name) + verifDirPath = ZStack.verifDirPath(homeDir) + pubDirPath = ZStack.publicDirPath(homeDir) + for d in (homeDir, 
verifDirPath, pubDirPath): + os.makedirs(d, exist_ok=True) + + for other in others: + createCertsFromKeys(verifDirPath, other.name, other.verKey) + createCertsFromKeys(pubDirPath, other.name, other.publicKey) + + def tellKeysToOthers(self, others): + for other in others: + createCertsFromKeys(other.verifKeyDir, self.name, self.verKey) + createCertsFromKeys(other.publicKeysDir, self.name, self.publicKey) + + def setupDirs(self): + self.homeDir = self.homeDirPath(self.basedirpath, self.name) + self.publicKeysDir = self.publicDirPath(self.homeDir) + self.secretKeysDir = self.secretDirPath(self.homeDir) + self.verifKeyDir = self.verifDirPath(self.homeDir) + self.sigKeyDir = self.sigDirPath(self.homeDir) + + for d in (self.homeDir, self.publicKeysDir, self.secretKeysDir, + self.verifKeyDir, self.sigKeyDir): + os.makedirs(d, exist_ok=True) + + def setupOwnKeysIfNeeded(self): + if not os.listdir(self.sigKeyDir): + # If signing keys are not present, secret (private keys) should + # not be present since they should be converted keys. + assert not os.listdir(self.secretKeysDir) + # Seed should be present + assert self.seed, 'Keys are not setup for {}'.format(self) + logger.info("Signing and Encryption keys were not found for {}. 
" + "Creating them now".format(self), + extra={"cli": False}) + tdirS = os.path.join(self.homeDir, '__skeys__') + tdirE = os.path.join(self.homeDir, '__ekeys__') + os.makedirs(tdirS, exist_ok=True) + os.makedirs(tdirE, exist_ok=True) + createEncAndSigKeys(tdirE, tdirS, self.name, self.seed) + moveKeyFilesToCorrectLocations(tdirE, self.publicKeysDir, + self.secretKeysDir) + moveKeyFilesToCorrectLocations(tdirS, self.verifKeyDir, + self.sigKeyDir) + shutil.rmtree(tdirE) + shutil.rmtree(tdirS) + + def setupAuth(self, restricted=True, force=False): + if self.auth and not force: + raise RuntimeError('Listener already setup') + location = self.publicKeysDir if restricted else zmq.auth.CURVE_ALLOW_ANY + # self.auth = AsyncioAuthenticator(self.ctx) + self.auth = MultiZapAuthenticator(self.ctx) + self.auth.start() + self.auth.allow('0.0.0.0') + self.auth.configure_curve(domain='*', location=location) + + def teardownAuth(self): + if self.auth: + self.auth.stop() + + def setupSigning(self): + # Setup its signer from the signing key stored at disk and for all + # verification keys stored at disk, add Verifier + _, sk = self.selfSigKeys + self.signer = Signer(z85.decode(sk)) + for vk in self.getAllVerKeys(): + self.addVerifier(vk) + + def addVerifier(self, verkey): + self.verifiers[verkey] = Verifier(z85.decode(verkey)) + + def start(self, restricted=None, reSetupAuth=True): + # self.ctx = test.asyncio.Context.instance() + self.ctx = zmq.Context.instance() + if self.config.MAX_SOCKETS: + self.ctx.MAX_SOCKETS = self.config.MAX_SOCKETS + restricted = self.restricted if restricted is None else restricted + logger.info('{} starting with restricted as {} and reSetupAuth ' + 'as {}'.format(self, restricted, reSetupAuth), + extra={"cli": False, "demo": False}) + self.setupAuth(restricted, force=reSetupAuth) + self.open() + + def stop(self): + if self.opened: + logger.info('stack {} closing its listener'.format(self), + extra={"cli": False, "demo": False}) + self.close() + 
self.teardownAuth() + logger.info("stack {} stopped".format(self), + extra={"cli": False, "demo": False}) + + @property + def opened(self): + return self.listener is not None + + def open(self): + # noinspection PyUnresolvedReferences + self.listener = self.ctx.socket(zmq.ROUTER) + # noinspection PyUnresolvedReferences + # self.poller.register(self.listener, test.POLLIN) + public, secret = self.selfEncKeys + self.listener.curve_secretkey = secret + self.listener.curve_publickey = public + self.listener.curve_server = True + self.listener.identity = self.publicKey + logger.debug('{} will bind its listener at {}'.format(self, self.ha[1])) + set_keepalive(self.listener, self.config) + set_zmq_internal_queue_length(self.listener, self.config) + self.listener.bind( + 'tcp://*:{}'.format(self.ha[1])) + + def close(self): + self.listener.unbind(self.listener.LAST_ENDPOINT) + self.listener.close(linger=0) + self.listener = None + logger.debug('{} starting to disconnect remotes'.format(self)) + for r in self.remotes.values(): + r.disconnect() + self.remotesByKeys.pop(r.publicKey, None) + + self._remotes = {} + if self.remotesByKeys: + logger.warning('{} found remotes that were only in remotesByKeys and ' + 'not in remotes. 
This is suspicious') + for r in self.remotesByKeys.values(): + r.disconnect() + self.remotesByKeys = {} + self._conns = set() + + @property + def selfEncKeys(self): + serverSecretFile = os.path.join(self.secretKeysDir, + "{}.key_secret".format(self.name)) + return zmq.auth.load_certificate(serverSecretFile) + + @property + def selfSigKeys(self): + serverSecretFile = os.path.join(self.sigKeyDir, + "{}.key_secret".format(self.name)) + return zmq.auth.load_certificate(serverSecretFile) + + @property + def isRestricted(self): + return not self.auth.allow_any if self.auth is not None \ + else self.restricted + + @property + def isKeySharing(self): + # TODO: Change name after removing test + return not self.isRestricted + + def isConnectedTo(self, name: str = None, ha: Tuple = None): + if self.onlyListener: + return self.hasRemote(name) + return super().isConnectedTo(name, ha) + + def hasRemote(self, name): + if self.onlyListener: + if isinstance(name, str): + name = name.encode() + if name in self.peersWithoutRemotes: + return True + return super().hasRemote(name) + + def removeRemoteByName(self, name: str): + if self.onlyListener: + if name in self.peersWithoutRemotes: + self.peersWithoutRemotes.remove(name) + return True + else: + return super().removeRemoteByName(name) + + def getHa(self, name): + # Return HA as None when its a `peersWithoutRemote` + if self.onlyListener: + if isinstance(name, str): + name = name.encode() + if name in self.peersWithoutRemotes: + return None + return super().getHa(name) + + async def service(self, limit=None) -> int: + """ + Service `limit` number of received messages in this stack. + + :param limit: the maximum number of messages to be processed. If None, + processes all of the messages in rxMsgs. + :return: the number of messages processed. 
+ """ + if self.listener: + await self._serviceStack(self.age) + else: + logger.debug("{} is stopped".format(self)) + + r = len(self.rxMsgs) + if r > 0: + pracLimit = limit if limit else sys.maxsize + return self.processReceived(pracLimit) + return 0 + + def _verifyAndAppend(self, msg, ident): + # if self.verify(msg, ident): + # self.rxMsgs.append((msg[:-self.sigLen].decode(), ident)) + # else: + # logger.error('{} got error while ' + # 'verifying message {} from {}' + # .format(self, msg, ident)) + try: + decoded = msg.decode() + except UnicodeDecodeError as ex: + logger.error('{} got exception while decoding {} to utf-8: {}' + .format(self, msg, ex)) + return False + self.rxMsgs.append((decoded, ident)) + return True + + def _receiveFromListener(self, quota) -> int: + """ + Receives messages from listener + :param quota: number of messages to receive + :return: number of received messages + """ + assert quota + i = 0 + while i < quota: + try: + ident, msg = self.listener.recv_multipart(flags=zmq.NOBLOCK) + if not msg: + # Router probing sends empty message on connection + continue + i += 1 + if self.onlyListener and ident not in self.remotesByKeys: + self.peersWithoutRemotes.add(ident) + self._verifyAndAppend(msg, ident) + except zmq.Again: + break + if i > 0: + logger.trace('{} got {} messages through listener'. 
+ format(self, i)) + return i + + def _receiveFromRemotes(self, quotaPerRemote) -> int: + """ + Receives messages from remotes + :param quotaPerRemote: number of messages to receive from one remote + :return: number of received messages + """ + + assert quotaPerRemote + totalReceived = 0 + for ident, remote in self.remotesByKeys.items(): + if not remote.socket: + continue + i = 0 + sock = remote.socket + while i < quotaPerRemote: + try: + msg, = sock.recv_multipart(flags=zmq.NOBLOCK) + if not msg: + # Router probing sends empty message on connection + continue + i += 1 + self._verifyAndAppend(msg, ident) + except zmq.Again: + break + if i > 0: + logger.trace('{} got {} messages through remote {}'. + format(self, i, remote)) + totalReceived += i + return totalReceived + + async def _serviceStack(self, age): + # TODO: age is unused + + # These checks are kept here and not moved to a function since + # `_serviceStack` is called very often and function call is an overhead + if self.config.ENABLE_HEARTBEATS and ( + self.last_heartbeat_at is None or + (time.perf_counter() - self.last_heartbeat_at) >= + self.config.HEARTBEAT_FREQ): + self.send_heartbeats() + + self._receiveFromListener(quota=self.listenerQuota) + self._receiveFromRemotes(quotaPerRemote=self.senderQuota) + return len(self.rxMsgs) + + def processReceived(self, limit): + if limit <= 0: + return 0 + + for x in range(limit): + try: + msg, ident = self.rxMsgs.popleft() + + frm = self.remotesByKeys[ident].name \ + if ident in self.remotesByKeys else ident + + r = self.handlePingPong(msg, frm, ident) + if r: + continue + + try: + msg = self.deserializeMsg(msg) + except Exception as e: + logger.error('Error {} while converting message {} ' + 'to JSON from {}'.format(e, msg, ident)) + continue + + msg = self.doProcessReceived(msg, frm, ident) + if msg: + self.msgHandler((msg, frm)) + except IndexError: + break + return x + 1 + + def doProcessReceived(self, msg, frm, ident): + return msg + + def connect(self, + 
name=None, + remoteId=None, + ha=None, + verKeyRaw=None, + publicKeyRaw=None): + """ + Connect to the node specified by name. + """ + if not name: + raise ValueError('Remote name should be specified') + + if name in self.remotes: + remote = self.remotes[name] + else: + publicKey = z85.encode(publicKeyRaw) if publicKeyRaw else self.getPublicKey(name) + verKey = z85.encode(verKeyRaw) if verKeyRaw else self.getVerKey(name) + if not ha or not publicKey or (self.isRestricted and not verKey): + raise ValueError('{} doesnt have enough info to connect. ' + 'Need ha, public key and verkey. {} {} {}'. + format(name, ha, verKey, publicKey)) + remote = self.addRemote(name, ha, verKey, publicKey) + + public, secret = self.selfEncKeys + remote.connect(self.ctx, public, secret) + + logger.info("{} looking for {} at {}:{}". + format(self, name or remote.name, *remote.ha), + extra={"cli": "PLAIN", "tags": ["node-looking"]}) + + # This should be scheduled as an async task + self.sendPingPong(remote, is_ping=True) + return remote.uid + + def reconnectRemote(self, remote): + """ + Disconnect remote and connect to it again + + :param remote: instance of Remote from self.remotes + :param remoteName: name of remote + :return: + """ + assert remote + logger.debug('{} reconnecting to {}'.format(self, remote)) + public, secret = self.selfEncKeys + remote.disconnect() + remote.connect(self.ctx, public, secret) + self.sendPingPong(remote, is_ping=True) + + def reconnectRemoteWithName(self, remoteName): + assert remoteName + assert remoteName in self.remotes + self.reconnectRemote(self.remotes[remoteName]) + + def disconnectByName(self, name: str): + assert name + remote = self.remotes.get(name) + if not remote: + logger.warning('{} did not find any remote ' + 'by name {} to disconnect' + .format(self, name)) + return None + remote.disconnect() + return remote + + def addRemote(self, name, ha, remoteVerkey, remotePublicKey): + remote = Remote(name, ha, remoteVerkey, remotePublicKey) + 
self.remotes[name] = remote + # TODO: Use weakref to remote below instead + self.remotesByKeys[remotePublicKey] = remote + if remoteVerkey: + self.addVerifier(remoteVerkey) + else: + logger.debug('{} adding a remote {}({}) without a verkey'. + format(self, name, ha)) + return remote + + def sendPingPong(self, remote: Union[str, Remote], is_ping=True): + msg = self.pingMessage if is_ping else self.pongMessage + action = 'ping' if is_ping else 'pong' + name = remote if isinstance(remote, (str, bytes)) else remote.name + r = self.send(msg, name) + if r is True: + logger.debug('{} {}ed {}'.format(self.name, action, name)) + elif r is False: + # TODO: This fails the first time as socket is not established, + # need to make it retriable + logger.info('{} failed to {} {}'. + format(self.name, action, name), + extra={"cli": False}) + elif r is None: + logger.debug('{} will be sending in batch'.format(self)) + else: + logger.warning('{} got an unexpected return value {} while sending'. + format(self, r)) + return r + + def handlePingPong(self, msg, frm, ident): + if msg in (self.pingMessage, self.pongMessage): + if msg == self.pingMessage: + logger.debug('{} got ping from {}'.format(self, frm)) + self.sendPingPong(frm, is_ping=False) + + if msg == self.pongMessage: + if ident in self.remotesByKeys: + self.remotesByKeys[ident].setConnected() + logger.debug('{} got pong from {}'.format(self, frm)) + return True + return False + + def send_heartbeats(self): + # Sends heartbeat (ping) to all + logger.info('{} sending heartbeat to all remotes'.format(self)) + for remote in self.remotes: + self.sendPingPong(remote) + self.last_heartbeat_at = time.perf_counter() + + def send(self, msg: Any, remoteName: str = None, ha=None): + if self.onlyListener: + return self.transmitThroughListener(msg, remoteName) + else: + if remoteName is None: + r = [] + # Serializing beforehand since to avoid serializing for each + # remote + msg = self.serializeMsg(msg) + for uid in self.remotes: + 
r.append(self.transmit(msg, uid, serialized=True)) + return all(r) + else: + return self.transmit(msg, remoteName) + + def transmit(self, msg, uid, timeout=None, serialized=False): + remote = self.remotes.get(uid) + if not remote: + logger.debug("Remote {} does not exist!".format(uid)) + return False + socket = remote.socket + if not socket: + logger.warning('{} has uninitialised socket ' + 'for remote {}'.format(self, uid)) + return False + try: + msg = self.serializeMsg(msg) if not serialized else msg + # socket.send(self.signedMsg(msg), flags=zmq.NOBLOCK) + socket.send(msg, flags=zmq.NOBLOCK) + logger.debug('{} transmitting message {} to {}' + .format(self, msg, uid)) + if not remote.isConnected and msg not in self.healthMessages: + logger.warning('Remote {} is not connected - ' + 'message will not be sent immediately.' + 'If this problem does not resolve itself - ' + 'check your firewall settings'.format(uid)) + return True + except zmq.Again: + logger.info('{} could not transmit message to {}' + .format(self, uid)) + return False + + def transmitThroughListener(self, msg, ident): + if isinstance(ident, str): + ident = ident.encode() + if ident not in self.peersWithoutRemotes: + logger.debug('{} not sending message {} to {}'. + format(self, msg, ident)) + logger.debug("This is a temporary workaround for not being able to " + "disconnect a ROUTER's remote") + return False + msg = self.serializeMsg(msg) + try: + # noinspection PyUnresolvedReferences + # self.listener.send_multipart([ident, self.signedMsg(msg)], + # flags=zmq.NOBLOCK) + logger.trace('{} transmitting {} to {} through listener socket'. + format(self, msg, ident)) + self.listener.send_multipart([ident, msg], flags=zmq.NOBLOCK) + return True + except zmq.Again: + return False + except Exception as e: + logger.error('{} got error {} while sending through listener to {}'. 
+ format(self, e, ident)) + + @staticmethod + def serializeMsg(msg): + if isinstance(msg, Mapping): + msg = json.dumps(msg) + if isinstance(msg, str): + msg = msg.encode() + assert isinstance(msg, bytes) + return msg + + @staticmethod + def deserializeMsg(msg): + if isinstance(msg, bytes): + msg = msg.decode() + msg = json.loads(msg) + return msg + + def signedMsg(self, msg: bytes, signer: Signer=None): + sig = self.signer.signature(msg) + return msg + sig + + def verify(self, msg, by): + if self.isKeySharing: + return True + if by not in self.remotesByKeys: + return False + verKey = self.remotesByKeys[by].verKey + r = self.verifiers[verKey].verify(msg[-self.sigLen:], msg[:-self.sigLen]) + return r + + @staticmethod + def loadPubKeyFromDisk(directory, name): + filePath = os.path.join(directory, + "{}.key".format(name)) + try: + public, _ = zmq.auth.load_certificate(filePath) + return public + except (ValueError, IOError) as ex: + raise KeyError from ex + + @staticmethod + def loadSecKeyFromDisk(directory, name): + filePath = os.path.join(directory, + "{}.key_secret".format(name)) + try: + _, secret = zmq.auth.load_certificate(filePath) + return secret + except (ValueError, IOError) as ex: + raise KeyError from ex + + @property + def publicKey(self): + return self.getPublicKey(self.name) + + @property + def publicKeyRaw(self): + return z85.decode(self.publicKey) + + @property + def pubhex(self): + return hexlify(z85.decode(self.publicKey)) + + def getPublicKey(self, name): + try: + return self.loadPubKeyFromDisk(self.publicKeysDir, name) + except KeyError: + raise PublicKeyNotFoundOnDisk(self.name, name) + + @property + def verKey(self): + return self.getVerKey(self.name) + + @property + def verKeyRaw(self): + if self.verKey: + return z85.decode(self.verKey) + return None + + @property + def verhex(self): + if self.verKey: + return hexlify(z85.decode(self.verKey)) + return None + + def getVerKey(self, name): + try: + return self.loadPubKeyFromDisk(self.verifKeyDir, 
name) + except KeyError: + if self.isRestricted: + raise VerKeyNotFoundOnDisk(self.name, name) + return None + + @property + def sigKey(self): + return self.loadSecKeyFromDisk(self.sigKeyDir, self.name) + + # TODO: Change name to sighex after removing test + @property + def keyhex(self): + return hexlify(z85.decode(self.sigKey)) + + @property + def priKey(self): + return self.loadSecKeyFromDisk(self.secretKeysDir, self.name) + + @property + def prihex(self): + return hexlify(z85.decode(self.priKey)) + + def getAllVerKeys(self): + keys = [] + for key_file in os.listdir(self.verifKeyDir): + if key_file.endswith(".key"): + serverVerifFile = os.path.join(self.verifKeyDir, + key_file) + serverPublic, _ = zmq.auth.load_certificate(serverVerifFile) + keys.append(serverPublic) + return keys + + def setRestricted(self, restricted: bool): + if self.isRestricted != restricted: + logger.debug('{} setting restricted to {}'. + format(self, restricted)) + self.stop() + + # TODO: REMOVE, it will make code slow, only doing to allow the + # socket to become available again + time.sleep(1) + + self.start(restricted, reSetupAuth=True) + + def _safeRemove(self, filePath): + try: + os.remove(filePath) + except Exception as ex: + logger.info('{} could delete file {} due to {}'. 
+ format(self, filePath, ex)) + + def clearLocalRoleKeep(self): + for d in (self.secretKeysDir, self.sigKeyDir): + filePath = os.path.join(d, "{}.key_secret".format(self.name)) + self._safeRemove(filePath) + + for d in (self.publicKeysDir, self.verifKeyDir): + filePath = os.path.join(d, "{}.key".format(self.name)) + self._safeRemove(filePath) + + def clearRemoteRoleKeeps(self): + for d in (self.secretKeysDir, self.sigKeyDir): + for key_file in os.listdir(d): + if key_file != '{}.key_secret'.format(self.name): + self._safeRemove(os.path.join(d, key_file)) + + for d in (self.publicKeysDir, self.verifKeyDir): + for key_file in os.listdir(d): + if key_file != '{}.key'.format(self.name): + self._safeRemove(os.path.join(d, key_file)) + + def clearAllDir(self): + shutil.rmtree(self.homeDir) + + # TODO: Members below are just for the time till RAET replacement is + # complete, they need to be removed then. + @property + def nameRemotes(self): + logger.debug('{} proxy method used on {}'. + format(inspect.stack()[0][3], self)) + return self.remotes + + @property + def keep(self): + logger.debug('{} proxy method used on {}'. + format(inspect.stack()[0][3], self)) + if not hasattr(self, '_keep'): + self._keep = DummyKeep(self) + return self._keep + + def clearLocalKeep(self): + pass + + def clearRemoteKeeps(self): + pass + + +class DummyKeep: + def __init__(self, stack, *args, **kwargs): + self.stack = stack + self._auto = 2 if stack.isKeySharing else 0 + + @property + def auto(self): + logger.debug('{} proxy method used on {}'. + format(inspect.stack()[0][3], self)) + return self._auto + + @auto.setter + def auto(self, mode): + logger.debug('{} proxy method used on {}'. 
+ format(inspect.stack()[0][3], self)) + # AutoMode.once whose value is 1 is not used os dont care + if mode != self._auto: + if mode == 2: + self.stack.setRestricted(False) + if mode == 0: + self.stack.setRestricted(True) diff --git a/terminology.md b/terminology.md new file mode 100644 index 0000000000..01e23f3515 --- /dev/null +++ b/terminology.md @@ -0,0 +1,3 @@ +1. Ledger - a single, ever-growing, append-only Merkle Tree of identity events +2. MTH - Merkle Tree Hash +3. STH - Signed Tree Head \ No newline at end of file From 95947653edbef86b9d7801e990410214a1910290 Mon Sep 17 00:00:00 2001 From: Devin Fisher Date: Tue, 25 Jul 2017 16:08:06 -0600 Subject: [PATCH 015/100] merge from master Signed-off-by: Devin Fisher Sign-off-executed-by: toktar Approved-at: h-master --- ...come_active_with_less_than_four_servers.py | 8 -- .../logging/TimeAndSizeRotatingFileHandler.py | 77 +++++++++++++++++++ 2 files changed, 77 insertions(+), 8 deletions(-) create mode 100644 stp_core/common/logging/TimeAndSizeRotatingFileHandler.py diff --git a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py index beaec3d59e..cb0b1722b6 100644 --- a/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py +++ b/plenum/test/instances/test_instance_cannot_become_active_with_less_than_four_servers.py @@ -17,14 +17,6 @@ logger = getlogger() -@pytest.fixture(scope="function", autouse=True) -def limitTestRunningTime(): - return 200 - -@pytest.fixture(scope="function", autouse=True) -def limitTestRunningTime(): - return 200 - @pytest.fixture(scope="function", autouse=True) def limitTestRunningTime(): diff --git a/stp_core/common/logging/TimeAndSizeRotatingFileHandler.py b/stp_core/common/logging/TimeAndSizeRotatingFileHandler.py new file mode 100644 index 0000000000..0ea0d6ee24 --- /dev/null +++ 
b/stp_core/common/logging/TimeAndSizeRotatingFileHandler.py @@ -0,0 +1,77 @@ +import os +from logging.handlers import TimedRotatingFileHandler +from logging.handlers import RotatingFileHandler + + +class TimeAndSizeRotatingFileHandler(TimedRotatingFileHandler, RotatingFileHandler): + + def __init__(self, filename, when = 'h', interval = 1, backupCount = 0, + encoding = None, delay = False, utc = False, atTime = None, + maxBytes=0): + + TimedRotatingFileHandler.__init__(self, filename, when, interval, + backupCount, encoding, delay, + utc, atTime) + self.maxBytes = maxBytes + + def shouldRollover(self, record): + return bool(TimedRotatingFileHandler.shouldRollover(self, record)) or \ + bool(RotatingFileHandler.shouldRollover(self, record)) + + def rotation_filename(self, default_name: str): + + if not os.path.exists(default_name): + return default_name + + dir = os.path.dirname(default_name) + defaultFileName = os.path.basename(default_name) + fileNames = os.listdir(dir) + + maxIndex = -1 + for fileName in fileNames: + if fileName.startswith(defaultFileName): + index = self._file_index(fileName) + if index > maxIndex: + maxIndex = index + return "{}.{}".format(default_name, maxIndex + 1) + + @staticmethod + def _file_index(file_name): + split = file_name.split(".") + try: + return int(split[-1]) + except ValueError: + return 0 + + def getFilesToDelete(self): + """ + Determine the files to delete when rolling over. + + Note: This is copied from `TimedRotatingFileHandler`. The reason for + copying is to allow sorting in a custom way (by modified time). + Also minor optimisation to sort only when needed (>self.backupCount) + """ + dirName, baseName = os.path.split(self.baseFilename) + fileNames = os.listdir(dirName) + result = [] + prefix = baseName + "." 
+ plen = len(prefix) + for fileName in fileNames: + if fileName[:plen] == prefix: + suffix = fileName[plen:] + if self.extMatch.match(suffix): + result.append(os.path.join(dirName, fileName)) + if len(result) <= self.backupCount: + result = [] + else: + self._sort_for_removal(result) + result = result[:len(result) - self.backupCount] + return result + + @staticmethod + def _sort_for_removal(result): + """ + Sort files in the order they should be removed. + Currently using last modification time but this method can be overridden + """ + result.sort(key=os.path.getmtime) From 2212f0d95964e7b4ba21bc7cd134c475b5ffd8ad Mon Sep 17 00:00:00 2001 From: spivachuk Date: Tue, 29 Aug 2017 14:38:22 +0300 Subject: [PATCH 016/100] Fixes for wallet and genesis pool txns migration (#363) * INDY-733: Fixed bug in WalletCompatibilityBackend - Fixed a bug with decoding non-string keys of dictionaries in WalletCompatibilityBackend. * Added migration of genesis pool transactions to the client. Signed-off-by: spivachuk Sign-off-executed-by: toktar Approved-at: h-master --- ledger/genesis_txn/genesis_txn_file_util.py | 7 +++++++ plenum/cli/cli.py | 11 ++++++----- plenum/client/pool_manager.py | 4 ++++ plenum/client/wallet.py | 4 ++-- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/ledger/genesis_txn/genesis_txn_file_util.py b/ledger/genesis_txn/genesis_txn_file_util.py index 8ff3a9aa6a..682ce92858 100644 --- a/ledger/genesis_txn/genesis_txn_file_util.py +++ b/ledger/genesis_txn/genesis_txn_file_util.py @@ -13,6 +13,13 @@ def genesis_txn_path(base_dir, transaction_file): return os.path.join(base_dir, genesis_txn_file(transaction_file)) +def update_genesis_txn_file_name_if_outdated(base_dir, transaction_file): + old_named_path = os.path.join(base_dir, transaction_file) + new_named_path = os.path.join(base_dir, genesis_txn_file(transaction_file)) + if not os.path.exists(new_named_path) and os.path.isfile(old_named_path): + os.rename(old_named_path, new_named_path) + + def 
create_genesis_txn_init_ledger(data_dir, txn_file): from ledger.genesis_txn.genesis_txn_initiator_from_file import GenesisTxnInitiatorFromFile initiator = GenesisTxnInitiatorFromFile(data_dir, txn_file) diff --git a/plenum/cli/cli.py b/plenum/cli/cli.py index a9d8cfa36b..1e8d7709f2 100644 --- a/plenum/cli/cli.py +++ b/plenum/cli/cli.py @@ -7,7 +7,8 @@ from jsonpickle import json from ledger.compact_merkle_tree import CompactMerkleTree -from ledger.genesis_txn.genesis_txn_file_util import create_genesis_txn_init_ledger +from ledger.genesis_txn.genesis_txn_file_util import create_genesis_txn_init_ledger, \ + update_genesis_txn_file_name_if_outdated from ledger.genesis_txn.genesis_txn_initiator_from_file import GenesisTxnInitiatorFromFile from ledger.ledger import Ledger from plenum.cli.command import helpCmd, statusNodeCmd, statusClientCmd, \ @@ -282,12 +283,12 @@ def __init_registry(self, useNodeReg=False, nodeReg=None, cliNodeReg=None): def __init_registry_from_ledger(self): self.nodeRegLoadedFromFile = True - dataDir = self.basedirpath - + update_genesis_txn_file_name_if_outdated( + self.basedirpath, self.config.poolTransactionsFile) genesis_txn_initiator = GenesisTxnInitiatorFromFile( - dataDir, self.config.poolTransactionsFile) + self.basedirpath, self.config.poolTransactionsFile) ledger = Ledger(CompactMerkleTree(), - dataDir=dataDir, + dataDir=self.basedirpath, fileName=self.config.poolTransactionsFile, genesis_txn_initiator=genesis_txn_initiator, transactionLogStore=KeyValueStorageInMemory()) diff --git a/plenum/client/pool_manager.py b/plenum/client/pool_manager.py index 6747e6614d..6f33802eb8 100644 --- a/plenum/client/pool_manager.py +++ b/plenum/client/pool_manager.py @@ -1,6 +1,8 @@ import collections import json +from ledger.genesis_txn.genesis_txn_file_util import \ + update_genesis_txn_file_name_if_outdated from ledger.util import F from stp_core.network.exceptions import RemoteNotFound @@ -23,6 +25,8 @@ def __init__(self): self._ledgerLocation = None 
TxnStackManager.__init__(self, self.name, self.basedirpath, isNode=False) + update_genesis_txn_file_name_if_outdated(self.basedirpath, + self.ledgerFile) _, cliNodeReg, nodeKeys = self.parseLedgerForHaAndKeys(self.ledger) self.nodeReg = cliNodeReg self.addRemoteKeysFromLedger(nodeKeys) diff --git a/plenum/client/wallet.py b/plenum/client/wallet.py index 5168379224..6302e4e216 100644 --- a/plenum/client/wallet.py +++ b/plenum/client/wallet.py @@ -427,12 +427,12 @@ class WalletCompatibilityBackend(JSONBackend): def decode(self, string): raw = super().decode(string) # Note that backend.decode may be called not only for the whole object - # representation but also for representations of structured keys of + # representation but also for representations of non-string keys of # dictionaries. # Here we assume that if the string represents a class instance and # this class contains makeRawCompatible method then this class is # a wallet class supporting backward compatibility - if tags.OBJECT in raw: + if isinstance(raw, dict) and tags.OBJECT in raw: clsName = raw[tags.OBJECT] cls = loadclass(clsName) if hasattr(cls, 'makeRawCompatible') \ From d3cc376f99cc6387c0f1a1c30c4a45547dfdacc5 Mon Sep 17 00:00:00 2001 From: Andrey Kononykhin Date: Wed, 6 Sep 2017 18:28:21 +0300 Subject: [PATCH 017/100] Rc 2.1 (#373) * Node status tool base support * Improve node status tool support * Add OOP, renaming * Remove unsupported fields * Renaming * Add dynamic tests * Make node status tool safe * Fix primary selection after primary demotion (INDY-463) (#354) * modified rank routine, added test cases for primary demotion * fixed pep8 * fixed nodes oreder initialization * added checks that pool is functional after recofigurations * added additional view change to ensure original master is skipped * Merged hotfixes from stable for wallet and genesis pool transactions file migration and readme file. 
* Fix validator info tool (#365) * Don't add ip addresses, one-shot dump on start * Parameterize zmq network protocol * Cherry-pick cb7d068 8642be5 c37e835 from master Signed-off-by: Andrey Kononykhin Sign-off-executed-by: toktar Approved-at: h-master --- plenum/common/batched.py | 52 ++-- plenum/common/prepare_batch.py | 45 +++ plenum/config.py | 3 + plenum/server/node.py | 21 +- plenum/server/pool_manager.py | 80 +++-- plenum/server/primary_decider.py | 7 +- plenum/server/primary_selector.py | 15 +- plenum/server/validator_info_tool.py | 172 +++++++++++ .../test/batching_3pc/test_basic_batching.py | 2 +- plenum/test/cli/test_long_msg_err.py | 9 +- plenum/test/client/test_client.py | 2 +- plenum/test/common/test_prepare_batch.py | 59 ++++ plenum/test/conftest.py | 1 + .../test_split_non_3pc_messages_on_batches.py | 74 +++++ plenum/test/primary_selection/conftest.py | 26 +- plenum/test/primary_selection/helper.py | 18 +- ...after_primary_demotion_and_pool_restart.py | 64 ++++ ...after_primary_demotion_and_view_changes.py | 71 +++++ plenum/test/validator_info/__init__.py | 0 .../validator_info/test_validator_info.py | 277 ++++++++++++++++++ .../test_new_node_joins_after_view_change.py | 3 + stp_core/common/constants.py | 2 + stp_core/config.py | 4 +- .../validators/message_length_validator.py | 5 +- stp_zmq/remote.py | 4 +- stp_zmq/test/test_zstack.py | 27 +- stp_zmq/zstack.py | 6 +- 27 files changed, 960 insertions(+), 89 deletions(-) create mode 100644 plenum/common/prepare_batch.py create mode 100644 plenum/server/validator_info_tool.py create mode 100644 plenum/test/common/test_prepare_batch.py create mode 100644 plenum/test/node_request/test_split_non_3pc_messages_on_batches.py create mode 100644 plenum/test/primary_selection/test_primary_selection_after_primary_demotion_and_pool_restart.py create mode 100644 plenum/test/primary_selection/test_primary_selection_after_primary_demotion_and_view_changes.py create mode 100644 plenum/test/validator_info/__init__.py 
create mode 100644 plenum/test/validator_info/test_validator_info.py diff --git a/plenum/common/batched.py b/plenum/common/batched.py index 9e3ef7b71b..4fa4e351d7 100644 --- a/plenum/common/batched.py +++ b/plenum/common/batched.py @@ -2,6 +2,7 @@ from typing import Any, Iterable from plenum.common.constants import BATCH, OP_FIELD_NAME +from plenum.common.prepare_batch import split_messages_on_batches from stp_core.common.constants import CONNECTION_PREFIX from stp_core.crypto.signer import Signer from stp_core.common.log import getlogger @@ -63,7 +64,7 @@ def send(self, msg: Any, * # Signing (if required) and serializing before enqueueing otherwise # each call to `_enqueue` will have to sign it and `transmit` will try # to serialize it which is waste of resources - serializedPayload, err_msg = self.signAndSerialize(msg, signer) + serializedPayload, err_msg = self.signSerializeAndCheckLen(msg, signer) if serializedPayload is None: return False, err_msg @@ -98,23 +99,24 @@ def flushOutBoxes(self) -> None: "{} batching {} msgs to {} into one transmission". format(self, len(msgs), dest)) logger.trace(" messages: {}".format(msgs)) - batch = Batch(list(msgs), None) + batches = split_messages_on_batches(list(msgs), + self._make_batch, + self._test_batch_len, + ) msgs.clear() - # don't need to sign the batch, when the composed msgs are - # signed - payload, err_msg = self.signAndSerialize(batch) - if payload is not None: - logger.trace("{} sending payload to {}: {}".format( - self, dest, payload)) - # Setting timeout to never expire - self.transmit( - payload, - rid, - timeout=self.messageTimeout, - serialized=True) + if batches: + for batch in batches: + logger.trace("{} sending payload to {}: {}".format( + self, dest, batch)) + # Setting timeout to never expire + self.transmit( + batch, + rid, + timeout=self.messageTimeout, + serialized=True) else: - logger.warning("{} error {}. 
tried to {}: {}".format( - self, err_msg, dest, payload)) + logger.warning("Cannot create batch(es) for {}".format( + self, dest)) for rid in removedRemotes: logger.warning("{}{} rid {} has been removed" .format(CONNECTION_PREFIX, self, rid), @@ -127,6 +129,14 @@ def flushOutBoxes(self) -> None: logMethod=logger.debug) del self.outBoxes[rid] + def _make_batch(self, msgs): + batch = Batch(msgs, None) + serialized_batch = self.sign_and_serialize(batch) + return serialized_batch + + def _test_batch_len(self, batch_len): + return self.msg_len_val.is_len_less_than_limit(batch_len) + def doProcessReceived(self, msg, frm, ident): if OP_FIELD_NAME in msg and msg[OP_FIELD_NAME] == BATCH: if f.MSGS.nm in msg and isinstance(msg[f.MSGS.nm], list): @@ -142,9 +152,8 @@ def doProcessReceived(self, msg, frm, ident): msg[f.MSGS.nm] = relevantMsgs return msg - def signAndSerialize(self, msg, signer=None): - payload = self.prepForSending(msg, signer) - msg_bytes = self.serializeMsg(payload) + def signSerializeAndCheckLen(self, msg, signer=None): + msg_bytes = self.sign_and_serialize(msg, signer) err_msg = None try: self.msg_len_val.validate(msg_bytes) @@ -153,3 +162,8 @@ def signAndSerialize(self, msg, signer=None): logger.warning(err_msg) msg_bytes = None return msg_bytes, err_msg + + def sign_and_serialize(self, msg, signer=None): + payload = self.prepForSending(msg, signer) + msg_bytes = self.serializeMsg(payload) + return msg_bytes diff --git a/plenum/common/prepare_batch.py b/plenum/common/prepare_batch.py new file mode 100644 index 0000000000..1d955255b9 --- /dev/null +++ b/plenum/common/prepare_batch.py @@ -0,0 +1,45 @@ +from stp_core.common.log import getlogger + +SPLIT_STEPS_LIMIT = 8 + +logger = getlogger() + + +def split_messages_on_batches(msgs, make_batch_func, is_batch_len_under_limit, step_num=0): + + def split(rec_depth): + l = len(msgs) // 2 + left_batch = split_messages_on_batches(msgs[:l], make_batch_func, is_batch_len_under_limit, rec_depth) + right_batch = 
split_messages_on_batches(msgs[l:], make_batch_func, is_batch_len_under_limit, rec_depth) + return left_batch + right_batch if left_batch and right_batch else None + + if step_num > SPLIT_STEPS_LIMIT: + logger.warning('Too many split steps ' + 'were done {}. Batches were not created'.format(step_num)) + return None + + # precondition for case when total length is greater than limit + # helps skip extra serialization step + tt_len = sum(len(m) for m in msgs) + if not is_batch_len_under_limit(tt_len): + for m in msgs: + if not is_batch_len_under_limit(len(m)): + logger.warning('The message {} is to long ({}). ' + 'Batches were not created'.format(m, len(m))) + return + step_num += 1 + return split(step_num) + + # make a batch and check its length + batch = make_batch_func(msgs) + if is_batch_len_under_limit(len(batch)): + return [batch] # success split + else: + if len(msgs) == 1: + # a batch with this message greater than limit so split fails + logger.warning('The message {} is less than limit ' + 'but the batch which contains only this ' + 'message is greater than limit'.format(msgs)) + return None + step_num += 1 + return split(step_num) diff --git a/plenum/config.py b/plenum/config.py index 68ed28764e..05d525df1b 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -99,6 +99,9 @@ STATS_SERVER_PORT = 30000 STATS_SERVER_MESSAGE_BUFFER_MAX_SIZE = 1000 +# Node status configuration +DUMP_VALIDATOR_INFO_PERIOD_SEC = 60 + RAETLogLevel = "terse" RAETLogLevelCli = "mute" RAETLogFilePath = os.path.join(os.path.expanduser(baseDir), "raet.log") diff --git a/plenum/server/node.py b/plenum/server/node.py index 501714eddb..e1c4c6c079 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -4,6 +4,7 @@ from binascii import unhexlify from collections import deque, defaultdict from contextlib import closing +from functools import partial from typing import Dict, Any, Mapping, Iterable, List, Optional, Set, Tuple from intervaltree import IntervalTree @@ -61,6 +62,7 @@ 
from plenum.server.message_req_processor import MessageReqProcessor from plenum.server.models import InstanceChanges from plenum.server.monitor import Monitor +from plenum.server.validator_info_tool import ValidatorNodeInfoTool from plenum.server.notifier_plugin_manager import notifierPluginTriggerEvents, \ PluginManager from plenum.server.plugin.has_plugin_loader_helper import PluginLoaderHelper @@ -96,6 +98,7 @@ class Node(HasActionQueue, Motor, Propagator, MessageProcessor, HasFileStorage, suspicions = {s.code: s.reason for s in Suspicions.get_list()} keygenScript = "init_plenum_keys" _client_request_class = SafeRequest + _info_tool_class = ValidatorNodeInfoTool ledger_ids = [POOL_LEDGER_ID, DOMAIN_LEDGER_ID] _wallet_class = Wallet @@ -368,6 +371,10 @@ def __init__(self, # between. self._next_view_indications = SortedDict() + # Number of read requests the node has processed + self.total_read_request_number = 0 + self._info_tool = self._info_tool_class(self) + def create_replicas(self) -> Replicas: return Replicas(self, self.monitor) @@ -606,6 +613,8 @@ def start(self, loop): self._schedule(action=self.propose_view_change, seconds=self._view_change_timeout) + self.schedule_node_status_dump() + # if first time running this node if not self.nodestack.remotes: logger.info("{} first time running..." 
"".format(self), extra={ @@ -624,8 +633,17 @@ def start(self, loop): self.logNodeInfo() + def schedule_node_status_dump(self): + # one-shot dump right after start + self._schedule(action=self._info_tool.dump_json_file, + seconds=3) + self.startRepeating( + self._info_tool.dump_json_file, + seconds=self.config.DUMP_VALIDATOR_INFO_PERIOD_SEC, + ) + @property - def rank(self) -> int: + def rank(self) -> Optional[int]: return self.poolManager.rank def get_name_by_rank(self, rank): @@ -1708,6 +1726,7 @@ def processRequest(self, request: Request, frm: str): if request.operation[TXN_TYPE] == GET_TXN: self.handle_get_txn_req(request, frm) + self.total_read_request_number += 1 else: reply = self.getReplyFromLedger(ledger, request) if reply: diff --git a/plenum/server/pool_manager.py b/plenum/server/pool_manager.py index 289339c55c..e3db2ecea8 100644 --- a/plenum/server/pool_manager.py +++ b/plenum/server/pool_manager.py @@ -54,7 +54,10 @@ def _get_rank(needle_id: str, haystack_ids: List[str]): # Return the rank of the node where rank is defined by the order in # which node was added to the pool or on the alphabetical order of name # if using RegistryPoolManager - return haystack_ids.index(needle_id) + try: + return haystack_ids.index(needle_id) + except ValueError: + return None @property @abstractmethod @@ -63,24 +66,28 @@ def id(self): """ @abstractmethod - def get_rank_of(self, node_id) -> int: - """ + def get_rank_of(self, node_id) -> Optional[int]: + """Return node rank among active pool validators by id + + :param node_id: node's id + :return: rank of the node or None if not found """ @property def rank(self) -> Optional[int]: # Nodes have a total order defined in them, rank is the node's # position in that order - if self._rank is None: - self._rank = self.get_rank_of(self.id) - return self._rank + return self.get_rank_of(self.id) @abstractmethod - def get_name_by_rank(self, rank): + def get_name_by_rank(self, rank) -> Optional[str]: # Needed for communicating 
primary name to others and also nodeReg # uses node names (alias) and not ids # TODO: Should move to using node ids and not node names (alias) - """ + """Return node name (alias) by rank among active pool validators + + :param rank: rank of the node + :return: name of the node or None if not found """ @@ -105,15 +112,17 @@ def __init__(self, node, ha=None, cliname=None, cliha=None): self.basedirpath = node.basedirpath self._ledger = None self._id = None - self._rank = None + TxnStackManager.__init__( self, self.name, self.basedirpath, isNode=True) self.state = self.loadState() self.reqHandler = self.getPoolReqHandler() self.initPoolState() + self._load_nodes_order_from_ledger() self.nstack, self.cstack, self.nodeReg, self.cliNodeReg = \ self.getStackParamsAndNodeReg(self.name, self.basedirpath, ha=ha, cliname=cliname, cliha=cliha) + self._dataFieldsValidators = ( (NODE_IP, self._isIpAddressValid), (CLIENT_IP, self._isIpAddressValid), @@ -207,6 +216,8 @@ def onPoolMembershipChange(self, txn): nodeName = txn[DATA][ALIAS] nodeNym = txn[TARGET_NYM] + self._order_node(nodeNym, nodeName) + def _updateNode(txn): if {NODE_IP, NODE_PORT, CLIENT_IP, CLIENT_PORT}. 
\ intersection(set(txn[DATA].keys())): @@ -369,33 +380,46 @@ def id(self): self._id = txn[TARGET_NYM] return self._id - @property - def node_ids_in_ordered_by_rank(self) -> List: - ids = OrderedDict() + def _load_nodes_order_from_ledger(self): + self._ordered_node_ids = OrderedDict() for _, txn in self.ledger.getAllTxn(): - ids[txn[TARGET_NYM]] = True - return list(ids.keys()) + if txn[TXN_TYPE] == NODE: + self._order_node(txn[TARGET_NYM], txn[DATA][ALIAS]) + + def _order_node(self, nodeNym, nodeName): + assert self._ordered_node_ids.get(nodeNym) in (nodeName, None), ( + "{} trying to order already ordered node {} ({}) " + "with other alias {}".format( + self.name, self._ordered_node_ids.get(nodeNym), nodeNym)) + + self._ordered_node_ids[nodeNym] = nodeName + + @property + def node_ids_ordered_by_rank(self) -> List: + return [nym for nym, name in self._ordered_node_ids.items() + if name in self.nodeReg] def get_rank_of(self, node_id) -> Optional[int]: if self.id is None: # This can happen if a non-genesis node starts return None - return self._get_rank(node_id, self.node_ids_in_ordered_by_rank) + return self._get_rank(node_id, self.node_ids_ordered_by_rank) - def get_name_by_rank(self, rank): - # This is expensive but only required while start or view change - id = self.node_ids_in_ordered_by_rank[rank] - # We don't allow changing ALIAS - for _, txn in self.ledger.getAllTxn(): - if txn[TARGET_NYM] == id and DATA in txn and ALIAS in txn[DATA]: - return txn[DATA][ALIAS] + def get_name_by_rank(self, rank) -> Optional[str]: + try: + nym = self.node_ids_ordered_by_rank[rank] + except IndexError: + return None + else: + return self._ordered_node_ids[nym] class RegistryPoolManager(PoolManager): # This is the old way of managing the pool nodes information and # should be deprecated. 
def __init__(self, name, basedirpath, nodeRegistry, ha, cliname, cliha): - self._rank = None + self._ordered_node_names = None + self.nstack, self.cstack, self.nodeReg, self.cliNodeReg = \ self.getStackParamsAndNodeReg(name=name, basedirpath=basedirpath, nodeRegistry=nodeRegistry, ha=ha, @@ -491,8 +515,12 @@ def id(self): def node_names_ordered_by_rank(self) -> List: return sorted(self.nodeReg.keys()) - def get_rank_of(self, node_id) -> int: + def get_rank_of(self, node_id) -> Optional[int]: + # TODO node_id here has got another meaning return self._get_rank(node_id, self.node_names_ordered_by_rank) - def get_name_by_rank(self, rank): - return self.node_names_ordered_by_rank[rank] + def get_name_by_rank(self, rank) -> Optional[str]: + try: + return self.node_names_ordered_by_rank[rank] + except IndexError: + return None diff --git a/plenum/server/primary_decider.py b/plenum/server/primary_decider.py index 801ad7a026..8540dd5ef3 100644 --- a/plenum/server/primary_decider.py +++ b/plenum/server/primary_decider.py @@ -1,4 +1,4 @@ -from typing import Iterable +from typing import Iterable, Optional from collections import deque from plenum.common.constants import VIEW_CHANGE_PREFIX @@ -23,7 +23,6 @@ def __init__(self, node): self.f = node.f self.replicas = node.replicas self.viewNo = node.viewNo - self.rank = node.rank self.nodeNames = node.allNodeNames self.nodeCount = 0 self.inBox = deque() @@ -40,6 +39,10 @@ def __init__(self, node): def __repr__(self): return "{}".format(self.name) + @property + def rank(self) -> Optional[int]: + return self.node.rank + @property def was_master_primary_in_prev_view(self): return self.previous_master_primary == self.name diff --git a/plenum/server/primary_selector.py b/plenum/server/primary_selector.py index 10e8ac6051..b79f21842c 100644 --- a/plenum/server/primary_selector.py +++ b/plenum/server/primary_selector.py @@ -204,8 +204,9 @@ def has_view_change_from_primary(self) -> bool: if next_primary_name not in self._view_change_done: 
logger.debug( "{} has not received ViewChangeDone from the next " - "primary {}". format( - self.name, next_primary_name)) + "primary {} (viewNo: {}, totalNodes: {})". format( + self.name, next_primary_name, + self.viewNo, self.node.totalNodes)) return False else: self._has_view_change_from_primary = True @@ -317,8 +318,14 @@ def _get_primary_id(self, view_no, instance_id): return (view_no + instance_id) % self.node.totalNodes def next_primary_node_name(self, instance_id): - return self.node.get_name_by_rank(self._get_primary_id( - self.viewNo, instance_id)) + rank = self._get_primary_id(self.viewNo, instance_id) + name = self.node.get_name_by_rank(rank) + + assert name, ("{} failed to get node name for rank {}: " + "view_no {}, instance_id {}, totalNodes {}".format( + self, rank, self.viewNo, instance_id, + self.node.totalNodes)) + return name def next_primary_replica_name(self, instance_id): """ diff --git a/plenum/server/validator_info_tool.py b/plenum/server/validator_info_tool.py new file mode 100644 index 0000000000..0c100523ee --- /dev/null +++ b/plenum/server/validator_info_tool.py @@ -0,0 +1,172 @@ +import json +import time + +import os + +import base58 + +from stp_core.common.constants import ZMQ_NETWORK_PROTOCOL +from stp_core.common.log import getlogger + +logger = getlogger() + + +def none_on_fail(func): + + def wrap(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as ex: + logger.debug('Validator info tool fails to ' + 'execute {} because {}'.format(func.__name__, repr(ex))) + return None + return wrap + + +class ValidatorNodeInfoTool: + JSON_SCHEMA_VERSION = '0.0.1' + FILE_NAME_TEMPLATE = '{node_name}_info.json' + + def __init__(self, node): + self._node = node + self.__name = self._node.name + self.__base_path = self._node.basedirpath + + @property + def info(self): + return { + 'alias': self.__alias, + 'bindings': { + 'client': { + # ip address is going to be set in + # validator-info script + # 'ip': self.__client_ip, + 
'port': self.__client_port, + 'protocol': ZMQ_NETWORK_PROTOCOL, + }, + 'node': { + # ip address is going to be set in + # validator-info script + # 'ip': self.__node_ip, + 'port': self.__node_port, + 'protocol': ZMQ_NETWORK_PROTOCOL, + } + }, + 'did': self.__did, + 'response-version': self.JSON_SCHEMA_VERSION, + 'timestamp': int(time.time()), + 'verkey': self.__verkey, + 'metrics': { + 'average-per-second': { + 'read-transactions': self.__avg_read, + 'write-transactions': self.__avg_write, + }, + 'transaction-count': { + 'ledger': self.__domain_ledger_size, + 'pool': self.__pool_ledger_size, + }, + 'uptime': self.__uptime, + }, + 'pool': { + 'reachable': { + 'count': self.__reachable_count, + 'list': self.__reachable_list, + }, + 'unreachable': { + 'count': self.__unreachable_count, + 'list': self.__unreachable_list, + }, + 'total-count': self.__total_count, + }, + } + + @property + @none_on_fail + def __alias(self): + return self._node.name + + @property + @none_on_fail + def __client_ip(self): + return self._node.clientstack.ha.host + + @property + @none_on_fail + def __client_port(self): + return self._node.clientstack.ha.port + + @property + @none_on_fail + def __node_ip(self): + return self._node.nodestack.ha.host + + @property + @none_on_fail + def __node_port(self): + return self._node.nodestack.ha.port + + @property + @none_on_fail + def __did(self): + return self._node.wallet.defaultId + + @property + @none_on_fail + def __verkey(self): + return base58.b58encode(self._node.nodestack.verKey) + + @property + @none_on_fail + def __avg_read(self): + return self._node.total_read_request_number / (time.time() - self._node.created) + + @property + @none_on_fail + def __avg_write(self): + return self._node.monitor.totalRequests / (time.time() - self._node.created) + + @property + @none_on_fail + def __domain_ledger_size(self): + return self._node.domainLedger.size + + @property + @none_on_fail + def __pool_ledger_size(self): + return self._node.poolLedger.size if 
self._node.poolLedger else 0 + + @property + @none_on_fail + def __uptime(self): + return int(time.time() - self._node.created) + + @property + @none_on_fail + def __reachable_count(self): + return self._node.connectedNodeCount + + @property + @none_on_fail + def __reachable_list(self): + return sorted(list(self._node.nodestack.conns) + [self._node.name]) + + @property + @none_on_fail + def __unreachable_count(self): + return len(self._node.nodestack.remotes) - len(self._node.nodestack.conns) + + @property + @none_on_fail + def __unreachable_list(self): + return list(set(self._node.nodestack.remotes.keys()) - self._node.nodestack.conns) + + @property + @none_on_fail + def __total_count(self): + return len(self._node.nodestack.remotes) + 1 + + def dump_json_file(self): + file_name = self.FILE_NAME_TEMPLATE.format(node_name=self.__name.lower()) + path = os.path.join(self.__base_path, file_name) + with open(path, 'w') as fd: + json.dump(self.info, fd) diff --git a/plenum/test/batching_3pc/test_basic_batching.py b/plenum/test/batching_3pc/test_basic_batching.py index 65193d52f1..1bc1935759 100644 --- a/plenum/test/batching_3pc/test_basic_batching.py +++ b/plenum/test/batching_3pc/test_basic_batching.py @@ -2,11 +2,11 @@ import pytest -from stp_core.loop.eventually import eventually from plenum.common.exceptions import UnauthorizedClientRequest from plenum.test.batching_3pc.helper import checkNodesHaveSameRoots from plenum.test.helper import checkReqNackWithReason, sendRandomRequests, \ checkRejectWithReason, waitForSufficientRepliesForRequests +from stp_core.loop.eventually import eventually def testRequestStaticValidation(tconf, looper, txnPoolNodeSet, client, diff --git a/plenum/test/cli/test_long_msg_err.py b/plenum/test/cli/test_long_msg_err.py index 7261ccbc49..e6e846db73 100644 --- a/plenum/test/cli/test_long_msg_err.py +++ b/plenum/test/cli/test_long_msg_err.py @@ -3,16 +3,9 @@ import pytest from stp_core.common.log import Logger 
-@pytest.fixture(scope="function") -def patch_msg_len(tconf): - old_value = tconf.MSG_LEN_LIMIT - tconf.MSG_LEN_LIMIT = 128 * 1024 - yield tconf.MSG_LEN_LIMIT - print(old_value) - tconf.MSG_LEN_LIMIT = old_value def test_error_if_long_message( - patch_msg_len, cli, tconf, createAllNodes, validNodeNames, set_info_log_level): + cli, tconf, createAllNodes, validNodeNames, set_info_log_level): operation = '{{"Hello": "{}"}}'.format("T" * tconf.MSG_LEN_LIMIT) createClientAndConnect(cli, validNodeNames, "Alice") diff --git a/plenum/test/client/test_client.py b/plenum/test/client/test_client.py index f70c5dccd2..d2d194dd24 100644 --- a/plenum/test/client/test_client.py +++ b/plenum/test/client/test_client.py @@ -206,7 +206,7 @@ def testReplyWhenRequestAlreadyExecuted(looper, nodeSet, client1, sent1): originalRequestResponsesLen = nodeCount * 2 duplicateRequestRepliesLen = nodeCount # for a duplicate request we need to - serializedPayload, _ = client1.nodestack.signAndSerialize(sent1, None) + serializedPayload, _ = client1.nodestack.signSerializeAndCheckLen(sent1, None) client1.nodestack._enqueueIntoAllRemotes(serializedPayload, None) def chk(): diff --git a/plenum/test/common/test_prepare_batch.py b/plenum/test/common/test_prepare_batch.py new file mode 100644 index 0000000000..db0f226a52 --- /dev/null +++ b/plenum/test/common/test_prepare_batch.py @@ -0,0 +1,59 @@ +from plenum.common.prepare_batch import split_messages_on_batches, SPLIT_STEPS_LIMIT + + +LEN_LIMIT_BYTES = 100 +SERIALIZATION_OTHER_HEAD_BYTES = 10 +MAX_ONE_MSG_LEN = LEN_LIMIT_BYTES - SERIALIZATION_OTHER_HEAD_BYTES + + +def make_batch_func(msgs): + overhead = b'1' * SERIALIZATION_OTHER_HEAD_BYTES + return b''.join(msgs + [overhead]) + + +def check_batch_len_func(length): + return length <= LEN_LIMIT_BYTES + + +def split_ut(msgs): + return split_messages_on_batches(msgs, make_batch_func, check_batch_len_func) + + +def test_empty_msgs_returns_one_batch(): + assert len(split_ut([])) == 1 + + +def 
test_less_than_limit_returns_one_batch(): + msgs = [b'1'] * 10 + assert len(split_ut(msgs)) == 1 + + +def test_total_len_excesses_limit_two_batches(): + msgs = [b'1'] * (LEN_LIMIT_BYTES + 1) + assert len(split_ut(msgs)) == 2 + + +def test_each_msg_almost_excesses_limit_one_msg_per_batch(): + count = 100 + msgs = [b'1' * MAX_ONE_MSG_LEN] * count + assert len(split_ut(msgs)) == count + + +def test_small_msgs_with_one_huge_more_than_one_batch(): + msgs = [b'1', b'1', b'1', b'1' * MAX_ONE_MSG_LEN, b'1'] + assert len(split_ut(msgs)) == 4 + + +def test_one_msg_excesses_limit_split_fails(): + msgs = [b'1' * (LEN_LIMIT_BYTES + 1)] + assert split_ut(msgs) is None + + +def test_one_msg_almost_excesses_limit_split_fails(): + msgs = [b'1' * (MAX_ONE_MSG_LEN + 1)] + assert split_ut(msgs) is None + + +def test_excesses_limit_of_split_steps_split_fails(): + msgs = [b'1' * MAX_ONE_MSG_LEN] * 2**(SPLIT_STEPS_LIMIT + 1) + assert split_ut(msgs) is None diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index 6f3a5f84bf..e151528f45 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -22,6 +22,7 @@ from plenum.common.keygen_utils import initNodeKeysForBothStacks from plenum.test.greek import genNodeNames from plenum.test.grouped_load_scheduling import GroupedLoadScheduling +from plenum.test.pool_transactions.helper import buildPoolClientAndWallet from stp_core.common.logging.handlers import TestingHandler from stp_core.crypto.util import randomSeed from stp_core.network.port_dispenser import genHa diff --git a/plenum/test/node_request/test_split_non_3pc_messages_on_batches.py b/plenum/test/node_request/test_split_non_3pc_messages_on_batches.py new file mode 100644 index 0000000000..e69091b6de --- /dev/null +++ b/plenum/test/node_request/test_split_non_3pc_messages_on_batches.py @@ -0,0 +1,74 @@ +from functools import partial + +import pytest + +from plenum.test import waits + +from plenum.test.helper import sendRandomRequests, 
waitForSufficientRepliesForRequests, checkReqAck +from plenum.test.pool_transactions.helper import buildPoolClientAndWallet +from stp_core.loop.eventually import eventuallyAll +from stp_core.validators.message_length_validator import MessageLenValidator + +from plenum.test.pool_transactions.conftest import looper, client1Connected # noqa +from plenum.test.pool_transactions.conftest import clientAndWallet1, client1, wallet1 # noqa + + +def test_msg_max_length_check_node_to_node(tconf, + tdir, + looper, + txnPoolNodeSet, + client1, + wallet1, + client1Connected, + clientAndWallet2): + """ + Two clients send 2*N requests each at the same time. + N < MSG_LEN_LIMIT but 2*N > MSG_LEN_LIMIT so the requests pass the max + length check for client-node requests but do not pass the check + for node-node requests. + """ + N = 10 + # it is an empirical value for N random requests + # it has to be adjusted if the world changed (see pydoc) + max_len_limit = 3000 + + patch_msg_len_validators(max_len_limit, txnPoolNodeSet) + + client2, wallet2 = clientAndWallet2 + + reqs1 = sendRandomRequests(wallet1, client1, N) + reqs2 = sendRandomRequests(wallet2, client2, N) + + check_reqacks(client1, looper, reqs1, txnPoolNodeSet) + check_reqacks(client2, looper, reqs2, txnPoolNodeSet) + + waitForSufficientRepliesForRequests(looper, client1, requests=reqs1) + waitForSufficientRepliesForRequests(looper, client2, requests=reqs2) + + +def patch_msg_len_validators(max_len_limit, txnPoolNodeSet): + for node in txnPoolNodeSet: + assert hasattr(node.nodestack, 'msgLenVal') + assert hasattr(node.nodestack, 'msg_len_val') + node.nodestack.msgLenVal = MessageLenValidator(max_len_limit) + node.nodestack.msg_len_val = MessageLenValidator(max_len_limit) + + +def check_reqacks(client, looper, reqs, txnPoolNodeSet): + reqack_coros = [] + for req in reqs: + reqack_coros.extend([partial(checkReqAck, client, node, req.identifier, + req.reqId, None) for node in txnPoolNodeSet]) + timeout = 
waits.expectedReqAckQuorumTime() + looper.run(eventuallyAll(*reqack_coros, totalTimeout=timeout)) + + +@pytest.fixture(scope="module") +def clientAndWallet2(looper, poolTxnClientData, tdirWithPoolTxns): + client, wallet = buildPoolClientAndWallet(poolTxnClientData, + tdirWithPoolTxns) + + looper.add(client) + looper.run(client.ensureConnectedToNodes()) + yield client, wallet + client.stop() diff --git a/plenum/test/primary_selection/conftest.py b/plenum/test/primary_selection/conftest.py index 43c832c8f9..36455f005f 100644 --- a/plenum/test/primary_selection/conftest.py +++ b/plenum/test/primary_selection/conftest.py @@ -1,10 +1,12 @@ import pytest from plenum.test.node_catchup.helper import waitNodeDataEquality -from plenum.test.primary_selection.helper import check_newly_added_nodes +from plenum.test.primary_selection.helper import check_newly_added_nodes, \ + getPrimaryNodesIdxs from plenum.test.pool_transactions.conftest import clientAndWallet1, \ client1, wallet1, client1Connected, looper, nodeThetaAdded, \ stewardAndWallet1, steward1, stewardWallet +from plenum.test.pool_transactions.helper import buildPoolClientAndWallet @pytest.fixture(scope="module") @@ -15,3 +17,25 @@ def one_node_added(looper, txnPoolNodeSet, nodeThetaAdded): waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1]) check_newly_added_nodes(looper, txnPoolNodeSet, [new_node]) return new_node + + +@pytest.fixture(scope="module") +def txnPoolMasterNodes(txnPoolNodeSet): + primariesIdxs = getPrimaryNodesIdxs(txnPoolNodeSet) + return txnPoolNodeSet[primariesIdxs[0]], txnPoolNodeSet[primariesIdxs[1]] + + +@pytest.fixture(scope="module") +def stewardAndWalletForMasterNode(looper, poolTxnData, poolTxnStewardNames, + tdirWithPoolTxns, txnPoolNodeSet, txnPoolMasterNodes): + primariesIdxs = getPrimaryNodesIdxs(txnPoolNodeSet) + master_node = txnPoolMasterNodes[0] + stewardName = poolTxnStewardNames[primariesIdxs[0]] + stewardsSeed = poolTxnData["seeds"][stewardName].encode() + + stewardClient, 
stewardWallet = buildPoolClientAndWallet( + (stewardName, stewardsSeed), tdirWithPoolTxns) + looper.add(stewardClient) + looper.run(stewardClient.ensureConnectedToNodes()) + + return stewardClient, stewardWallet diff --git a/plenum/test/primary_selection/helper.py b/plenum/test/primary_selection/helper.py index 729aad3918..64d55a26ae 100644 --- a/plenum/test/primary_selection/helper.py +++ b/plenum/test/primary_selection/helper.py @@ -1,5 +1,7 @@ +from typing import Sequence, List + from plenum.server.pool_manager import RegistryPoolManager, TxnPoolManager -from plenum.test.test_node import checkProtocolInstanceSetup +from plenum.test.test_node import TestNode, checkProtocolInstanceSetup def check_rank_consistent_across_each_node(nodes): @@ -23,7 +25,7 @@ def check_rank_consistent_across_each_node(nodes): if isinstance(node.poolManager, RegistryPoolManager): order.append(node.poolManager.node_names_ordered_by_rank) elif isinstance(node.poolManager, TxnPoolManager): - order.append(node.poolManager.node_ids_in_ordered_by_rank) + order.append(node.poolManager.node_ids_ordered_by_rank) else: RuntimeError('Dont know this pool manager {}'. 
format(node.poolManager)) @@ -41,3 +43,15 @@ def check_newly_added_nodes(looper, all_nodes, new_nodes): assert all(new_node.rank > n.rank for n in old_nodes) old_nodes.append(new_node) checkProtocolInstanceSetup(looper, all_nodes, retryWait=1) + + +def getPrimaryNodesIdxs(nodes: Sequence[TestNode]) -> List[TestNode]: + primariesIdxs = [] + for instId in range(len(nodes[0].replicas)): + for idx, node in enumerate(nodes): + if node.replicas[instId].isPrimary: + assert instId == len(primariesIdxs) + primariesIdxs.append(idx) + + assert len(set(primariesIdxs)) == len(nodes[0].replicas) + return primariesIdxs diff --git a/plenum/test/primary_selection/test_primary_selection_after_primary_demotion_and_pool_restart.py b/plenum/test/primary_selection/test_primary_selection_after_primary_demotion_and_pool_restart.py new file mode 100644 index 0000000000..0bd873a4f2 --- /dev/null +++ b/plenum/test/primary_selection/test_primary_selection_after_primary_demotion_and_pool_restart.py @@ -0,0 +1,64 @@ +from stp_core.common.log import getlogger + +from plenum.common.constants import ALIAS, SERVICES +from plenum.test.pool_transactions.conftest import looper +from plenum.test.pool_transactions.helper import updateNodeData + +from plenum.test.test_node import TestNode, checkNodesConnected, \ + ensureElectionsDone +from plenum.test.helper import checkViewNoForNodes, \ + sendReqsToNodesAndVerifySuffReplies + +from plenum.test.primary_selection.helper import getPrimaryNodesIdxs + +logger = getlogger() + +def test_primary_selection_after_primary_demotion_and_pool_restart(looper, + txnPoolNodeSet, stewardAndWalletForMasterNode, txnPoolMasterNodes, + tconf, tdirWithPoolTxns): + """ + Demote primary and restart the pool. + Pool should select new primary and have viewNo=0 after restart. + """ + + logger.info("1. 
turn off the node which has primary replica for master instanse") + master_node = txnPoolMasterNodes[0] + client, wallet = stewardAndWalletForMasterNode + + node_data = { + ALIAS: master_node.name, + SERVICES: [] + } + updateNodeData(looper, client, wallet, master_node, node_data) + + restNodes = [node for node in txnPoolNodeSet if node.name != master_node.name] + ensureElectionsDone(looper, restNodes) + + # ensure pool is working properly + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3) + + logger.info("2. restart pool") + # Stopping existing nodes + for node in txnPoolNodeSet: + node.stop() + looper.removeProdable(node) + + # Starting nodes again by creating `Node` objects since that simulates + # what happens when starting the node with script + restartedNodes = [] + for node in txnPoolNodeSet: + restartedNode = TestNode(node.name, basedirpath=tdirWithPoolTxns, + config=tconf, ha=node.nodestack.ha, + cliha=node.clientstack.ha) + looper.add(restartedNode) + restartedNodes.append(restartedNode) + + restNodes = [node for node in restartedNodes if node.name != master_node.name] + + looper.run(checkNodesConnected(restNodes)) + ensureElectionsDone(looper, restNodes) + checkViewNoForNodes(restNodes, 0) + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3) + + primariesIdxs = getPrimaryNodesIdxs(restNodes) + assert restNodes[primariesIdxs[0]].name != master_node.name diff --git a/plenum/test/primary_selection/test_primary_selection_after_primary_demotion_and_view_changes.py b/plenum/test/primary_selection/test_primary_selection_after_primary_demotion_and_view_changes.py new file mode 100644 index 0000000000..228af4111f --- /dev/null +++ b/plenum/test/primary_selection/test_primary_selection_after_primary_demotion_and_view_changes.py @@ -0,0 +1,71 @@ +from stp_core.common.log import getlogger + +from plenum.common.constants import ALIAS, SERVICES + +from plenum.test.pool_transactions.conftest import looper +from 
plenum.test.pool_transactions.helper import updateNodeData + +from plenum.test.helper import checkViewNoForNodes, \ + sendReqsToNodesAndVerifySuffReplies +from plenum.test.test_node import ensureElectionsDone +from plenum.test.view_change.helper import ensure_view_change_complete + +logger = getlogger() + +def test_primary_selection_after_primary_demotion_and_view_changes(looper, txnPoolNodeSet, + stewardAndWalletForMasterNode, txnPoolMasterNodes): + """ + Demote primary and do multiple view changes forcing primaries rotation. + Demoted primary should be skipped without additional view changes. + """ + + viewNo0 = checkViewNoForNodes(txnPoolNodeSet) + + logger.info("1. turn off the node which has primary replica for master instanse, " + " this should trigger view change") + master_node = txnPoolMasterNodes[0] + client, wallet = stewardAndWalletForMasterNode + node_data = { + ALIAS: master_node.name, + SERVICES: [] + } + updateNodeData(looper, client, wallet, master_node, node_data) + + restNodes = [node for node in txnPoolNodeSet \ + if node.name != master_node.name] + ensureElectionsDone(looper, restNodes) + + viewNo1 = checkViewNoForNodes(restNodes) + + assert viewNo1 == viewNo0 + 1 + assert master_node.viewNo == viewNo0 + assert len(restNodes[0].replicas) == 1 # only one instance left + assert restNodes[0].replicas[0].primaryName != master_node.name + + # ensure pool is working properly + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3) + + logger.info("2. force view change 2 and check final viewNo") + ensure_view_change_complete(looper, restNodes) + + viewNo2 = checkViewNoForNodes(restNodes) + assert restNodes[0].replicas[0].primaryName != master_node.name + assert viewNo2 == viewNo1 + 1 + + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3) + + logger.info("3. 
force view change 3 and check final viewNo") + ensure_view_change_complete(looper, restNodes) + viewNo3 = checkViewNoForNodes(restNodes) + assert restNodes[0].replicas[0].primaryName != master_node.name + assert viewNo3 == viewNo2 + 1 + + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3) + + logger.info("4. force view change 4 and check final viewNo") + ensure_view_change_complete(looper, restNodes) + viewNo4 = checkViewNoForNodes(restNodes) + assert restNodes[0].replicas[0].primaryName != master_node.name + assert viewNo4 == viewNo3 + 1 + + sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3) diff --git a/plenum/test/validator_info/__init__.py b/plenum/test/validator_info/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/validator_info/test_validator_info.py b/plenum/test/validator_info/test_validator_info.py new file mode 100644 index 0000000000..c26d6d2c79 --- /dev/null +++ b/plenum/test/validator_info/test_validator_info.py @@ -0,0 +1,277 @@ +import json +import os +from random import randint + +import base58 +import pytest +import re + +import time + +from plenum.common.constants import TXN_TYPE, GET_TXN, DATA, NODE +from plenum.common.request import Request +from plenum.common.util import getTimeBasedId +from plenum.server.validator_info_tool import ValidatorNodeInfoTool +from plenum.test import waits +from plenum.test.helper import sendRandomRequests, waitForSufficientRepliesForRequests, checkSufficientRepliesReceived, \ + sendRandomRequest +# noinspection PyUnresolvedReferences +from plenum.test.node_catchup.helper import ensureClientConnectedToNodesAndPoolLedgerSame +from plenum.test.pool_transactions.conftest import steward1, stewardWallet, client1Connected # noqa +from plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected +from plenum.test.test_client import genTestClient +from stp_core.common.constants import ZMQ_NETWORK_PROTOCOL +from 
stp_core.loop.eventually import eventually + + +TEST_NODE_NAME = 'Alpha' +INFO_FILENAME = '{}_info.json'.format(TEST_NODE_NAME.lower()) +PERIOD_SEC = 1 +nodeCount = 5 + + +def test_validator_info_file_schema_is_valid(info): + assert isinstance(info, dict) + assert 'alias' in info + + assert 'bindings' in info + assert 'client' in info['bindings'] + assert 'ip' not in info['bindings']['client'] + assert 'port' in info['bindings']['client'] + assert 'protocol' in info['bindings']['client'] + assert 'node' in info['bindings'] + assert 'ip' not in info['bindings']['node'] + assert 'port' in info['bindings']['node'] + assert 'protocol' in info['bindings']['node'] + + assert 'did' in info + assert 'response-version' in info + assert 'timestamp' in info + assert 'verkey' in info + + assert 'metrics' in info + assert 'average-per-second' in info['metrics'] + assert 'read-transactions' in info['metrics']['average-per-second'] + assert 'write-transactions' in info['metrics']['average-per-second'] + assert 'transaction-count' in info['metrics'] + assert 'ledger' in info['metrics']['transaction-count'] + assert 'pool' in info['metrics']['transaction-count'] + assert 'uptime' in info['metrics'] + + assert 'pool' in info + assert 'reachable' in info['pool'] + assert 'count' in info['pool']['reachable'] + assert 'list' in info['pool']['reachable'] + assert 'unreachable' in info['pool'] + assert 'count' in info['pool']['unreachable'] + assert 'list' in info['pool']['unreachable'] + assert 'total-count' in info['pool'] + + +def test_validator_info_file_alias_field_valid(info): + assert info['alias'] == 'Alpha' + + +def test_validator_info_file_bindings_field_valid(info, node): + # don't forget enable this check if ip comes back + # assert info['bindings']['client']['ip'] == node.clientstack.ha.host + assert 'ip' not in info['bindings']['client'] + assert info['bindings']['client']['port'] == node.clientstack.ha.port + assert info['bindings']['client']['protocol'] == 
ZMQ_NETWORK_PROTOCOL + + # don't forget enable this check if ip comes back + # assert info['bindings']['node']['ip'] == node.nodestack.ha.host + assert 'ip' not in info['bindings']['node'] + assert info['bindings']['node']['port'] == node.nodestack.ha.port + assert info['bindings']['node']['protocol'] == ZMQ_NETWORK_PROTOCOL + + +def test_validator_info_file_did_field_valid(info): + assert info['did'] == 'JpYerf4CssDrH76z7jyQPJLnZ1vwYgvKbvcp16AB5RQ' + + +def test_validator_info_file_response_version_field_valid(info): + assert info['response-version'] == ValidatorNodeInfoTool.JSON_SCHEMA_VERSION + + +def test_validator_info_file_timestamp_field_valid(load_latest_info, + info): + assert re.match('\d{10}', str(info['timestamp'])) + latest_info = load_latest_info() + assert latest_info['timestamp'] > info['timestamp'] + + +def test_validator_info_file_verkey_field_valid(node, info): + assert info['verkey'] == base58.b58encode(node.nodestack.verKey) + + +def test_validator_info_file_metrics_avg_write_field_valid(info, + write_txn_and_get_latest_info): + assert info['metrics']['average-per-second']['write-transactions'] == 0 + latest_info = write_txn_and_get_latest_info() + assert latest_info['metrics']['average-per-second']['write-transactions'] > 0 + + +def test_validator_info_file_metrics_avg_read_field_valid(info, + read_txn_and_get_latest_info + ): + assert info['metrics']['average-per-second']['read-transactions'] == 0 + latest_info = read_txn_and_get_latest_info(GET_TXN) + assert latest_info['metrics']['average-per-second']['read-transactions'] > 0 + + +def test_validator_info_file_metrics_count_ledger_field_valid(poolTxnData, info): + txns_num = sum(1 for item in poolTxnData["txns"] if item.get(TXN_TYPE) != NODE) + assert info['metrics']['transaction-count']['ledger'] == txns_num + + +def test_validator_info_file_metrics_count_pool_field_valid(info): + assert info['metrics']['transaction-count']['pool'] == nodeCount + + +def 
test_validator_info_file_metrics_uptime_field_valid(load_latest_info, + info): + assert info['metrics']['uptime'] > 0 + latest_info = load_latest_info() + assert latest_info['metrics']['uptime'] > info['metrics']['uptime'] + + +def test_validator_info_file_pool_fields_valid(txnPoolNodesLooper, txnPoolNodeSet, + info, + load_latest_info): + assert info['pool']['reachable']['count'] == nodeCount + assert info['pool']['reachable']['list'] == sorted(list(node.name for node in txnPoolNodeSet)) + assert info['pool']['unreachable']['count'] == 0 + assert info['pool']['unreachable']['list'] == [] + assert info['pool']['total-count'] == nodeCount + + others, disconnected = txnPoolNodeSet[:-1], txnPoolNodeSet[-1] + disconnect_node_and_ensure_disconnected(txnPoolNodesLooper, others, disconnected) + latest_info = load_latest_info() + + assert latest_info['pool']['reachable']['count'] == nodeCount - 1 + assert latest_info['pool']['reachable']['list'] == sorted(list(node.name for node in others)) + assert latest_info['pool']['unreachable']['count'] == 1 + assert latest_info['pool']['unreachable']['list'] == [txnPoolNodeSet[-1].name] + assert latest_info['pool']['total-count'] == nodeCount + + +def test_validator_info_file_handle_fails(info, + node, + load_latest_info): + node._info_tool._node = None + latest_info = load_latest_info() + + assert latest_info['alias'] is None + # assert latest_info['bindings']['client']['ip'] is None + assert 'ip' not in info['bindings']['client'] + assert latest_info['bindings']['client']['port'] is None + # assert latest_info['bindings']['node']['ip'] is None + assert 'ip' not in info['bindings']['node'] + assert latest_info['bindings']['node']['port'] is None + assert latest_info['did'] is None + assert latest_info['timestamp'] is not None + assert latest_info['verkey'] is None + assert latest_info['metrics']['average-per-second']['read-transactions'] is None + assert latest_info['metrics']['average-per-second']['write-transactions'] is None + 
assert latest_info['metrics']['transaction-count']['ledger'] is None + assert latest_info['metrics']['transaction-count']['pool'] is None + assert latest_info['metrics']['uptime'] is None + assert latest_info['pool']['reachable']['count'] is None + assert latest_info['pool']['reachable']['list'] is None + assert latest_info['pool']['unreachable']['count'] is None + assert latest_info['pool']['unreachable']['list'] is None + assert latest_info['pool']['total-count'] is None + + +@pytest.fixture(scope='module') +def info(info_path): + return load_info(info_path) + + +def load_info(path): + with open(path) as fd: + info = json.load(fd) + return info + + +@pytest.fixture(scope='module') +def info_path(tdirWithPoolTxns, patched_dump_info_period, txnPoolNodesLooper, txnPoolNodeSet): + path = os.path.join(tdirWithPoolTxns, INFO_FILENAME) + txnPoolNodesLooper.runFor(patched_dump_info_period) + assert os.path.exists(path), '{} exists'.format(path) + return path + + +@pytest.fixture(scope='module') +def patched_dump_info_period(tconf): + old_period = tconf.DUMP_VALIDATOR_INFO_PERIOD_SEC + tconf.DUMP_VALIDATOR_INFO_PERIOD_SEC = PERIOD_SEC + yield tconf.DUMP_VALIDATOR_INFO_PERIOD_SEC + tconf.DUMP_VALIDATOR_INFO_PERIOD_SEC = old_period + + +@pytest.fixture(scope='module') +def node(txnPoolNodeSet): + for n in txnPoolNodeSet: + if n.name == TEST_NODE_NAME: + return n + assert False, 'Pool does not have "{}" node'.format(TEST_NODE_NAME) + + +@pytest.fixture +def read_txn_and_get_latest_info(txnPoolNodesLooper, patched_dump_info_period, + client_and_wallet, info_path): + client, wallet = client_and_wallet + + def read_wrapped(txn_type): + op = { + TXN_TYPE: txn_type, + DATA: 1 + } + req = Request(identifier=wallet.defaultId, + operation=op, reqId=getTimeBasedId()) + client.submitReqs(req) + + timeout = waits.expectedTransactionExecutionTime( + len(client.inBox)) + txnPoolNodesLooper.run( + eventually(checkSufficientRepliesReceived, client.inBox, + req.reqId, 1, + retryWait=1, 
timeout=timeout)) + txnPoolNodesLooper.runFor(patched_dump_info_period) + return load_info(info_path) + return read_wrapped + + +@pytest.fixture +def write_txn_and_get_latest_info(txnPoolNodesLooper, + client_and_wallet, + patched_dump_info_period, + info_path): + client, wallet = client_and_wallet + + def write_wrapped(): + req = sendRandomRequest(wallet, client) + waitForSufficientRepliesForRequests(txnPoolNodesLooper, client, requests=[req]) + txnPoolNodesLooper.runFor(patched_dump_info_period) + return load_info(info_path) + return write_wrapped + + +@pytest.fixture(scope="function") +def load_latest_info(txnPoolNodesLooper, patched_dump_info_period, info_path): + def wrapped(): + txnPoolNodesLooper.runFor(patched_dump_info_period + 1) + return load_info(info_path) + return wrapped + + +@pytest.fixture +def client_and_wallet(txnPoolNodesLooper, tdirWithPoolTxns, txnPoolNodeSet): + client, wallet = genTestClient(tmpdir=tdirWithPoolTxns, nodes=txnPoolNodeSet, + name='reader', usePoolLedger=True) + txnPoolNodesLooper.add(client) + ensureClientConnectedToNodesAndPoolLedgerSame(txnPoolNodesLooper, client, + *txnPoolNodeSet) + return client, wallet diff --git a/plenum/test/view_change/test_new_node_joins_after_view_change.py b/plenum/test/view_change/test_new_node_joins_after_view_change.py index 9496254138..b2510c8ef3 100644 --- a/plenum/test/view_change/test_new_node_joins_after_view_change.py +++ b/plenum/test/view_change/test_new_node_joins_after_view_change.py @@ -17,6 +17,8 @@ from plenum.test.primary_selection.conftest import nodeThetaAdded, \ one_node_added +from stp_core.common.log import getlogger +logger = getlogger() @pytest.fixture(scope='module') def all_nodes_view_change( @@ -88,4 +90,5 @@ def test_old_non_primary_restart_after_view_change(new_node_in_correct_view, compare_val_to=True)) > 0 ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) + ensureElectionsDone(looper, txnPoolNodeSet) assert not restarted_node._next_view_indications diff 
--git a/stp_core/common/constants.py b/stp_core/common/constants.py index 3eae4a1323..7e49d54216 100644 --- a/stp_core/common/constants.py +++ b/stp_core/common/constants.py @@ -1 +1,3 @@ CONNECTION_PREFIX = "CONNECTION: " + +ZMQ_NETWORK_PROTOCOL = 'tcp' diff --git a/stp_core/config.py b/stp_core/config.py index ee14262c61..da947a8681 100644 --- a/stp_core/config.py +++ b/stp_core/config.py @@ -44,6 +44,4 @@ # All messages exceeding the limit will be rejected without processing -# TODO: need to think about a lower value; currently low value leads to -# problems with batching on transport level -MSG_LEN_LIMIT = 128 * 1024 * 1024 +MSG_LEN_LIMIT = 128 * 1024 diff --git a/stp_core/validators/message_length_validator.py b/stp_core/validators/message_length_validator.py index df251de02a..948d239383 100644 --- a/stp_core/validators/message_length_validator.py +++ b/stp_core/validators/message_length_validator.py @@ -7,6 +7,9 @@ def __init__(self, max_allowed: int): def validate(self, msg: bytes): has_len = len(msg) - if has_len > self.max_allowed: + if not self.is_len_less_than_limit(has_len): raise InvalidMessageExceedingSizeException( self.max_allowed, has_len) + + def is_len_less_than_limit(self, l): + return l <= self.max_allowed diff --git a/stp_zmq/remote.py b/stp_zmq/remote.py index 33ce2f9e24..873751cc23 100644 --- a/stp_zmq/remote.py +++ b/stp_zmq/remote.py @@ -1,6 +1,8 @@ from stp_core.common.config.util import getConfig import time import zmq + +from stp_core.common.constants import ZMQ_NETWORK_PROTOCOL from stp_core.common.log import getlogger import sys from zmq.utils.monitor import recv_monitor_message @@ -77,7 +79,7 @@ def connect(self, context, localPubKey, localSecKey, typ=None): sock.identity = localPubKey set_keepalive(sock, self.config) set_zmq_internal_queue_length(sock, self.config) - addr = 'tcp://{}:{}'.format(*self.ha) + addr = '{protocol}://{}:{}'.format(*self.ha, protocol=ZMQ_NETWORK_PROTOCOL) sock.connect(addr) self.socket = sock 
logger.trace('connecting socket {} {} to remote {}'. diff --git a/stp_zmq/test/test_zstack.py b/stp_zmq/test/test_zstack.py index 2470276482..99bbe2c3ca 100644 --- a/stp_zmq/test/test_zstack.py +++ b/stp_zmq/test/test_zstack.py @@ -1,5 +1,5 @@ import pytest -from stp_core.common.util import adict + from stp_core.crypto.util import randomSeed from stp_core.loop.eventually import eventually from stp_core.network.port_dispenser import genHa @@ -7,6 +7,7 @@ from stp_zmq.test.helper import genKeys, create_and_prep_stacks, \ check_stacks_communicating, get_file_permission_mask, get_zstack_key_paths from stp_zmq.zstack import ZStack +from stp_core.common.util import adict def testRestricted2ZStackCommunication(tdir, looper, tconf): @@ -81,7 +82,7 @@ def test_zstack_non_utf8(tdir, looper, tconf): alpha.transmit(b'{"k2": "v2\x9c"}', uid, serialized=True) with pytest.raises(AssertionError): looper.run(eventually(chkPrinted, betaP, {"k2": "v2\x9c"})) - # TODO: A better test where the output of the parsing method is checked + # TODO: A better test where the output of the parsing method is checked # requires spyable methods # Again send a utf-8 message and see its received (checks if stack is @@ -158,21 +159,13 @@ def create_stack(name, handler=None): assert len(received_messages) != 0 assert len(expected_messages) == len(received_messages), \ - "{} != {}, LAST IS {}".format(len(expected_messages), - len(received_messages), - received_messages[-1]) - - -@pytest.fixture(scope="function") -def patch_msg_len(tconf): - old_value = tconf.MSG_LEN_LIMIT - tconf.MSG_LEN_LIMIT = 128 * 1024 - yield tconf.MSG_LEN_LIMIT - print(old_value) - tconf.MSG_LEN_LIMIT = old_value + "{} != {}, LAST IS {}"\ + .format(len(expected_messages), + len(received_messages), + received_messages[-1]) -def testZStackSendRecvHugeDataUnderLimit(patch_msg_len, tdir, looper, tconf): +def testZStackSendRecvHugeDataUnderLimit(tdir, looper, tconf): names = ['Alpha', 'Beta'] genKeys(tdir, names) @@ -209,7 +202,7 @@ 
def recvHandlerBeta(wrpMsg): assert betaHandler[0] is True -def testZStackSendHugeDataOverLimit(patch_msg_len, tdir, looper, tconf): +def testZStackSendHugeDataOverLimit(tdir, looper, tconf): names = ['Alpha', 'Beta'] genKeys(tdir, names) @@ -254,7 +247,7 @@ def rejectHandlerBeta(reason, frm): assert betaHandlers[1] is False -def testZStackRecvHugeDataOverLimit(patch_msg_len, tdir, looper, tconf): +def testZStackRecvHugeDataOverLimit(tdir, looper, tconf): names = ['Alpha', 'Beta'] genKeys(tdir, names) diff --git a/stp_zmq/zstack.py b/stp_zmq/zstack.py index 9a1a8b1b65..aa2caf20ed 100644 --- a/stp_zmq/zstack.py +++ b/stp_zmq/zstack.py @@ -1,7 +1,7 @@ import inspect from stp_core.common.config.util import getConfig -from stp_core.common.constants import CONNECTION_PREFIX +from stp_core.common.constants import CONNECTION_PREFIX, ZMQ_NETWORK_PROTOCOL try: import ujson as json @@ -348,7 +348,9 @@ def open(self): set_keepalive(self.listener, self.config) set_zmq_internal_queue_length(self.listener, self.config) self.listener.bind( - 'tcp://*:{}'.format(self.ha[1])) + '{protocol}://*:{port}'.format( + port=self.ha[1], protocol=ZMQ_NETWORK_PROTOCOL) + ) def close(self): self.listener.unbind(self.listener.LAST_ENDPOINT) From aa03bfe45d07fc8a66a2443d2491ab8fdf3b417b Mon Sep 17 00:00:00 2001 From: Dmitry Surnin Date: Tue, 21 Nov 2017 19:12:59 +0300 Subject: [PATCH 018/100] Fix client basedir initialization Signed-off-by: Dmitry Surnin --- plenum/client/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plenum/client/client.py b/plenum/client/client.py index 4d04b3b386..f736b9caa1 100644 --- a/plenum/client/client.py +++ b/plenum/client/client.py @@ -80,8 +80,8 @@ def __init__(self, :param ha: tuple of host and port """ self.config = config or getConfig() - self.basedirpath = basedirpath or os.path.join(self.config.baseDir, - self.config.NETWORK_NAME) + self.basedirpath = basedirpath or os.path.join(self.config.CLI_NETWORK_DIR, 
self.config.NETWORK_NAME) + self.basedirpath = os.path.expanduser(self.basedirpath) signer = Signer(sighex) sighex = signer.keyraw @@ -139,7 +139,7 @@ def __init__(self, ha=cha, main=False, # stops incoming vacuous joins auth_mode=AuthMode.ALLOW_ANY.value) - stackargs['basedirpath'] = basedirpath + stackargs['basedirpath'] = self.basedirpath self.created = time.perf_counter() # noinspection PyCallingNonCallable From 73c931cd8dff428d8a461ca523acb8c561ff348f Mon Sep 17 00:00:00 2001 From: ashcherbakov Date: Wed, 21 Feb 2018 14:39:14 +0300 Subject: [PATCH 019/100] fix setting of 3PC key after catch-up Signed-off-by: ashcherbakov --- plenum/common/ledger_info.py | 5 +- plenum/common/ledger_manager.py | 82 ++--- plenum/server/quorums.py | 1 + .../node_catchup/test_get_last_txn_3PC_key.py | 140 +++++++++ .../test_same_ledger_initial_catchup.py | 279 ++++++++++++++++++ 5 files changed, 471 insertions(+), 36 deletions(-) create mode 100644 plenum/test/node_catchup/test_get_last_txn_3PC_key.py create mode 100644 plenum/test/node_catchup/test_same_ledger_initial_catchup.py diff --git a/plenum/common/ledger_info.py b/plenum/common/ledger_info.py index e6154e6188..fdc087585c 100644 --- a/plenum/common/ledger_info.py +++ b/plenum/common/ledger_info.py @@ -47,7 +47,8 @@ def set_defaults(self): # Key of the 3PC-batch ordered by the master instance that contained # the last transaction of this node's ledger - self.last_txn_3PC_key = None + # This is a map of last 3PC for each received LedgerStatus + self.last_txn_3PC_key = {} # Dictionary of consistency proofs received for the ledger # in process of catching up @@ -83,7 +84,7 @@ def done_syncing(self): self.canSync = False self.state = LedgerState.synced self.ledgerStatusOk = set() - self.last_txn_3PC_key = None + self.last_txn_3PC_key = {} self.recvdConsistencyProofs = {} self.receivedCatchUpReplies = [] self.recvdCatchupRepliesFrm = {} diff --git a/plenum/common/ledger_manager.py b/plenum/common/ledger_manager.py index 
6ad7193a33..f95ee6fa9d 100644 --- a/plenum/common/ledger_manager.py +++ b/plenum/common/ledger_manager.py @@ -1,42 +1,39 @@ import heapq +import math import operator -from collections import Callable +import time +from collections import Callable, Counter from functools import partial from random import shuffle from typing import Any, List, Dict, Tuple -import math from typing import Optional -import time -from plenum.common.ledger import Ledger from ledger.merkle_verifier import MerkleVerifier from ledger.util import F - -from plenum.common.messages.node_messages import LedgerStatus, CatchupRep, \ - ConsistencyProof, f, CatchupReq +from plenum.common.config_util import getConfig from plenum.common.constants import POOL_LEDGER_ID, LedgerState, DOMAIN_LEDGER_ID, \ CONSISTENCY_PROOF, CATCH_UP_PREFIX -from plenum.common.util import compare_3PC_keys, SortedDict -from plenum.common.config_util import getConfig +from plenum.common.ledger import Ledger +from plenum.common.ledger_info import LedgerInfo +from plenum.common.messages.node_messages import LedgerStatus, CatchupRep, \ + ConsistencyProof, f, CatchupReq +from plenum.common.txn_util import reqToTxn +from plenum.common.util import compare_3PC_keys, SortedDict, mostCommonElement, min_3PC_key +from plenum.server.has_action_queue import HasActionQueue from plenum.server.quorums import Quorums from stp_core.common.constants import CONNECTION_PREFIX from stp_core.common.log import getlogger -from plenum.server.has_action_queue import HasActionQueue -from plenum.common.ledger_info import LedgerInfo -from plenum.common.txn_util import reqToTxn - logger = getlogger() class LedgerManager(HasActionQueue): - def __init__(self, owner, - ownedByNode: bool=True, - postAllLedgersCaughtUp: Optional[Callable]=None, - preCatchupClbk: Optional[Callable]=None, - ledger_sync_order: Optional[List]=None): + ownedByNode: bool = True, + postAllLedgersCaughtUp: Optional[Callable] = None, + preCatchupClbk: Optional[Callable] = None, + 
ledger_sync_order: Optional[List] = None): # If ledger_sync_order is not provided (is None), it is assumed that # `postCatchupCompleteClbk` of the LedgerInfo will be used self.owner = owner @@ -52,7 +49,7 @@ def __init__(self, # Holds ledgers of different types with # their info like callbacks, state, etc - self.ledgerRegistry = {} # type: Dict[int, LedgerInfo] + self.ledgerRegistry = {} # type: Dict[int, LedgerInfo] # Largest 3 phase key received during catchup. # This field is needed to discard any stashed 3PC messages or @@ -67,11 +64,11 @@ def service(self): return self._serviceActions() def addLedger(self, iD: int, ledger: Ledger, - preCatchupStartClbk: Callable=None, - postCatchupStartClbk: Callable=None, - preCatchupCompleteClbk: Callable=None, - postCatchupCompleteClbk: Callable=None, - postTxnAddedToLedgerClbk: Callable=None): + preCatchupStartClbk: Callable = None, + postCatchupStartClbk: Callable = None, + preCatchupCompleteClbk: Callable = None, + postCatchupCompleteClbk: Callable = None, + postTxnAddedToLedgerClbk: Callable = None): if iD in self.ledgerRegistry: logger.error("{} already present in ledgers " @@ -257,7 +254,7 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): ledgerStatus = LedgerStatus(*status) if ledgerStatus.txnSeqNo < 0: self.discard(status, reason="Received negative sequence number " - "from {}".format(frm), + "from {}".format(frm), logMethod=logger.warning) return ledgerId = getattr(status, f.LEDGER_ID.nm) @@ -309,10 +306,8 @@ def processLedgerStatus(self, status: LedgerStatus, frm: str): ledgerInfo.recvdConsistencyProofs[frm] = None ledgerInfo.ledgerStatusOk.add(frm) - if self.isLedgerSame(ledgerStatus) \ - and ledgerStatus.viewNo is not None \ - and ledgerStatus.ppSeqNo is not None: - ledgerInfo.last_txn_3PC_key = \ + if self.isLedgerSame(ledgerStatus): + ledgerInfo.last_txn_3PC_key[frm] = \ (ledgerStatus.viewNo, ledgerStatus.ppSeqNo) if self.has_ledger_status_quorum( @@ -329,11 +324,30 @@ def 
processLedgerStatus(self, status: LedgerStatus, frm: str): self.do_pre_catchup(ledgerId) # Any state cleanup that is part of pre-catchup should be # done - self.catchupCompleted(ledgerId, ledgerInfo.last_txn_3PC_key) + last_3PC_key = self._get_last_txn_3PC_key(ledgerInfo) + self.catchupCompleted(ledgerId, last_3PC_key) else: # Ledger was already synced self.mark_ledger_synced(ledgerId) + def _get_last_txn_3PC_key(self, ledgerInfo): + quorum = Quorums(self.owner.totalNodes) + quorumed_3PC_keys = \ + [ + most_common_element + for most_common_element, freq in + Counter(ledgerInfo.last_txn_3PC_key.values()).most_common() + if quorum.ledger_status_last_3PC.is_reached(freq) and + most_common_element[0] is not None and + most_common_element[1] is not None + ] + + if len(quorumed_3PC_keys) == 0: + return None + + min_quorumed_3PC_key = min_3PC_key(quorumed_3PC_keys) + return min_quorumed_3PC_key + @staticmethod def has_ledger_status_quorum(leger_status_num, total_nodes): quorum = Quorums(total_nodes).ledger_status @@ -577,7 +591,7 @@ def _transform(self, txn): # Certain transactions might need to be # transformed to certain format before applying to the ledger txn = reqToTxn(txn) - z = txn if not self.ownedByNode else \ + z = txn if not self.ownedByNode else \ self.owner.transform_txn_for_ledger(txn) return z @@ -726,7 +740,7 @@ def canStartCatchUpProcess(self, ledgerId: int): logger.debug( "{} cannot start catchup since received only {} " - "consistency proofs but need at least {}". 
format( + "consistency proofs but need at least {}".format( self, len(recvdConsProof), adjustedQuorum.consistency_proof.value)) @@ -852,7 +866,7 @@ def startCatchUpProcess(self, ledgerId: int, proof: ConsistencyProof): def _getCatchupTimeout(self, numRequest, batchSize): return numRequest * self.config.CatchupTransactionsTimeout - def catchupCompleted(self, ledgerId: int, last_3PC: Optional[Tuple]=None): + def catchupCompleted(self, ledgerId: int, last_3PC: Optional[Tuple] = None): if ledgerId not in self.ledgerRegistry: logger.error("{}{} called catchup completed for ledger {}". format(CATCH_UP_PREFIX, self, ledgerId)) @@ -975,7 +989,7 @@ def _get_merged_catchup_txns(existing_txns, new_txns): key=operator.itemgetter(0))) def getConsistencyProof(self, status: LedgerStatus): - ledger = self.getLedgerForMsg(status) # type: Ledger + ledger = self.getLedgerForMsg(status) # type: Ledger ledgerId = getattr(status, f.LEDGER_ID.nm) seqNoStart = getattr(status, f.TXN_SEQ_NO.nm) seqNoEnd = ledger.size diff --git a/plenum/server/quorums.py b/plenum/server/quorums.py index a034410386..feba2a45ca 100644 --- a/plenum/server/quorums.py +++ b/plenum/server/quorums.py @@ -27,6 +27,7 @@ def __init__(self, n): self.same_consistency_proof = Quorum(f + 1) self.consistency_proof = Quorum(f + 1) self.ledger_status = Quorum(n - f - 1) + self.ledger_status_last_3PC = Quorum(f + 1) self.checkpoint = Quorum(n - f - 1) self.timestamp = Quorum(f + 1) self.bls_signatures = Quorum(n - f) diff --git a/plenum/test/node_catchup/test_get_last_txn_3PC_key.py b/plenum/test/node_catchup/test_get_last_txn_3PC_key.py new file mode 100644 index 0000000000..8ce5332661 --- /dev/null +++ b/plenum/test/node_catchup/test_get_last_txn_3PC_key.py @@ -0,0 +1,140 @@ +import pytest + + +@pytest.yield_fixture(scope="function") +def ledger_manager_and_info(txnPoolNodeSet): + ledger_manager = txnPoolNodeSet[0].ledgerManager + + ledger_info = ledger_manager.getLedgerInfoByType(1) + ledger_info.set_defaults() + + return 
ledger_manager, ledger_info + + +def test_empty(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + +def test_1_none(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + ledger_info.last_txn_3PC_key['1'] = (None, None) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + +def test_non_quorum(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + ledger_info.last_txn_3PC_key['1'] = (None, None) + ledger_info.last_txn_3PC_key['2'] = (None, None) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + +def test_semi_none(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + ledger_info.last_txn_3PC_key['1'] = (1, None) + ledger_info.last_txn_3PC_key['2'] = (None, 1) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + +def test_quorum_1_value(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + ledger_info.last_txn_3PC_key['1'] = (1, 1) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + ledger_info.last_txn_3PC_key['2'] = (1, 1) + assert (1, 1) == ledger_manager._get_last_txn_3PC_key(ledger_info) + + +def test_quorum_2_values(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + ledger_info.last_txn_3PC_key['1'] = (1, 1) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + ledger_info.last_txn_3PC_key['2'] = (2, 1) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + ledger_info.last_txn_3PC_key['3'] = (1, 2) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + ledger_info.last_txn_3PC_key['4'] = (1, 1) + assert (1, 1) == ledger_manager._get_last_txn_3PC_key(ledger_info) + + +def test_quorum_min_value1(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + 
ledger_info.last_txn_3PC_key['1'] = (2, 1) + ledger_info.last_txn_3PC_key['2'] = (2, 1) + ledger_info.last_txn_3PC_key['3'] = (1, 3) + ledger_info.last_txn_3PC_key['4'] = (1, 3) + assert (1, 3) == ledger_manager._get_last_txn_3PC_key(ledger_info) + + +def test_quorum_min_value2(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + ledger_info.last_txn_3PC_key['1'] = (1, 1) + ledger_info.last_txn_3PC_key['2'] = (1, 1) + ledger_info.last_txn_3PC_key['3'] = (1, 3) + ledger_info.last_txn_3PC_key['4'] = (1, 3) + assert (1, 1) == ledger_manager._get_last_txn_3PC_key(ledger_info) + + +def test_quorum_min_value3(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + ledger_info.last_txn_3PC_key['1'] = (1, 1) + ledger_info.last_txn_3PC_key['2'] = (1, 1) + ledger_info.last_txn_3PC_key['3'] = (1, 3) + ledger_info.last_txn_3PC_key['4'] = (1, 3) + ledger_info.last_txn_3PC_key['5'] = (1, 3) + ledger_info.last_txn_3PC_key['6'] = (1, 3) + assert (1, 1) == ledger_manager._get_last_txn_3PC_key(ledger_info) + + +def test_quorum_with_none1(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + ledger_info.last_txn_3PC_key['1'] = (None, None) + ledger_info.last_txn_3PC_key['2'] = (1, None) + ledger_info.last_txn_3PC_key['3'] = (None, 1) + ledger_info.last_txn_3PC_key['4'] = (1, 3) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + ledger_info.last_txn_3PC_key['5'] = (1, 3) + assert (1, 3) == ledger_manager._get_last_txn_3PC_key(ledger_info) + + +def test_quorum_with_none2(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + ledger_info.last_txn_3PC_key['1'] = (None, None) + ledger_info.last_txn_3PC_key['2'] = (1, None) + ledger_info.last_txn_3PC_key['3'] = (None, 1) + ledger_info.last_txn_3PC_key['4'] = (1, 3) + ledger_info.last_txn_3PC_key['5'] = (1, None) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + +def 
test_quorum_with_none3(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + ledger_info.last_txn_3PC_key['1'] = (None, None) + ledger_info.last_txn_3PC_key['2'] = (1, None) + ledger_info.last_txn_3PC_key['3'] = (None, 1) + ledger_info.last_txn_3PC_key['4'] = (1, 3) + ledger_info.last_txn_3PC_key['5'] = (None, 1) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None + + +def test_quorum_with_none4(ledger_manager_and_info): + ledger_manager, ledger_info = ledger_manager_and_info + + ledger_info.last_txn_3PC_key['1'] = (None, None) + ledger_info.last_txn_3PC_key['2'] = (1, None) + ledger_info.last_txn_3PC_key['3'] = (None, 1) + ledger_info.last_txn_3PC_key['4'] = (1, 3) + ledger_info.last_txn_3PC_key['5'] = (None, None) + assert ledger_manager._get_last_txn_3PC_key(ledger_info) is None diff --git a/plenum/test/node_catchup/test_same_ledger_initial_catchup.py b/plenum/test/node_catchup/test_same_ledger_initial_catchup.py new file mode 100644 index 0000000000..dee8237a71 --- /dev/null +++ b/plenum/test/node_catchup/test_same_ledger_initial_catchup.py @@ -0,0 +1,279 @@ +import pytest + +# noinspection PyUnresolvedReferences +from ledger.test.conftest import tempdir, txn_serializer, hash_serializer # noqa +from plenum.common.constants import LedgerState +from plenum.common.messages.node_messages import LedgerStatus + +nodeCount = 7 + +ledger_id = 1 + + +@pytest.yield_fixture(scope="function") +def restarted_pool(txnPoolNodeSet): + ''' + Emulate restart of the pool + ''' + for node in txnPoolNodeSet: + node.viewNo = None + node.master_replica.last_ordered_3pc = (0, 0) + node.view_changer.set_defaults() + return txnPoolNodeSet + + +@pytest.yield_fixture(scope="function") +def node_and_ledger_info(restarted_pool): + ''' + Emulate restart of the pool (clean state) + ''' + node = restarted_pool[0] + ledger_manager = node.ledgerManager + ledger_manager.last_caught_up_3PC = (0, 0) + + ledger_info = 
ledger_manager.getLedgerInfoByType(ledger_id) + ledger_info.set_defaults() + ledger_info.state = LedgerState.syncing + ledger_info.canSync = True + + ledger_status = node.build_ledger_status(ledger_id) + assert ledger_status.viewNo is None + + return node, ledger_manager, ledger_info, ledger_status + + +def test_same_ledger_status_quorum(restarted_pool, + node_and_ledger_info): + ''' + Check that we require at least n-f-1 (=4) same LedgerStatus msgs + to finish CatchUp + ''' + node, ledger_manager, ledger_info, ledger_status = node_and_ledger_info + + status_from = set() + for i in range(3): + node_name = restarted_pool[i + 1].name + ledger_manager.processLedgerStatus(ledger_status, node_name) + status_from = status_from.union({node_name}) + assert ledger_info.ledgerStatusOk == status_from + assert ledger_info.canSync is True + assert ledger_info.state == LedgerState.syncing + + node_name = restarted_pool[4].name + ledger_manager.processLedgerStatus(ledger_status, node_name) + + assert ledger_info.ledgerStatusOk == set() + assert ledger_info.canSync is False + assert ledger_info.state == LedgerState.synced + + +def test_same_ledger_status_last_ordered_same_3PC(restarted_pool, + node_and_ledger_info): + ''' + Check that last_ordered_3PC is set according to 3PC from LedgerStatus msgs + if all LedgerStatus msgs have the same not None 3PC keys + ''' + node, ledger_manager, ledger_info, ledger_status_none_3PC = node_and_ledger_info + ledger_status_2_40 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 2, 20, + ledger_status_none_3PC.merkleRoot) + + ledger_manager.processLedgerStatus(ledger_status_2_40, restarted_pool[1].name) + ledger_manager.processLedgerStatus(ledger_status_2_40, restarted_pool[2].name) + ledger_manager.processLedgerStatus(ledger_status_2_40, restarted_pool[3].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.syncing + + 
ledger_manager.processLedgerStatus(ledger_status_2_40, restarted_pool[4].name) + assert node.master_last_ordered_3PC == (2, 20) + assert ledger_info.state == LedgerState.synced + + +def test_same_ledger_status_last_ordered_same_None_3PC(restarted_pool, + node_and_ledger_info): + ''' + Check that last_ordered_3PC is set according to 3PC from LedgerStatus msgs + if all LedgerStatus msgs have the same None 3PC keys (like at the initial start of the pool) + ''' + node, ledger_manager, ledger_info, ledger_status_none_3PC = node_and_ledger_info + + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[1].name) + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[2].name) + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[3].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.syncing + + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[4].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.synced + + +def test_same_ledger_status_last_ordered_one_not_none_3PC_last(restarted_pool, + node_and_ledger_info): + ''' + Check that last_ordered_3PC is set according to 3PC from LedgerStatus msgs + if all LedgerStatus msgs have the same None 3PC keys except the last one. 
+ The last msg contains not None 3PC, but it's not enough for setting last_ordered_3PC + since the quorum is f+1 (=3) + ''' + node, ledger_manager, ledger_info, ledger_status_none_3PC = node_and_ledger_info + + ledger_status_3_40 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 3, 40, + ledger_status_none_3PC.merkleRoot) + + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[1].name) + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[2].name) + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[3].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.syncing + + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[4].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.synced + + +def test_same_ledger_status_last_ordered_one_not_none_3PC_first(restarted_pool, + node_and_ledger_info): + ''' + Check that last_ordered_3PC is set according to 3PC from LedgerStatus msgs + if all LedgerStatus msgs have the same None 3PC keys except the first one. 
+ The first msg contains not None 3PC, but it's not enough for setting last_ordered_3PC + since the quorum is f+1 (=3) + ''' + node, ledger_manager, ledger_info, ledger_status_none_3PC = node_and_ledger_info + + ledger_status_3_40 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 3, 40, + ledger_status_none_3PC.merkleRoot) + + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[1].name) + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[2].name) + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[3].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.syncing + + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[4].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.synced + + +def test_same_ledger_status_last_ordered_not_none_3PC_quorum_with_none(restarted_pool, + node_and_ledger_info): + ''' + Check that last_ordered_3PC is set according to 3PC from LedgerStatus msgs + if all LedgerStatus msgs have the same not None 3PC keys except the last one. 
+ The last msg contains None 3PC, but not None from the previous msgs is used + since we have a quorum of f+1 (=3) + ''' + node, ledger_manager, ledger_info, ledger_status_none_3PC = node_and_ledger_info + + ledger_status_3_40 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 3, 40, + ledger_status_none_3PC.merkleRoot) + + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[1].name) + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[2].name) + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[3].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.syncing + + ledger_manager.processLedgerStatus(ledger_status_none_3PC, restarted_pool[4].name) + assert node.master_last_ordered_3PC == (3, 40) + assert ledger_info.state == LedgerState.synced + + +def test_same_ledger_status_last_ordered_not_none_3PC_quorum1(restarted_pool, + node_and_ledger_info): + ''' + Check that last_ordered_3PC is set according to 3PC from LedgerStatus msgs + if all LedgerStatus msgs have the same not None 3PC keys except the last one. 
+ The last msg contains a different not None 3PC, but 3PC from the previous msgs is used + since we have a quorum of f+1 (=3) + ''' + node, ledger_manager, ledger_info, ledger_status_none_3PC = node_and_ledger_info + + ledger_status_1_10 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 1, 10, + ledger_status_none_3PC.merkleRoot) + + ledger_status_3_40 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 3, 40, + ledger_status_none_3PC.merkleRoot) + + ledger_manager.processLedgerStatus(ledger_status_1_10, restarted_pool[1].name) + ledger_manager.processLedgerStatus(ledger_status_1_10, restarted_pool[2].name) + ledger_manager.processLedgerStatus(ledger_status_1_10, restarted_pool[3].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.syncing + + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[4].name) + assert node.master_last_ordered_3PC == (1, 10) + assert ledger_info.state == LedgerState.synced + + +def test_same_ledger_status_last_ordered_not_none_3PC_quorum2(restarted_pool, + node_and_ledger_info): + ''' + Check that last_ordered_3PC is set according to 3PC from LedgerStatus msgs + if all LedgerStatus msgs have the same not None 3PC keys except the last one. 
+ The last msg contains a different not None 3PC, but 3PC from the previous msgs is used + since we have a quorum of f+1 (=3) + ''' + node, ledger_manager, ledger_info, ledger_status_none_3PC = node_and_ledger_info + + ledger_status_1_10 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 1, 10, + ledger_status_none_3PC.merkleRoot) + + ledger_status_3_40 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 3, 40, + ledger_status_none_3PC.merkleRoot) + + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[1].name) + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[2].name) + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[3].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.syncing + + ledger_manager.processLedgerStatus(ledger_status_1_10, restarted_pool[4].name) + assert node.master_last_ordered_3PC == (3, 40) + assert ledger_info.state == LedgerState.synced + + +def test_same_ledger_status_last_ordered_not_none_3PC_no_quorum_equal(restarted_pool, + node_and_ledger_info): + ''' + Check that last_ordered_3PC is set according to 3PC from LedgerStatus msgs. + Check that if we have no quorum (2 different keys, but 3 is required ror quorum), then + they are not used. 
+ ''' + node, ledger_manager, ledger_info, ledger_status_none_3PC = node_and_ledger_info + + ledger_status_1_10 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 1, 10, + ledger_status_none_3PC.merkleRoot) + + ledger_status_3_40 = LedgerStatus(ledger_status_none_3PC.ledgerId, + ledger_status_none_3PC.txnSeqNo, + 3, 40, + ledger_status_none_3PC.merkleRoot) + + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[1].name) + ledger_manager.processLedgerStatus(ledger_status_3_40, restarted_pool[2].name) + ledger_manager.processLedgerStatus(ledger_status_1_10, restarted_pool[3].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.syncing + + ledger_manager.processLedgerStatus(ledger_status_1_10, restarted_pool[4].name) + assert node.master_last_ordered_3PC == (0, 0) + assert ledger_info.state == LedgerState.synced From fc46d4111beb8ecb6dce896ad0cc589be7dc68ca Mon Sep 17 00:00:00 2001 From: ashcherbakov Date: Thu, 22 Feb 2018 13:49:03 +0300 Subject: [PATCH 020/100] reset last_ordered_3PC for future views during view change. Signed-off-by: ashcherbakov --- plenum/server/node.py | 3 +- plenum/server/replica.py | 16 ++-- .../test_last_ordered_reset_for_new_view.py | 78 +++++++++++++++++++ 3 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 plenum/test/view_change/test_last_ordered_reset_for_new_view.py diff --git a/plenum/server/node.py b/plenum/server/node.py index 73f76a5b2a..093efec8b9 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -525,7 +525,8 @@ def on_view_change_start(self): Notifies node about the fact that view changed to let it prepare for election """ - self.master_replica.on_view_change_start() + for replica in self.replicas: + replica.on_view_change_start() logger.debug("{} resetting monitor stats at view change start". 
format(self)) self.monitor.reset() diff --git a/plenum/server/replica.py b/plenum/server/replica.py index e2aa214396..b0e7b276b2 100644 --- a/plenum/server/replica.py +++ b/plenum/server/replica.py @@ -482,11 +482,17 @@ def shouldParticipate(self, viewNo: int, ppSeqNo: int) -> bool: ((viewNo, ppSeqNo) not in self.stashingWhileCatchingUp)) def on_view_change_start(self): - assert self.isMaster - lst = self.last_prepared_certificate_in_view() - self.last_prepared_before_view_change = lst - logger.debug( - '{} setting last prepared for master to {}'.format(self, lst)) + if self.isMaster: + lst = self.last_prepared_certificate_in_view() + self.last_prepared_before_view_change = lst + logger.debug( + '{} setting last prepared for master to {}'.format(self, lst)) + # It can be that last_ordered_3pc was set for the previous view, since it's set during catch-up + # Example: a Node has last_ordered = (1, 300), and then the whole pool except this node restarted + # The new viewNo is 0, but last_ordered is (1, 300), so all new requests will be discarded by this Node + # if we don't reset last_ordered_3pc + if self.viewNo <= self.last_ordered_3pc[0]: + self.last_ordered_3pc = (self.viewNo, 0) def on_view_change_done(self): assert self.isMaster diff --git a/plenum/test/view_change/test_last_ordered_reset_for_new_view.py b/plenum/test/view_change/test_last_ordered_reset_for_new_view.py new file mode 100644 index 0000000000..d553629bac --- /dev/null +++ b/plenum/test/view_change/test_last_ordered_reset_for_new_view.py @@ -0,0 +1,78 @@ +from plenum.test.helper import sdk_send_random_and_check, checkViewNoForNodes +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.pool_transactions.conftest import looper +from plenum.test.view_change.helper import ensure_view_change_complete + + +def test_last_ordered_3pc_reset_if_more_than_new_view(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client): + """ + Check that if last_ordered_3pc's 
viewNo on a Replica is greater than the new viewNo after view change, + then last_ordered_3pc is reset to (new viewNo, 0). + It can be that last_ordered_3pc was set for the previous view, since it's set during catch-up + + Example: a Node has last_ordered = (1, 300), and then the whole pool except this node restarted. + The new viewNo is 0, but last_ordered is (1, 300), so all new requests will be discarded by this Node + if we don't reset last_ordered_3pc + """ + old_view_no = checkViewNoForNodes(txnPoolNodeSet) + for node in txnPoolNodeSet: + node.master_replica.last_ordered_3pc = (old_view_no + 2, 100) + + ensure_view_change_complete(looper, txnPoolNodeSet, customTimeout=60) + view_no = checkViewNoForNodes(txnPoolNodeSet) + + for node in txnPoolNodeSet: + assert (view_no, 0) == node.master_replica.last_ordered_3pc + + # Make sure the pool is working + sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + + +def test_last_ordered_3pc_reset_if_equal_to_new_view(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client): + """ + Check that if last_ordered_3pc's viewNo on a Replica is equal to the new viewNo after view change, + then last_ordered_3pc is reset to (new viewNo, 0). + It can be that last_ordered_3pc was set for the previous view, since it's set during catch-up + + Example: a Node has last_ordered = (1, 300), and then the whole pool except this node restarted. 
+ The new viewNo is 0, but last_ordered is (1, 300), so all new requests will be discarded by this Node + if we don't reset last_ordered_3pc + """ + old_view_no = checkViewNoForNodes(txnPoolNodeSet) + for node in txnPoolNodeSet: + node.master_replica.last_ordered_3pc = (old_view_no + 1, 100) + + ensure_view_change_complete(looper, txnPoolNodeSet, customTimeout=60) + view_no = checkViewNoForNodes(txnPoolNodeSet) + + for node in txnPoolNodeSet: + assert (view_no, 0) == node.master_replica.last_ordered_3pc + + # Make sure the pool is working + sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) + + +def test_last_ordered_3pc_not_reset_if_less_than_new_view(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client): + """ + Check that if last_ordered_3pc's viewNo on a Replica is less than the new viewNo after view change, + then last_ordered_3pc is not reset. + It can be that last_ordered_3pc was set for the previous view, since it's set during catch-up + + Example: a Node has last_ordered = (1, 300), and then the whole pool except this node restarted. 
+ The new viewNo is 0, but last_ordered is (1, 300), so all new requests will be discarded by this Node + if we don't reset last_ordered_3pc + """ + old_view_no = checkViewNoForNodes(txnPoolNodeSet) + for node in txnPoolNodeSet: + node.master_replica.last_ordered_3pc = (old_view_no, 100) + + ensure_view_change_complete(looper, txnPoolNodeSet, customTimeout=60) + + for node in txnPoolNodeSet: + assert (old_view_no, 100) == node.master_replica.last_ordered_3pc + + # Make sure the pool is working + sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5) + ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) From 5a527f3466958880b3f9b7b840e1cd5f7ad2d0e2 Mon Sep 17 00:00:00 2001 From: Lovesh Date: Sat, 24 Feb 2018 01:52:14 +0300 Subject: [PATCH 021/100] fix KeyError for "services" when pool ledger contains node update txns Signed-off-by: Lovesh (cherry picked from commit 5db7629) Signed-off-by: ashcherbakov --- plenum/common/stack_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plenum/common/stack_manager.py b/plenum/common/stack_manager.py index 76244cad05..27354b8e96 100644 --- a/plenum/common/stack_manager.py +++ b/plenum/common/stack_manager.py @@ -278,7 +278,8 @@ def getNodesServices(self): # Returns services for each node srvs = dict() for _, txn in self.ledger.getAllTxn(): - if txn[TXN_TYPE] == NODE: + if txn[TXN_TYPE] == NODE and \ + txn.get(DATA, {}).get(SERVICES) is not None: srvs.update({txn[TARGET_NYM]: txn[DATA][SERVICES]}) return srvs From 62adb6f22c78cf83e874cc58c92b9e0a89299936 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Fri, 13 Apr 2018 18:07:09 +0300 Subject: [PATCH 022/100] [Increment libindy-crypto] Bumped libindy-crypto version to 0.4.0 Signed-off-by: Andrew Nikitin --- ci/ubuntu.dockerfile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/ubuntu.dockerfile b/ci/ubuntu.dockerfile index 4056d4caed..1641d22ca1 100644 --- 
a/ci/ubuntu.dockerfile +++ b/ci/ubuntu.dockerfile @@ -9,7 +9,7 @@ RUN echo "To invalidate cache" RUN apt-get update -y && apt-get install -y \ python3-nacl \ - libindy-crypto=0.2.0 \ + libindy-crypto=0.4.0 \ libindy=1.3.1~454 \ # rocksdb python wrapper libbz2-dev \ diff --git a/setup.py b/setup.py index 3afc932c4a..a45b2aed16 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ 'sortedcontainers==1.5.7', 'psutil', 'pip', 'portalocker==0.5.7', 'pyzmq', 'libnacl==1.6.1', 'six==1.11.0', 'psutil', 'intervaltree', - 'msgpack-python==0.4.6', 'indy-crypto==0.2.0', + 'msgpack-python==0.4.6', 'indy-crypto==0.4.0', 'python-rocksdb==0.6.9'], setup_requires=['pytest-runner'], extras_require={ From 778c31d2854308b32aef94d2ff964ff986c5ef0a Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Mon, 16 Apr 2018 15:40:23 +0300 Subject: [PATCH 023/100] [Increment-crypto] Increment libindy version with new indy-crypto Signed-off-by: Andrew Nikitin --- ci/ubuntu.dockerfile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/ubuntu.dockerfile b/ci/ubuntu.dockerfile index 1641d22ca1..97c5b01417 100644 --- a/ci/ubuntu.dockerfile +++ b/ci/ubuntu.dockerfile @@ -10,7 +10,7 @@ RUN echo "To invalidate cache" RUN apt-get update -y && apt-get install -y \ python3-nacl \ libindy-crypto=0.4.0 \ - libindy=1.3.1~454 \ + libindy=1.3.1~469 \ # rocksdb python wrapper libbz2-dev \ zlib1g-dev \ diff --git a/setup.py b/setup.py index a45b2aed16..3ef46f437d 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ # Load the metadata using exec() so we don't trigger an import of ioflo.__init__ exec(compile(open(METADATA).read(), METADATA, 'exec')) -tests_require = ['pytest', 'pytest-xdist', 'python3-indy==1.3.1-dev-454'] +tests_require = ['pytest', 'pytest-xdist', 'python3-indy==1.3.1-dev-469'] setup( name='indy-plenum', From 110f4dd4a8ce7bdc1245b5ba86a03dae1ec05892 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Mon, 16 Apr 2018 16:10:22 +0300 Subject: [PATCH 024/100] 
[Increment crypto] Disable "pip3 install pip" Signed-off-by: Andrew Nikitin --- ci/code-validation.dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/code-validation.dockerfile b/ci/code-validation.dockerfile index 11bf098cf8..163600b47a 100644 --- a/ci/code-validation.dockerfile +++ b/ci/code-validation.dockerfile @@ -11,8 +11,7 @@ RUN apt-get update -y && apt-get install -y \ python3-pip \ python-setuptools \ python3-nacl -RUN pip3 install -U \ - pip \ +RUN pip3 install -U \ setuptools \ pep8 \ pep8-naming \ From 2edb8ec28591283fce607d019ea9e1b474db31f2 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Mon, 16 Apr 2018 16:26:24 +0300 Subject: [PATCH 025/100] [Increment crypto] Added pip version Signed-off-by: Andrew Nikitin --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3ef46f437d..d517412160 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ 'prompt_toolkit==0.57', 'pygments', 'rlp', 'sha3', 'leveldb', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', - 'sortedcontainers==1.5.7', 'psutil', 'pip', + 'sortedcontainers==1.5.7', 'psutil', 'pip==9.0.3', 'portalocker==0.5.7', 'pyzmq', 'libnacl==1.6.1', 'six==1.11.0', 'psutil', 'intervaltree', 'msgpack-python==0.4.6', 'indy-crypto==0.4.0', From f0200306858eae574407df4f8a280666766d4a9c Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Tue, 17 Apr 2018 11:33:56 +0300 Subject: [PATCH 026/100] [Freeze pip] Pip version < 10.0.0 Signed-off-by: Andrew Nikitin --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 3afc932c4a..090fad363a 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ 'prompt_toolkit==0.57', 'pygments', 'rlp', 'sha3', 'leveldb', 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', - 'sortedcontainers==1.5.7', 'psutil', 'pip', + 'sortedcontainers==1.5.7', 'psutil', 'pip<10.0.0', 'portalocker==0.5.7', 'pyzmq', 'libnacl==1.6.1', 'six==1.11.0', 'psutil', 
'intervaltree', 'msgpack-python==0.4.6', 'indy-crypto==0.2.0', @@ -74,4 +74,4 @@ 'scripts/udp_sender', 'scripts/udp_receiver', 'scripts/filter_log', 'scripts/log_stats', 'scripts/init_bls_keys'] -) \ No newline at end of file +) From 3a19f4ff51dad424a86d413bbccb17ea34f214a3 Mon Sep 17 00:00:00 2001 From: dsurnin Date: Thu, 19 Apr 2018 17:35:52 +0300 Subject: [PATCH 027/100] Hotfix https://jira.hyperledger.org/browse/INDY-1256 Add from current state flag to be able to process view change done on zero view Signed-off-by: dsurnin --- plenum/server/node.py | 12 +-- plenum/server/view_change/view_changer.py | 4 +- ...est_add_node_to_pool_with_large_ppseqno.py | 2 - ...te_primary_after_primary_restart_view_0.py | 78 ++++++++++++++++++ ...te_primary_after_primary_restart_view_1.py | 82 +++++++++++++++++++ 5 files changed, 168 insertions(+), 10 deletions(-) create mode 100644 plenum/test/primary_selection/test_propagate_primary_after_primary_restart_view_0.py create mode 100644 plenum/test/primary_selection/test_propagate_primary_after_primary_restart_view_1.py diff --git a/plenum/server/node.py b/plenum/server/node.py index d044f8e53c..c88ad58956 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -1203,7 +1203,7 @@ def process_current_state_message(self, msg: CurrentState, frm): messages = [ViewChangeDone(**message) for message in msg.primary] for message in messages: # TODO DRY, view change done messages are managed by routes - self.sendToViewChanger(message, frm) + self.sendToViewChanger(message, frm, from_current_state=True) except TypeError: self.discard(msg, reason="{}invalid election messages".format( @@ -1422,7 +1422,7 @@ def msgHasAcceptableInstId(self, msg, frm) -> bool: return False return True - def msgHasAcceptableViewNo(self, msg, frm) -> bool: + def msgHasAcceptableViewNo(self, msg, frm, from_current_state: bool = False) -> bool: """ Return true if the view no of message corresponds to the current view no or a view no in the future @@ -1436,7 
+1436,7 @@ def msgHasAcceptableViewNo(self, msg, frm) -> bool: if self.viewNo - view_no > 1: self.discard(msg, "un-acceptable viewNo {}" .format(view_no), logMethod=logger.warning) - elif view_no > self.viewNo: + elif (view_no > self.viewNo) or (self.viewNo == 0 and from_current_state): if view_no not in self.msgsForFutureViews: self.msgsForFutureViews[view_no] = deque() logger.info('{} stashing a message for a future view: {}'. @@ -1444,7 +1444,7 @@ def msgHasAcceptableViewNo(self, msg, frm) -> bool: self.msgsForFutureViews[view_no].append((msg, frm)) if isinstance(msg, ViewChangeDone): # TODO this is put of the msgs queue scope - self.view_changer.on_future_view_vchd_msg(view_no, frm) + self.view_changer.on_future_view_vchd_msg(view_no, frm, from_current_state=from_current_state) else: return True return False @@ -1462,7 +1462,7 @@ def sendToReplica(self, msg, frm): if self.msgHasAcceptableViewNo(msg, frm): self.replicas.pass_message((msg, frm), msg.instId) - def sendToViewChanger(self, msg, frm): + def sendToViewChanger(self, msg, frm, from_current_state: bool = False): """ Send the message to the intended view changer. @@ -1470,7 +1470,7 @@ def sendToViewChanger(self, msg, frm): :param frm: the name of the node which sent this `msg` """ if (isinstance(msg, InstanceChange) or - self.msgHasAcceptableViewNo(msg, frm)): + self.msgHasAcceptableViewNo(msg, frm, from_current_state=from_current_state)): logger.debug("{} sending message to view changer: {}". 
format(self, (msg, frm))) self.msgsToViewChanger.append((msg, frm)) diff --git a/plenum/server/view_change/view_changer.py b/plenum/server/view_change/view_changer.py index 99d891fae4..4683343bee 100644 --- a/plenum/server/view_change/view_changer.py +++ b/plenum/server/view_change/view_changer.py @@ -278,8 +278,8 @@ def on_catchup_complete(self): self._start_selection() - def on_future_view_vchd_msg(self, view_no, frm): - assert view_no > self.view_no + def on_future_view_vchd_msg(self, view_no, frm, from_current_state: bool = False): + assert (view_no > self.view_no) or (self.view_no == 0 and from_current_state) if view_no not in self._next_view_indications: self._next_view_indications[view_no] = set() self._next_view_indications[view_no].add(frm) diff --git a/plenum/test/primary_selection/test_add_node_to_pool_with_large_ppseqno.py b/plenum/test/primary_selection/test_add_node_to_pool_with_large_ppseqno.py index 854bb9c644..5d60fe69a7 100644 --- a/plenum/test/primary_selection/test_add_node_to_pool_with_large_ppseqno.py +++ b/plenum/test/primary_selection/test_add_node_to_pool_with_large_ppseqno.py @@ -37,8 +37,6 @@ def test_add_node_to_pool_with_large_ppseqno_diff_views(do_view_change, looper, are functional. 
The test is run with several starting view_no, including 0 """ - # TODO: for now this test will use old client api, after moving node txn to sdk it will be rewritten - ensure_several_view_change(looper, txnPoolNodeSet, do_view_change, custom_timeout=tconf.VIEW_CHANGE_TIMEOUT) big_ppseqno = tconf.LOG_SIZE * 2 + 2345 diff --git a/plenum/test/primary_selection/test_propagate_primary_after_primary_restart_view_0.py b/plenum/test/primary_selection/test_propagate_primary_after_primary_restart_view_0.py new file mode 100644 index 0000000000..046d0aa461 --- /dev/null +++ b/plenum/test/primary_selection/test_propagate_primary_after_primary_restart_view_0.py @@ -0,0 +1,78 @@ +from plenum.test.delayers import icDelay +from plenum.test.node_request.helper import sdk_ensure_pool_functional +from plenum.test.helper import checkViewNoForNodes +from plenum.test.view_change.helper import start_stopped_node +from plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected +from plenum.test.test_node import checkNodesConnected, get_master_primary_node, ensureElectionsDone + +from stp_core.common.log import getlogger + +logger = getlogger() + + +def delay_instance_change(txnPoolNodeSet, val): + for n in txnPoolNodeSet: + n.nodeIbStasher.delay(icDelay(val)) + + +def _get_ppseqno(nodes): + res = set() + for node in nodes: + for repl in node.replicas: + if repl.isMaster: + res.add(repl.lastPrePrepareSeqNo) + assert (len(res) == 1) + return min(res) + + +IC_DELAY_SEC = 100 + + +def test_propagate_primary_after_primary_restart_view_0( + looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward, tdir, allPluginsPath): + """ + Delay instance change msgs to prevent view change during primary restart + to test propagate primary for primary node. 
+ ppSeqNo should be > 0 to be able to check that propagate primary restores all + indexes correctly + case viewNo == 0 + """ + sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle) + + old_ppseqno = _get_ppseqno(txnPoolNodeSet) + assert (old_ppseqno > 0) + + old_viewNo = checkViewNoForNodes(txnPoolNodeSet) + old_primary = get_master_primary_node(txnPoolNodeSet) + + delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC) + + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, old_primary, stopNode=True) + + looper.removeProdable(old_primary) + + logger.info("Restart node {}".format(old_primary)) + + restartedNode = start_stopped_node(old_primary, looper, tconf, tdir, allPluginsPath, + delay_instance_change_msgs=False) + idx = [i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name][0] + txnPoolNodeSet[idx] = restartedNode + + restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC)) + + looper.run(checkNodesConnected(txnPoolNodeSet)) + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + + new_viewNo = checkViewNoForNodes(txnPoolNodeSet) + assert (new_viewNo == old_viewNo) + + new_primary = get_master_primary_node(txnPoolNodeSet) + assert (new_primary.name == old_primary.name) + + # check ppSeqNo the same + _get_ppseqno(txnPoolNodeSet) + + sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle) + + new_ppseqno = _get_ppseqno(txnPoolNodeSet) + assert (new_ppseqno > old_ppseqno) diff --git a/plenum/test/primary_selection/test_propagate_primary_after_primary_restart_view_1.py b/plenum/test/primary_selection/test_propagate_primary_after_primary_restart_view_1.py new file mode 100644 index 0000000000..8b52f53cf8 --- /dev/null +++ b/plenum/test/primary_selection/test_propagate_primary_after_primary_restart_view_1.py @@ -0,0 +1,82 @@ +from plenum.test.delayers import icDelay +from plenum.test.node_request.helper import sdk_ensure_pool_functional +from plenum.test.helper import 
checkViewNoForNodes +from plenum.test.view_change.helper import start_stopped_node, ensure_view_change +from plenum.test.pool_transactions.helper import disconnect_node_and_ensure_disconnected +from plenum.test.test_node import checkNodesConnected, get_master_primary_node, ensureElectionsDone + +from stp_core.common.log import getlogger + +logger = getlogger() + + +def delay_instance_change(txnPoolNodeSet, val): + for n in txnPoolNodeSet: + n.nodeIbStasher.delay(icDelay(val)) + + +def _get_ppseqno(nodes): + res = set() + for node in nodes: + for repl in node.replicas: + if repl.isMaster: + res.add(repl.lastPrePrepareSeqNo) + assert (len(res) == 1) + return min(res) + + +IC_DELAY_SEC = 100 + + +def test_propagate_primary_after_primary_restart_view_1( + looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward, tdir, allPluginsPath): + """ + Delay instance change msgs to prevent view change during primary restart + to test propagate primary for primary node. + ppSeqNo should be > 0 to be able to check that propagate primary restores all + indexes correctly + case viewNo > 0 + """ + + ensure_view_change(looper, txnPoolNodeSet) + checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=1) + + sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle) + + old_ppseqno = _get_ppseqno(txnPoolNodeSet) + assert (old_ppseqno > 0) + + old_viewNo = checkViewNoForNodes(txnPoolNodeSet) + old_primary = get_master_primary_node(txnPoolNodeSet) + + delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC) + + disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, old_primary, stopNode=True) + + looper.removeProdable(old_primary) + + logger.info("Restart node {}".format(old_primary)) + + restartedNode = start_stopped_node(old_primary, looper, tconf, tdir, allPluginsPath, + delay_instance_change_msgs=False) + idx = [i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name][0] + txnPoolNodeSet[idx] = restartedNode + + 
restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC)) + + looper.run(checkNodesConnected(txnPoolNodeSet)) + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + + new_viewNo = checkViewNoForNodes(txnPoolNodeSet) + assert (new_viewNo == old_viewNo) + + new_primary = get_master_primary_node(txnPoolNodeSet) + assert (new_primary.name == old_primary.name) + + # check ppSeqNo the same + _get_ppseqno(txnPoolNodeSet) + + sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle) + + new_ppseqno = _get_ppseqno(txnPoolNodeSet) + assert (new_ppseqno > old_ppseqno) From 69550e8df0f0d955a451eba6072c8657de56445d Mon Sep 17 00:00:00 2001 From: dsurnin Date: Fri, 20 Apr 2018 13:11:42 +0300 Subject: [PATCH 028/100] hotfix https://jira.hyperledger.org/browse/INDY-1256 Extend testing timeout Signed-off-by: dsurnin --- .../test_no_instance_change_before_node_is_ready.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plenum/test/view_change/test_no_instance_change_before_node_is_ready.py b/plenum/test/view_change/test_no_instance_change_before_node_is_ready.py index b532f6917f..104b76248d 100644 --- a/plenum/test/view_change/test_no_instance_change_before_node_is_ready.py +++ b/plenum/test/view_change/test_no_instance_change_before_node_is_ready.py @@ -11,7 +11,7 @@ @pytest.fixture(scope="module", autouse=True) def tconf(tconf): old_vc_timeout = tconf.VIEW_CHANGE_TIMEOUT - tconf.VIEW_CHANGE_TIMEOUT = 5 + tconf.VIEW_CHANGE_TIMEOUT = 10 yield tconf tconf.VIEW_CHANGE_TIMEOUT = old_vc_timeout @@ -42,6 +42,8 @@ def test_no_instance_change_on_primary_disconnection_for_not_ready_node( # 3. make sure no InstanceChange sent by the new node assert 0 == new_node.view_changer.spylog.count(ViewChanger.sendInstanceChange.__name__) + logger.info("Start added node {}".format(new_node)) + # 4. add the node to the pool (send NODE txn) and make sure that the node is ready now. 
add_started_node(looper, new_node, From 3056e1d0b93a1c3a7e638742ef0845cb48d77309 Mon Sep 17 00:00:00 2001 From: dsurnin Date: Wed, 25 Apr 2018 09:59:27 +0300 Subject: [PATCH 029/100] Hotfix https://jira.hyperledger.org/browse/INDY-1256 Fix current_state processing Signed-off-by: dsurnin --- plenum/server/node.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plenum/server/node.py b/plenum/server/node.py index c88ad58956..a983d154ea 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -1422,6 +1422,9 @@ def msgHasAcceptableInstId(self, msg, frm) -> bool: return False return True + def _should_accept_current_state(self): + return self.viewNo == 0 and self.mode == Mode.starting and self.master_primary_name is None + def msgHasAcceptableViewNo(self, msg, frm, from_current_state: bool = False) -> bool: """ Return true if the view no of message corresponds to the current view @@ -1436,7 +1439,7 @@ def msgHasAcceptableViewNo(self, msg, frm, from_current_state: bool = False) -> if self.viewNo - view_no > 1: self.discard(msg, "un-acceptable viewNo {}" .format(view_no), logMethod=logger.warning) - elif (view_no > self.viewNo) or (self.viewNo == 0 and from_current_state): + elif (view_no > self.viewNo) or (from_current_state and self._should_accept_current_state()): if view_no not in self.msgsForFutureViews: self.msgsForFutureViews[view_no] = deque() logger.info('{} stashing a message for a future view: {}'. 
From cfc7bc0e3141b11093c1bb2edc8064ce3eaa58fa Mon Sep 17 00:00:00 2001 From: dsurnin Date: Wed, 16 May 2018 11:09:34 +0300 Subject: [PATCH 030/100] Fix flake errors Signed-off-by: dsurnin --- ledger/test/helper.py | 10 +++++----- plenum/common/script_helper.py | 4 ++-- stp_core/common/util.py | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ledger/test/helper.py b/ledger/test/helper.py index a4b8fc9f28..a1a2194e55 100644 --- a/ledger/test/helper.py +++ b/ledger/test/helper.py @@ -92,19 +92,19 @@ def create_ledger_text_file_storage(txn_serializer, hash_serializer, tempdir, in storeContentHash=False, ensureDurability=False) - return __create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file) + return _create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file) def create_ledger_leveldb_storage(txn_serializer, hash_serializer, tempdir, init_genesis_txn_file=None): store = KeyValueStorageLeveldbIntKeys(tempdir, 'transactions') - return __create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file) + return _create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file) def create_ledger_rocksdb_storage(txn_serializer, hash_serializer, tempdir, init_genesis_txn_file=None): store = KeyValueStorageRocksdbIntKeys(tempdir, 'transactions') - return __create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file) + return _create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file) def create_ledger_chunked_file_storage(txn_serializer, hash_serializer, tempdir, init_genesis_txn_file=None): @@ -126,10 +126,10 @@ def chunk_creator(name): chunk_creator=chunk_creator, storeContentHash=False, ensureDurability=False) - return __create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file) + return _create_ledger(store, txn_serializer, hash_serializer, tempdir, 
init_genesis_txn_file) -def __create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file=None): +def _create_ledger(store, txn_serializer, hash_serializer, tempdir, init_genesis_txn_file=None): genesis_txn_initiator = GenesisTxnInitiatorFromFile(tempdir, init_genesis_txn_file) if init_genesis_txn_file else None ledger = Ledger(CompactMerkleTree(hashStore=FileHashStore(dataDir=tempdir)), diff --git a/plenum/common/script_helper.py b/plenum/common/script_helper.py index 1d139a7290..63b3e7f0e0 100644 --- a/plenum/common/script_helper.py +++ b/plenum/common/script_helper.py @@ -197,7 +197,7 @@ def submitNodeIpChange(client, stewardWallet, name: str, nym: str, return req[0] -def __checkClientConnected(cli, ): +def _checkClientConnected(cli, ): assert cli.hasSufficientConnections @@ -218,7 +218,7 @@ def changeHA(looper, config, nodeName, nodeSeed, newNodeHA, client = Client(stewardName, ha=('0.0.0.0', randomClientPort), config=config, basedirpath=basedir) looper.add(client) timeout = waits.expectedClientToPoolConnectionTimeout(4) - looper.run(eventually(__checkClientConnected, client, + looper.run(eventually(_checkClientConnected, client, retryWait=1, timeout=timeout)) nodeVerKey = SimpleSigner(seed=nodeSeed).verkey diff --git a/stp_core/common/util.py b/stp_core/common/util.py index c2cdfa6ac8..fb6b0c891d 100644 --- a/stp_core/common/util.py +++ b/stp_core/common/util.py @@ -5,11 +5,11 @@ class Singleton(type): _instances = {} - def __call__(self, *args, **kwargs): - if self not in self._instances: - self._instances[self] = super( - Singleton, self).__call__(*args, **kwargs) - return self._instances[self] + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super( + Singleton, cls).__call__(*args, **kwargs) + return cls._instances[cls] def lxor(a, b): From 1226f748ef2e61272afce2e8ad5445b50329b564 Mon Sep 17 00:00:00 2001 From: dsurnin Date: Wed, 16 May 2018 12:00:35 +0300 Subject: [PATCH 031/100] Fix 
packets versions Signed-off-by: dsurnin --- setup.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 08e1d08446..90dbe82ac5 100644 --- a/setup.py +++ b/setup.py @@ -50,13 +50,13 @@ '': ['*.txt', '*.md', '*.rst', '*.json', '*.conf', '*.html', '*.css', '*.ico', '*.png', 'LICENSE', 'LEGAL', 'plenum']}, include_package_data=True, - install_requires=['jsonpickle', 'ujson==1.33', - 'prompt_toolkit==0.57', 'pygments', - 'rlp', 'sha3', 'leveldb', - 'ioflo==1.5.4', 'semver', 'base58', 'orderedset', - 'sortedcontainers==1.5.7', 'psutil', 'pip<10.0.0', - 'portalocker==0.5.7', 'pyzmq', 'libnacl==1.6.1', - 'six==1.11.0', 'psutil', 'intervaltree', + install_requires=['jsonpickle==0.9.6', 'ujson==1.33', + 'prompt_toolkit==0.57', 'pygments==2.2.0', + 'rlp==0.5.1', 'sha3==0.2.1', 'leveldb', + 'ioflo==1.5.4', 'semver==2.7.9', 'base58==1.0.0', 'orderedset==2.0', + 'sortedcontainers==1.5.7', 'psutil==5.4.3', 'pip<10.0.0', + 'portalocker==0.5.7', 'pyzmq==17.0.0', 'libnacl==1.6.1', + 'six==1.11.0', 'psutil==5.4.3', 'intervaltree==2.1.0', 'msgpack-python==0.4.6', 'indy-crypto==0.4.0', 'python-rocksdb==0.6.9'], setup_requires=['pytest-runner'], From 7e821987e86912d3a822ad713dca1ec6b1f6b779 Mon Sep 17 00:00:00 2001 From: dsurnin Date: Wed, 16 May 2018 12:26:20 +0300 Subject: [PATCH 032/100] Revert old base58 Signed-off-by: dsurnin --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 90dbe82ac5..69249a50be 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ install_requires=['jsonpickle==0.9.6', 'ujson==1.33', 'prompt_toolkit==0.57', 'pygments==2.2.0', 'rlp==0.5.1', 'sha3==0.2.1', 'leveldb', - 'ioflo==1.5.4', 'semver==2.7.9', 'base58==1.0.0', 'orderedset==2.0', + 'ioflo==1.5.4', 'semver==2.7.9', 'base58==0.2.4', 'orderedset==2.0', 'sortedcontainers==1.5.7', 'psutil==5.4.3', 'pip<10.0.0', 'portalocker==0.5.7', 'pyzmq==17.0.0', 'libnacl==1.6.1', 'six==1.11.0', 'psutil==5.4.3', 
'intervaltree==2.1.0', From 5e33183ef59c876e2fe848d0a2815fdceece582c Mon Sep 17 00:00:00 2001 From: anikitinDSR Date: Thu, 17 May 2018 19:19:54 +0300 Subject: [PATCH 033/100] [Bump indy-crypto] Bump indy-crypto and python3-indy (#682) Signed-off-by: Andrew Nikitin Signed-off-by: anikitinDSR Sign-off-executed-by: toktar Approved-at: h-master --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 69249a50be..0fb676b78c 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ # Load the metadata using exec() so we don't trigger an import of ioflo.__init__ exec(compile(open(METADATA).read(), METADATA, 'exec')) -tests_require = ['pytest', 'pytest-xdist', 'python3-indy==1.3.1-dev-469'] +tests_require = ['pytest==3.4.1', 'pytest-xdist==1.22.1', 'python3-indy==1.4.0-dev-509'] setup( name='indy-plenum', @@ -57,8 +57,8 @@ 'sortedcontainers==1.5.7', 'psutil==5.4.3', 'pip<10.0.0', 'portalocker==0.5.7', 'pyzmq==17.0.0', 'libnacl==1.6.1', 'six==1.11.0', 'psutil==5.4.3', 'intervaltree==2.1.0', - 'msgpack-python==0.4.6', 'indy-crypto==0.4.0', - 'python-rocksdb==0.6.9'], + 'msgpack-python==0.4.6', 'indy-crypto==0.4.1', + 'python-rocksdb==0.6.9', 'python-dateutil==2.6.1'], setup_requires=['pytest-runner'], extras_require={ 'tests': tests_require, From fd5ac040ca9f120d3634ae88e8d42ec977f5a619 Mon Sep 17 00:00:00 2001 From: ashcherbakov Date: Tue, 26 Jun 2018 11:52:26 +0300 Subject: [PATCH 034/100] use protocolVersion=None in test genesis files Signed-off-by: ashcherbakov --- plenum/common/test_network_setup.py | 2 +- plenum/test/script/test_bootstrap_test_node.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plenum/common/test_network_setup.py b/plenum/common/test_network_setup.py index 49d2306fb3..c57ac81f73 100644 --- a/plenum/common/test_network_setup.py +++ b/plenum/common/test_network_setup.py @@ -102,7 +102,7 @@ def bootstrapTestNodesCore( config, domainTxnFieldOrder) # TODO: make it parameter for 
generate genesis txns script - genesis_protocol_version = PlenumProtocolVersion.STATE_PROOF_SUPPORT.value + genesis_protocol_version = None # 1. INIT DOMAIN LEDGER GENESIS FILE seq_no = 1 diff --git a/plenum/test/script/test_bootstrap_test_node.py b/plenum/test/script/test_bootstrap_test_node.py index 7e910d2076..b6f5c7002a 100644 --- a/plenum/test/script/test_bootstrap_test_node.py +++ b/plenum/test/script/test_bootstrap_test_node.py @@ -107,7 +107,7 @@ def test_domain_genesis_txns(bootstrap, domain_genesis_file): assert get_payload_data(txn) assert get_type(txn) == NYM assert get_version(txn) == "1" - assert get_protocol_version(txn) == 1 + assert get_protocol_version(txn) is None assert get_payload_data(txn)[VERKEY] assert get_payload_data(txn)[TARGET_NYM] assert ALIAS not in get_payload_data(txn) @@ -133,7 +133,7 @@ def test_pool_genesis_txns(bootstrap, pool_genesis_file): assert get_payload_data(txn) assert get_type(txn) == NODE assert get_version(txn) == "1" - assert get_protocol_version(txn) == 1 + assert get_protocol_version(txn) is None assert get_payload_data(txn)[TARGET_NYM] data = get_payload_data(txn).get(DATA) assert data From f2f4219ccc4a4746a38eb16af5db1e6a64df0f6c Mon Sep 17 00:00:00 2001 From: ashcherbakov Date: Thu, 28 Jun 2018 15:35:53 +0300 Subject: [PATCH 035/100] fix get value from state if there is no BLS multi-sig Signed-off-by: ashcherbakov --- plenum/server/domain_req_handler.py | 2 +- plenum/test/req_handler/__init__.py | 0 .../req_handler/test_get_value_from_state.py | 100 ++++++++++++++++++ state/test/trie/test_proof.py | 9 +- state/trie/pruning_trie.py | 3 +- 5 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 plenum/test/req_handler/__init__.py create mode 100644 plenum/test/req_handler/test_get_value_from_state.py diff --git a/plenum/server/domain_req_handler.py b/plenum/server/domain_req_handler.py index d73538426e..55b3c2a882 100644 --- a/plenum/server/domain_req_handler.py +++ 
b/plenum/server/domain_req_handler.py @@ -185,7 +185,7 @@ def get_value_from_state(self, path, head_hash=None, with_proof=False): if not multi_sig: # Just return the value and not proof try: - return self.state.get_for_root_hash(path, root_hash), None + return self.state.get_for_root_hash(root_hash, path), None except KeyError: return None, None else: diff --git a/plenum/test/req_handler/__init__.py b/plenum/test/req_handler/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/req_handler/test_get_value_from_state.py b/plenum/test/req_handler/test_get_value_from_state.py new file mode 100644 index 0000000000..26176345b3 --- /dev/null +++ b/plenum/test/req_handler/test_get_value_from_state.py @@ -0,0 +1,100 @@ +import base58 +import pytest + +from common.serializers.serialization import state_roots_serializer +from crypto.bls.bls_multi_signature import MultiSignature, MultiSignatureValue +from plenum.common.util import get_utc_epoch + +num = 0 + + +@pytest.fixture(scope="function") +def domain_req_handler(txnPoolNodeSet): + return txnPoolNodeSet[0].getDomainReqHandler() + + +@pytest.fixture(scope="function") +def i(): + global num + num += 1 + return num + + +@pytest.fixture(scope="function", params=['committed', 'not_committed']) +def is_committed(request): + return request.param == 'committed' + + +@pytest.fixture(scope="function", params=['with_proof', 'no_proof']) +def with_proof(request): + return request.param == 'with_proof' + + +@pytest.fixture(scope="function", params=['with_bls', 'no_bls']) +def with_bls(request): + return request.param == 'with_bls' + + +def create_bls_multi_sig(encoded_root_hash): + pool_state_root_hash = base58.b58encode(b"somefakepoolroothashsomefakepoolroothash").decode("utf-8") + txn_root_hash = base58.b58encode(b"somefaketxnroothashsomefaketxnroothash").decode("utf-8") + ledger_id = 1 + timestamp = get_utc_epoch() + + value = MultiSignatureValue(ledger_id=ledger_id, + 
state_root_hash=encoded_root_hash, + pool_state_root_hash=pool_state_root_hash, + txn_root_hash=txn_root_hash, + timestamp=timestamp) + + sign = "1q" * 16 + participants = ["q" * 32, "w" * 32, "e" * 32, "r" * 32] + + return MultiSignature(sign, participants, value) + + +def add_bls_multi_sig(domain_req_handler, root_hash): + encoded_root_hash = state_roots_serializer.serialize(bytes(root_hash)) + domain_req_handler.bls_store.put(create_bls_multi_sig(encoded_root_hash)) + + +def test_get_value_default_head_hash(domain_req_handler, with_bls, is_committed, with_proof, i): + path = "key{}".format(i).encode() + value = "value{}".format(i).encode() + domain_req_handler.state.set(path, value) + + if is_committed: + domain_req_handler.state.commit() + if with_bls: + add_bls_multi_sig(domain_req_handler, domain_req_handler.state.committedHeadHash) + + expected_value = value if is_committed else None + has_proof = with_proof and with_bls + result = domain_req_handler.get_value_from_state(path, with_proof=with_proof) + + assert expected_value == result[0] + assert result[1] if has_proof else result[1] is None + + +def test_get_value_old_head_hash(domain_req_handler, is_committed, with_proof, with_bls, i): + path1 = "111key{}".format(i).encode() + value1 = "111value{}".format(i).encode() + domain_req_handler.state.set(path1, value1) + domain_req_handler.state.commit() + state1 = domain_req_handler.state.committedHeadHash + + path2 = "222key{}".format(i).encode() + value2 = "222value{}".format(i).encode() + domain_req_handler.state.set(path2, value2) + + if is_committed: + domain_req_handler.state.commit() + if with_bls: + add_bls_multi_sig(domain_req_handler, state1) + + expected_value = value1 + has_proof = with_proof and with_bls + result = domain_req_handler.get_value_from_state(path1, with_proof=with_proof, head_hash=state1) + + assert expected_value == result[0] + assert result[1] if has_proof else result[1] is None diff --git a/state/test/trie/test_proof.py 
b/state/test/trie/test_proof.py index c655c561c7..23c2480932 100644 --- a/state/test/trie/test_proof.py +++ b/state/test/trie/test_proof.py @@ -322,9 +322,9 @@ def test_proof_prefix_with_other_nodes(): def test_proof_multiple_prefix_nodes(): node_trie = Trie(PersistentDB(KeyValueStorageInMemory())) prefix_1 = 'abcdefgh' - prefix_2 = 'abcdefxy' # Prefix overlaps with previous + prefix_2 = 'abcdefxy' # Prefix overlaps with previous prefix_3 = 'pqrstuvw' - prefix_4 = 'mnoptuvw' # Suffix overlaps + prefix_4 = 'mnoptuvw' # Suffix overlaps all_prefixes = (prefix_1, prefix_2, prefix_3, prefix_4) @@ -388,3 +388,8 @@ def test_get_proof_and_value(): proof.append(deepcopy(node_trie.root_node)) assert v == test_data[k] assert client_trie.verify_spv_proof(node_trie.root_hash, k, v, proof) + + +def test_get_proof_and_value_no_key(): + node_trie = Trie(PersistentDB(KeyValueStorageInMemory())) + assert ([], None) == node_trie.produce_spv_proof(b"unknown_key", get_value=True) diff --git a/state/trie/pruning_trie.py b/state/trie/pruning_trie.py index d57b625056..32f1ec03d1 100644 --- a/state/trie/pruning_trie.py +++ b/state/trie/pruning_trie.py @@ -1047,7 +1047,8 @@ def produce_spv_proof(self, key, root=None, get_value=False): rv = self.get_at(root, key) o = proof.get_nodelist() proof.pop() - return (o, rv) if get_value else o + value = rv if rv != BLANK_NODE else None + return (o, value) if get_value else o def produce_spv_proof_for_keys_with_prefix(self, key_prfx, root=None, get_value=False): # Return a proof for keys in the trie with the given prefix. 
From e875227df059e4d93cdddf3a42e2efc52e8470bb Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Thu, 19 Jul 2018 09:56:35 +0300 Subject: [PATCH 036/100] [Indy-plenum] rename package name to indy-plenum Signed-off-by: Andrew Nikitin --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 12ecc40f48..63ad579bca 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ tests_require = ['pytest==3.3.1', 'pytest-xdist==1.22.1', 'python3-indy==1.4.0-dev-586'] setup( - name='indy-plenum-dev', # TODO refers to metadata as well + name='indy-plenum', # TODO refers to metadata as well version=md['__version__'], author=md['__author__'], author_email=md['__author_email__'], From 296a446acc1db657f144df11981db1221b8ab290 Mon Sep 17 00:00:00 2001 From: toktar Date: Sun, 22 Jul 2018 18:53:29 +0300 Subject: [PATCH 037/100] Add test for catchup with receiving incorrect ledger status Test case when one node restart and in catchup receive Ledger Status without protocol version from the one of nodes in pool. 
Signed-off-by: toktar --- .../test_node_catchup_with_new_ls_form.py | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py diff --git a/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py b/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py new file mode 100644 index 0000000000..e4041577f4 --- /dev/null +++ b/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py @@ -0,0 +1,80 @@ +import pytest + +from plenum.common.constants import LEDGER_STATUS +from plenum.common.messages.fields import LedgerIdField, NonNegativeNumberField, \ + MerkleRootField +from plenum.common.messages.message_base import MessageBase +from plenum.test.helper import sdk_send_random_and_check +from plenum.test.node_catchup.helper import waitNodeDataEquality +from plenum.test.node_catchup.test_config_ledger import start_stopped_node +from plenum.test.pool_transactions.helper import \ + disconnect_node_and_ensure_disconnected +from plenum.test.test_node import checkNodesConnected +from plenum.common.types import f + + +def test_node_catchup_with_new_ls_form(txnPoolNodeSet, + looper, + sdk_pool_handle, + sdk_wallet_steward, + tconf, + tdir, + allPluginsPath): + ''' + One node restart and in catchup receive Ledger Status without protocol + version from the one of nodes in pool. + ''' + node_to_disconnect = txnPoolNodeSet[-1] + break_node = txnPoolNodeSet[0] + + sdk_send_random_and_check(looper, txnPoolNodeSet, + sdk_pool_handle, sdk_wallet_steward, 5) + + original_get_ledger_status = break_node.getLedgerStatus + + # Path the method getLedgerStatus to + # get_ledger_status_without_protocol_version for sending ledger status + # without protocol version. 
+ + def get_ledger_status_without_protocol_version(ledgerId: int): + original_ledger_status = original_get_ledger_status(ledgerId) + return CustomLedgerStatus(original_ledger_status.ledgerId, + original_ledger_status.txnSeqNo, + original_ledger_status.viewNo, + original_ledger_status.ppSeqNo, + original_ledger_status.merkleRoot) + + break_node.getLedgerStatus = get_ledger_status_without_protocol_version + + # restart node + disconnect_node_and_ensure_disconnected(looper, + txnPoolNodeSet, + node_to_disconnect) + looper.removeProdable(name=node_to_disconnect.name) + sdk_send_random_and_check(looper, txnPoolNodeSet, + sdk_pool_handle, sdk_wallet_steward, + 2) + + # add node_to_disconnect to pool + node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf, + tdir, allPluginsPath) + txnPoolNodeSet[-1] = node_to_disconnect + looper.run(checkNodesConnected(txnPoolNodeSet)) + waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet) + sdk_send_random_and_check(looper, txnPoolNodeSet, + sdk_pool_handle, sdk_wallet_steward, 5) + waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet) + + +class CustomLedgerStatus(MessageBase): + """ + LedgerStatus class without protocol version + """ + typename = LEDGER_STATUS + schema = ( + (f.LEDGER_ID.nm, LedgerIdField()), + (f.TXN_SEQ_NO.nm, NonNegativeNumberField()), + (f.VIEW_NO.nm, NonNegativeNumberField(nullable=True)), + (f.PP_SEQ_NO.nm, NonNegativeNumberField(nullable=True)), + (f.MERKLE_ROOT.nm, MerkleRootField()) + ) From 0ac4815b8d6006b065730e28445bd5b344d270de Mon Sep 17 00:00:00 2001 From: toktar Date: Sun, 22 Jul 2018 19:49:47 +0300 Subject: [PATCH 038/100] Add check discarding Ledger Statuses test_node_catchup_with_new_ls_form Add check discarding a Ledger Statuses from the break_node for all ledgers Signed-off-by: toktar --- .../test/node_catchup/test_node_catchup_with_new_ls_form.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py b/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py index e4041577f4..992bfe5a6a 100644 --- a/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py +++ b/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py @@ -4,7 +4,7 @@ from plenum.common.messages.fields import LedgerIdField, NonNegativeNumberField, \ MerkleRootField from plenum.common.messages.message_base import MessageBase -from plenum.test.helper import sdk_send_random_and_check +from plenum.test.helper import sdk_send_random_and_check, countDiscarded from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.node_catchup.test_config_ledger import start_stopped_node from plenum.test.pool_transactions.helper import \ @@ -61,6 +61,9 @@ def get_ledger_status_without_protocol_version(ledgerId: int): txnPoolNodeSet[-1] = node_to_disconnect looper.run(checkNodesConnected(txnPoolNodeSet)) waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet) + # check discarding a Ledger Statuses from the break_node for all ledgers + assert countDiscarded(node_to_disconnect, + 'invalid replied message structure') >= 3 sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5) waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet) From a0091cc3a2a20c1f0c2a700ef93fe0a7d18004af Mon Sep 17 00:00:00 2001 From: Nikita Spivachuk Date: Wed, 25 Jul 2018 13:46:24 +0100 Subject: [PATCH 039/100] INDY-1519: Fixed handling of message replies - Added handling of TypeError to the logic of replied message creation in the handler of replies to message requests. 
Signed-off-by: Nikita Spivachuk --- plenum/server/message_handlers.py | 12 ++++++++++-- plenum/server/message_req_processor.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/plenum/server/message_handlers.py b/plenum/server/message_handlers.py index d38006fd8f..7a16aa8011 100644 --- a/plenum/server/message_handlers.py +++ b/plenum/server/message_handlers.py @@ -58,10 +58,18 @@ def process(self, msg: MessageRep, frm: str): logMethod=logger.debug) return None - valid_msg = self.create(msg.msg, **params) + valid_msg = None + + try: + valid_msg = self.create(msg.msg, **params) + except TypeError: + self.node.discard(msg, "invalid replied message structure", + logMethod=logger.warning) + if valid_msg is None: return None - return self.processor(valid_msg, params, frm) + + self.processor(valid_msg, params, frm) class LedgerStatusHandler(BaseHandler): diff --git a/plenum/server/message_req_processor.py b/plenum/server/message_req_processor.py index bae86a08f9..a89fb699ca 100644 --- a/plenum/server/message_req_processor.py +++ b/plenum/server/message_req_processor.py @@ -48,7 +48,7 @@ def process_message_rep(self, msg: MessageRep, frm): format(self, msg_type, frm)) return handler = self.handlers[msg_type] - return handler.process(msg, frm) + handler.process(msg, frm) def request_msg(self, typ, params: Dict, frm: List[str]=None): self.sendToNodes(MessageReq(**{ From b928bbb69b39ad347f6c02bc5c978b5483be2607 Mon Sep 17 00:00:00 2001 From: Nikita Spivachuk Date: Wed, 25 Jul 2018 20:42:21 +0100 Subject: [PATCH 040/100] INDY-1519: Fixes in handling of msg reqs / resps - Added unit tests for some unhandled cases in message request / response handling logic. - Added handling for these cases to message request / response processing logic. 
Signed-off-by: Nikita Spivachuk --- plenum/common/exceptions.py | 4 + plenum/server/message_handlers.py | 69 ++++++------ .../test_node_catchup_with_new_ls_form.py | 2 +- .../test_valid_message_request.py | 106 ++++++++++++++++-- 4 files changed, 133 insertions(+), 48 deletions(-) diff --git a/plenum/common/exceptions.py b/plenum/common/exceptions.py index 4fa3f7a15e..5609b5272f 100644 --- a/plenum/common/exceptions.py +++ b/plenum/common/exceptions.py @@ -182,6 +182,10 @@ class InvalidNodeMsg(InvalidNodeMessageException): pass +class MismatchedMessageReplyException(InvalidNodeMsg): + pass + + class MissingNodeOp(InvalidNodeMsg): pass diff --git a/plenum/server/message_handlers.py b/plenum/server/message_handlers.py index 7a16aa8011..8e79b81da1 100644 --- a/plenum/server/message_handlers.py +++ b/plenum/server/message_handlers.py @@ -1,7 +1,7 @@ from typing import Dict, Any, Optional from abc import ABCMeta, abstractmethod -from plenum.common.constants import THREE_PC_PREFIX +from plenum.common.exceptions import MismatchedMessageReplyException from plenum.common.messages.node_messages import MessageReq, MessageRep, \ LedgerStatus, PrePrepare, ConsistencyProof, Propagate, Prepare, Commit from plenum.common.types import f @@ -42,7 +42,8 @@ def serve(self, msg: MessageReq): params[field_name] = msg.params.get(type_name) if not self.validate(**params): - self.node.discard(msg, 'cannot serve request', logMethod=logger.debug) + self.node.discard(msg, 'cannot serve request', + logMethod=logger.debug) return None return self.requestor(params) @@ -54,22 +55,19 @@ def process(self, msg: MessageRep, frm: str): params[field_name] = msg.params.get(type_name) if not self.validate(**params): - self.node.discard(msg, 'cannot process requested message response', + self.node.discard(msg, 'cannot process message reply', logMethod=logger.debug) - return None - - valid_msg = None + return try: valid_msg = self.create(msg.msg, **params) + self.processor(valid_msg, params, frm) except 
TypeError: - self.node.discard(msg, "invalid replied message structure", + self.node.discard(msg, 'replied message has invalid structure', + logMethod=logger.warning) + except MismatchedMessageReplyException: + self.node.discard(msg, 'replied message does not satisfy query criteria', logMethod=logger.warning) - - if valid_msg is None: - return None - - self.processor(valid_msg, params, frm) class LedgerStatusHandler(BaseHandler): @@ -81,7 +79,10 @@ def validate(self, **kwargs) -> bool: return kwargs['ledger_id'] in self.node.ledger_ids def create(self, msg: Dict, **kwargs) -> LedgerStatus: - return LedgerStatus(**msg) + ls = LedgerStatus(**msg) + if ls.ledgerId != kwargs['ledger_id']: + raise MismatchedMessageReplyException + return ls def requestor(self, params: Dict[str, Any]) -> LedgerStatus: return self.node.getLedgerStatus(params['ledger_id']) @@ -105,7 +106,12 @@ def validate(self, **kwargs) -> bool: 'seq_no_end'] > 0) def create(self, msg: Dict, **kwargs) -> ConsistencyProof: - return ConsistencyProof(**msg) + cp = ConsistencyProof(**msg) + if cp.ledgerId != kwargs['ledger_id'] \ + or cp.seqNoStart != kwargs['seq_no_start'] \ + or cp.seqNoEnd != kwargs['seq_no_end']: + raise MismatchedMessageReplyException + return cp def requestor(self, params: Dict[str, Any]) -> ConsistencyProof: return self.node.ledgerManager._buildConsistencyProof( @@ -132,11 +138,10 @@ def validate(self, **kwargs) -> bool: def create(self, msg: Dict, **kwargs) -> Optional[PrePrepare]: pp = PrePrepare(**msg) - if pp.instId != kwargs['inst_id'] or pp.viewNo != kwargs['view_no']: - logger.warning( - '{}{} found PREPREPARE {} not satisfying query criteria' .format( - THREE_PC_PREFIX, self, pp)) - return None + if pp.instId != kwargs['inst_id'] \ + or pp.viewNo != kwargs['view_no'] \ + or pp.ppSeqNo != kwargs['pp_seq_no']: + raise MismatchedMessageReplyException return pp def requestor(self, params: Dict[str, Any]) -> Optional[PrePrepare]: @@ -165,11 +170,10 @@ def validate(self, **kwargs) 
-> bool: def create(self, msg: Dict, **kwargs) -> Optional[Prepare]: prepare = Prepare(**msg) - if prepare.instId != kwargs['inst_id'] or prepare.viewNo != kwargs['view_no']: - logger.warning( - '{}{} found PREPARE {} not satisfying query criteria' .format( - THREE_PC_PREFIX, self, prepare)) - return None + if prepare.instId != kwargs['inst_id'] \ + or prepare.viewNo != kwargs['view_no'] \ + or prepare.ppSeqNo != kwargs['pp_seq_no']: + raise MismatchedMessageReplyException return prepare def requestor(self, params: Dict[str, Any]) -> Prepare: @@ -198,11 +202,10 @@ def validate(self, **kwargs) -> bool: def create(self, msg: Dict, **kwargs) -> Optional[Commit]: commit = Commit(**msg) - if commit.instId != kwargs['inst_id'] or commit.viewNo != kwargs['view_no']: - logger.warning( - '{}{} found COMMIT {} not satisfying query criteria' .format( - THREE_PC_PREFIX, self, commit)) - return None + if commit.instId != kwargs['inst_id'] \ + or commit.viewNo != kwargs['view_no'] \ + or commit.ppSeqNo != kwargs['pp_seq_no']: + raise MismatchedMessageReplyException return commit def requestor(self, params: Dict[str, Any]) -> Commit: @@ -227,12 +230,8 @@ def validate(self, **kwargs) -> bool: def create(self, msg: Dict, **kwargs) -> Propagate: ppg = Propagate(**msg) request = self.node.client_request_class(**ppg.request) - if request.digest != kwargs[f.DIGEST.nm]: - logger.debug( - '{} found PROPAGATE {} not ' - 'satisfying query criteria'.format( - self, ppg)) - return None + if request.digest != kwargs['digest']: + raise MismatchedMessageReplyException return ppg def requestor(self, params: Dict[str, Any]) -> Optional[Propagate]: diff --git a/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py b/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py index 992bfe5a6a..8905226fda 100644 --- a/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py +++ b/plenum/test/node_catchup/test_node_catchup_with_new_ls_form.py @@ -63,7 +63,7 @@ def 
get_ledger_status_without_protocol_version(ledgerId: int): waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet) # check discarding a Ledger Statuses from the break_node for all ledgers assert countDiscarded(node_to_disconnect, - 'invalid replied message structure') >= 3 + 'replied message has invalid structure') >= 3 sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5) waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet) diff --git a/plenum/test/node_request/message_request/test_valid_message_request.py b/plenum/test/node_request/message_request/test_valid_message_request.py index 049125a8cb..348833ff27 100644 --- a/plenum/test/node_request/message_request/test_valid_message_request.py +++ b/plenum/test/node_request/message_request/test_valid_message_request.py @@ -12,7 +12,9 @@ invalid_type_discard_log = "unknown value 'invalid_type'" invalid_req_discard_log = "cannot serve request" -invalid_rep_discard_log = "cannot process requested message response" +invalid_rep_discard_log = "cannot process message reply" +invalid_replied_msg_structure = "replied message has invalid structure" +mismatched_params_log = "replied message does not satisfy query criteria" whitelist = [invalid_type_discard_log, ] @@ -40,6 +42,13 @@ class PMessageRep(MessageRep): return PMessageRep +def patched_LedgerStatus(): + class PLedgerStatus(LedgerStatus): + schema = LedgerStatus.schema[:-1] + + return PLedgerStatus + + discard_counts = {} pre_prepare_msg = PrePrepare( @@ -79,7 +88,7 @@ class PMessageRep(MessageRep): 'protocolVersion': CURRENT_PROTOCOL_VERSION}, 'senderClient': '+DG1:vO9#de6?R?>:3RwdAXSdefgLLfxSoN4WMEe'}) -bad_msgs = [ +msg_reps_with_invalid_params = [ (LEDGER_STATUS, {'p1': 'v1', 'p2': 'v2'}, LedgerStatus( 1, 20, 1, 2, '77wuDUSr4FtAJzJbSqSW7bBw8bKAbra8ABSAjR72Nipq', CURRENT_PROTOCOL_VERSION)), (LEDGER_STATUS, {f.LEDGER_ID.nm: 100}, LedgerStatus( @@ -108,6 +117,44 @@ class PMessageRep(MessageRep): ] 
+msg_reqs_with_invalid_params = map(lambda triplet: triplet[:2], + msg_reps_with_invalid_params) + + +msg_reps_with_mismatched_params = [ + (LEDGER_STATUS, {f.LEDGER_ID.nm: 0}, LedgerStatus( + 1, 20, 1, 2, '77wuDUSr4FtAJzJbSqSW7bBw8bKAbra8ABSAjR72Nipq', CURRENT_PROTOCOL_VERSION)), + (CONSISTENCY_PROOF, {f.LEDGER_ID.nm: 2, f.SEQ_NO_START.nm: 2, f.SEQ_NO_END.nm: 20}, + ConsistencyProof(1, 2, 20, 1, 3, + 'BvmagFYpXAYNTuNW8Qssk9tMhEEPucLqL55YuwngUvMw', + 'Dce684wcwhV2wNZCuYTzdW9Kr13ZXFgiuAuAGibFZc4v', + ['58qasGZ9y3TB1pMz7ARKjJeccEbvbx6FT6g3NFnjYsTS'])), + (CONSISTENCY_PROOF, {f.LEDGER_ID.nm: 1, f.SEQ_NO_START.nm: 1, f.SEQ_NO_END.nm: 20}, + ConsistencyProof(1, 2, 20, 1, 3, + 'BvmagFYpXAYNTuNW8Qssk9tMhEEPucLqL55YuwngUvMw', + 'Dce684wcwhV2wNZCuYTzdW9Kr13ZXFgiuAuAGibFZc4v', + ['58qasGZ9y3TB1pMz7ARKjJeccEbvbx6FT6g3NFnjYsTS'])), + (CONSISTENCY_PROOF, {f.LEDGER_ID.nm: 1, f.SEQ_NO_START.nm: 2, f.SEQ_NO_END.nm: 10}, + ConsistencyProof(1, 2, 20, 1, 3, + 'BvmagFYpXAYNTuNW8Qssk9tMhEEPucLqL55YuwngUvMw', + 'Dce684wcwhV2wNZCuYTzdW9Kr13ZXFgiuAuAGibFZc4v', + ['58qasGZ9y3TB1pMz7ARKjJeccEbvbx6FT6g3NFnjYsTS'])), + (PREPREPARE, {f.INST_ID.nm: 1, f.VIEW_NO.nm: 1, f.PP_SEQ_NO.nm: 3}, + pre_prepare_msg), + (PREPREPARE, {f.INST_ID.nm: 0, f.VIEW_NO.nm: 1, f.PP_SEQ_NO.nm: 5}, + pre_prepare_msg), + (PREPARE, {f.INST_ID.nm: 1, f.VIEW_NO.nm: 1, f.PP_SEQ_NO.nm: 3}, + prepare_msg), + (PREPARE, {f.INST_ID.nm: 0, f.VIEW_NO.nm: 1, f.PP_SEQ_NO.nm: 5}, + prepare_msg), + (COMMIT, {f.INST_ID.nm: 1, f.VIEW_NO.nm: 1, f.PP_SEQ_NO.nm: 3}, + commit_msg), + (COMMIT, {f.INST_ID.nm: 0, f.VIEW_NO.nm: 1, f.PP_SEQ_NO.nm: 5}, + commit_msg), + (PROPAGATE, {f.DIGEST.nm: 'MISMATCHED_DIGEST'}, propagate_msg), +] + + def fill_counters(nodes, log_message): global discard_counts discard_counts[log_message] = {n.name: countDiscarded(n, log_message) @@ -128,12 +175,11 @@ def nodes(txnPoolNodeSet): return bad_node, other_nodes -def test_node_reject_invalid_req_resp_type(looper, nodes): +def 
test_node_rejects_msg_reqs_with_invalid_type(looper, nodes): """ Node does not accept invalid `MessageReq`, with an unacceptable type. Also it does not accept invalid `MessageRep` """ - global discard_counts bad_node, other_nodes = nodes fill_counters(other_nodes, invalid_type_discard_log) bad_msg = patched_MessageReq()('invalid_type', {'p1': 'v1', 'p2': 'v2'}) @@ -142,6 +188,13 @@ def test_node_reject_invalid_req_resp_type(looper, nodes): looper.run(eventually(chk, other_nodes, invalid_type_discard_log, retryWait=1)) + +def test_node_rejects_msg_reps_with_invalid_type(looper, nodes): + """ + Node does not accept invalid `MessageReq`, with an unacceptable type. Also + it does not accept invalid `MessageRep` + """ + bad_node, other_nodes = nodes fill_counters(other_nodes, invalid_type_discard_log) bad_msg = patched_MessageRep()('invalid_type', {'p1': 'v1', 'p2': 'v2'}, @@ -151,31 +204,60 @@ def test_node_reject_invalid_req_resp_type(looper, nodes): invalid_type_discard_log, retryWait=1)) -def test_node_reject_invalid_req_params(looper, nodes): +def test_node_rejects_msg_reqs_with_invalid_params(looper, nodes): """ Node does not accept invalid `MessageReq`, with missing params. Also it does not accept invalid `MessageRep` """ - global discard_counts, bad_msgs bad_node, other_nodes = nodes - for bad_msg in bad_msgs: + for msg_req_with_invalid_params in msg_reqs_with_invalid_params: fill_counters(other_nodes, invalid_req_discard_log) - bad_node.send(patched_MessageReq()(*bad_msg[:2])) + bad_node.send(patched_MessageReq()(*msg_req_with_invalid_params)) looper.run(eventually(chk, other_nodes, invalid_req_discard_log, retryWait=1)) -def test_node_reject_invalid_resp_params(looper, nodes): +def test_node_rejects_msg_reps_with_invalid_params(looper, nodes): """ Node does not accept invalid `MessageReq`, with missing params. 
Also it does not accept invalid `MessageRep` """ - global discard_counts, bad_msgs bad_node, other_nodes = nodes - for bad_msg in bad_msgs: + for msg_rep_with_invalid_params in msg_reps_with_invalid_params: fill_counters(other_nodes, invalid_rep_discard_log) - bad_node.send(patched_MessageRep()(*bad_msg)) + bad_node.send(patched_MessageRep()(*msg_rep_with_invalid_params)) looper.run(eventually(chk, other_nodes, invalid_rep_discard_log, retryWait=1)) + + +def test_node_rejects_msg_reps_with_invalid_msg_structure(looper, nodes): + """ + Node does not accept invalid `MessageReq`, with an unacceptable type. Also + it does not accept invalid `MessageRep` + """ + bad_node, other_nodes = nodes + fill_counters(other_nodes, invalid_replied_msg_structure) + + bad_msg = patched_MessageRep()( + LEDGER_STATUS, + {f.LEDGER_ID.nm: 1}, + patched_LedgerStatus()(1, 20, 1, 2, '77wuDUSr4FtAJzJbSqSW7bBw8bKAbra8ABSAjR72Nipq')) + bad_node.send(bad_msg) + looper.run(eventually(chk, other_nodes, + invalid_replied_msg_structure, retryWait=1)) + + +def test_node_rejects_msg_reps_with_mismatched_params(looper, nodes): + """ + Node does not accept invalid `MessageReq`, with missing params. 
+ Also it does not accept invalid `MessageRep` + """ + bad_node, other_nodes = nodes + + for msg_rep_with_mismatched_params in msg_reps_with_mismatched_params: + fill_counters(other_nodes, mismatched_params_log) + bad_node.send(patched_MessageRep()(*msg_rep_with_mismatched_params)) + looper.run(eventually(chk, other_nodes, mismatched_params_log, + retryWait=1)) From e25d0e3118181a971ddcf28fddc0588a3e88c63b Mon Sep 17 00:00:00 2001 From: lovesh harchandani Date: Sun, 12 Aug 2018 20:01:48 +0530 Subject: [PATCH 041/100] get request handler by txn type Signed-off-by: lovesh harchandani --- plenum/server/node.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plenum/server/node.py b/plenum/server/node.py index d73aaf0e5d..692b757cb3 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -1938,7 +1938,10 @@ def postRecvTxnFromCatchup(self, ledgerId: int, txn: Any): self.poolManager.onPoolMembershipChange(txn) if ledgerId == DOMAIN_LEDGER_ID: self.post_txn_from_catchup_added_to_domain_ledger(txn) - rh = self.get_req_handler(ledgerId) + typ = get_type(txn) + # Since a ledger can contain txns which can be processed by an arbitrary number of request handlers; + # ledger-to-request_handler is a one-to-many relationship + rh = self.get_req_handler(txn_type=typ) return rh # TODO: should be renamed to `post_all_ledgers_caughtup` From 57fc3845196492ad925bdee7b4ff92a8ef5f5b40 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Wed, 10 Oct 2018 18:38:36 +0300 Subject: [PATCH 042/100] [bump indy-crypto] bump version for python3-indy-crypto Signed-off-by: Andrew Nikitin --- ci/ubuntu.dockerfile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/ubuntu.dockerfile b/ci/ubuntu.dockerfile index 5a8a7483f0..d50ca6b524 100644 --- a/ci/ubuntu.dockerfile +++ b/ci/ubuntu.dockerfile @@ -9,7 +9,7 @@ RUN echo "To invalidate cache" RUN apt-get update -y && apt-get install -y \ python3-nacl \ - libindy-crypto=0.4.3 \ + 
libindy-crypto=0.4.5 \ libindy=1.6.1~683 \ # rocksdb python wrapper libbz2-dev \ diff --git a/setup.py b/setup.py index f65a91d9a4..8d2c230def 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ 'sortedcontainers==1.5.7', 'psutil==5.4.3', 'pip<10.0.0', 'portalocker==0.5.7', 'pyzmq==17.0.0', 'libnacl==1.6.1', 'six==1.11.0', 'psutil==5.4.3', 'intervaltree==2.1.0', - 'msgpack-python==0.4.6', 'indy-crypto==0.4.3', + 'msgpack-python==0.4.6', 'indy-crypto==0.4.5', 'python-rocksdb==0.6.9', 'python-dateutil==2.6.1', 'pympler==0.5'], setup_requires=['pytest-runner'], From f36746ff09007e82736863cb64462e4440f7c412 Mon Sep 17 00:00:00 2001 From: toktar Date: Fri, 30 Nov 2018 14:15:28 +0300 Subject: [PATCH 043/100] INDY-1872: add more logging Signed-off-by: toktar --- plenum/common/throughput_measurements.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plenum/common/throughput_measurements.py b/plenum/common/throughput_measurements.py index 048a4adb85..07202a3760 100644 --- a/plenum/common/throughput_measurements.py +++ b/plenum/common/throughput_measurements.py @@ -1,7 +1,9 @@ from abc import ABCMeta, abstractmethod from enum import unique, Enum +from logging import getLogger from common.exceptions import LogicError +logger = getLogger() class ThroughputMeasurement(metaclass=ABCMeta): @@ -93,6 +95,7 @@ def init_time(self, start_ts): def get_throughput(self, request_time): if request_time < self.first_ts + (self.window_size * self.min_cnt): return None + logger.trace("Request time {}; First ts {}.".format(request_time, self.first_ts)) return super().get_throughput(request_time) From 698eb98774fa7d20a5262ddb0c68c50709191c2b Mon Sep 17 00:00:00 2001 From: toktar Date: Fri, 30 Nov 2018 18:26:24 +0300 Subject: [PATCH 044/100] INDY-1872: add more logging Signed-off-by: toktar --- plenum/server/monitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plenum/server/monitor.py b/plenum/server/monitor.py index f8ca43ade4..4440116b40 100644 --- 
a/plenum/server/monitor.py +++ b/plenum/server/monitor.py @@ -293,8 +293,8 @@ def create_throughput_measurement(config, start_ts=time.perf_counter()): tm = config.throughput_measurement_class( **config.throughput_measurement_params) tm.init_time(start_ts) - logger.trace("Creating throughput measurement class {} with parameters {}" - .format(str(config.throughput_measurement_class), str(config.throughput_measurement_params))) + logger.trace("Creating throughput measurement class {} with parameters {} in start time {}" + .format(str(config.throughput_measurement_class), str(config.throughput_measurement_params), start_ts)) return tm def reset(self): From 9eff3734aa8e5cbf1b79c89ceb75ad412cd227c5 Mon Sep 17 00:00:00 2001 From: toktar Date: Mon, 3 Dec 2018 13:28:42 +0300 Subject: [PATCH 045/100] INDY-1872: fix problem with 'start_ts' in create_throughput_measurement Signed-off-by: toktar --- plenum/common/throughput_measurements.py | 1 - plenum/server/monitor.py | 4 +++- plenum/test/monitoring/conftest.py | 2 +- .../test_throughput_based_master_degradation_detection.py | 3 +++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/plenum/common/throughput_measurements.py b/plenum/common/throughput_measurements.py index 07202a3760..535952d96b 100644 --- a/plenum/common/throughput_measurements.py +++ b/plenum/common/throughput_measurements.py @@ -95,7 +95,6 @@ def init_time(self, start_ts): def get_throughput(self, request_time): if request_time < self.first_ts + (self.window_size * self.min_cnt): return None - logger.trace("Request time {}; First ts {}.".format(request_time, self.first_ts)) return super().get_throughput(request_time) diff --git a/plenum/server/monitor.py b/plenum/server/monitor.py index 4440116b40..aac447ac8e 100644 --- a/plenum/server/monitor.py +++ b/plenum/server/monitor.py @@ -289,7 +289,9 @@ def calculateTraffic(self): return currNetwork @staticmethod - def create_throughput_measurement(config, start_ts=time.perf_counter()): + def 
create_throughput_measurement(config, start_ts=None): + if start_ts is None: + start_ts = time.perf_counter() tm = config.throughput_measurement_class( **config.throughput_measurement_params) tm.init_time(start_ts) diff --git a/plenum/test/monitoring/conftest.py b/plenum/test/monitoring/conftest.py index eed9563d48..c408b76b01 100644 --- a/plenum/test/monitoring/conftest.py +++ b/plenum/test/monitoring/conftest.py @@ -40,7 +40,7 @@ def getThroughput(self, instId): instances = Instances() num_of_replicas = 5 for i in range(num_of_replicas): - throughputs[i] = Monitor.create_throughput_measurement(tconf, start_ts=0) + throughputs[i] = Monitor.create_throughput_measurement(tconf) instances.add(i) monitor = FakeSomething( throughputs=throughputs, diff --git a/plenum/test/monitoring/test_throughput_based_master_degradation_detection.py b/plenum/test/monitoring/test_throughput_based_master_degradation_detection.py index 8ac12bbd5b..2bb9a30c7b 100644 --- a/plenum/test/monitoring/test_throughput_based_master_degradation_detection.py +++ b/plenum/test/monitoring/test_throughput_based_master_degradation_detection.py @@ -299,3 +299,6 @@ def test_instances_not_degraded_on_new_instance(fake_monitor, tconf): assert not fake_monitor.is_instance_throughput_too_low(new_id) assert not fake_monitor.isMasterThroughputTooLow() + assert all(fake_monitor.throughputs[new_id].first_ts != th.first_ts + for inst_id, th in fake_monitor.throughputs.items() + if inst_id != new_id) From 5d322d16e89dab9daac62068294a0ef22b641561 Mon Sep 17 00:00:00 2001 From: toktar Date: Mon, 3 Dec 2018 15:14:43 +0300 Subject: [PATCH 046/100] INDY-1872: refactoring throughput_measurements Signed-off-by: toktar --- plenum/common/throughput_measurements.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/plenum/common/throughput_measurements.py b/plenum/common/throughput_measurements.py index 535952d96b..048a4adb85 100644 --- a/plenum/common/throughput_measurements.py +++ 
b/plenum/common/throughput_measurements.py @@ -1,9 +1,7 @@ from abc import ABCMeta, abstractmethod from enum import unique, Enum -from logging import getLogger from common.exceptions import LogicError -logger = getLogger() class ThroughputMeasurement(metaclass=ABCMeta): From 3e5fa6c1bbec20b6a813de85851991c4ac6a88bd Mon Sep 17 00:00:00 2001 From: toktar Date: Thu, 6 Dec 2018 18:51:23 +0300 Subject: [PATCH 047/100] INDY-1896: process read and action requests Changes: - move check in unpackClientMsg and add check for request type - add tests Signed-off-by: toktar --- plenum/server/node.py | 29 +++++--- plenum/test/conftest.py | 20 +++++ plenum/test/node/test_api.py | 23 ------ .../test_client_req_during_view_change.py | 73 +++++++++++++++---- 4 files changed, 96 insertions(+), 49 deletions(-) diff --git a/plenum/server/node.py b/plenum/server/node.py index 9e9aadb934..9f81911ce4 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -1964,17 +1964,7 @@ def unpackClientMsg(self, msg, frm): :param msg: a client message :param frm: the name of the client that sent this `msg` """ - if self.view_changer.view_change_in_progress: - - msg_dict = msg if isinstance(msg, dict) else msg.as_dict - self.discard(msg_dict, - reason="view change in progress", - logMethod=logger.debug) - self.send_nack_to_client((idr_from_req_data(msg_dict), - msg_dict.get(f.REQ_ID.nm, None)), - "Client request is discarded since view " - "change is in progress", frm) - return + if isinstance(msg, Batch): for m in msg.messages: # This check is done since Client uses NodeStack (which can @@ -1987,6 +1977,23 @@ def unpackClientMsg(self, msg, frm): m = self.clientstack.deserializeMsg(m) self.handleOneClientMsg((m, frm)) else: + msg_dict = msg.as_dict if isinstance(msg, Request) else msg + if isinstance(msg_dict, dict): + txn_type = msg_dict.get(OPERATION).get(TXN_TYPE, False) \ + if OPERATION in msg_dict \ + else False + txn_need_quorum = txn_type and not (txn_type == GET_TXN or + 
self.is_action(txn_type) or + self.is_query(txn_type)) + if self.view_changer.view_change_in_progress and txn_need_quorum: + self.discard(msg_dict, + reason="view change in progress", + logMethod=logger.debug) + self.send_nack_to_client((idr_from_req_data(msg_dict), + msg_dict.get(f.REQ_ID.nm, None)), + "Client request is discarded since view " + "change is in progress", frm) + return self.postToClientInBox(msg, frm) def postToClientInBox(self, msg, frm): diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index bbbe6b2f5e..d2518eb150 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -1134,3 +1134,23 @@ def one_replica_and_others_in_backup_instance( return primary, non_primaries else: return non_primaries[0], [primary] + non_primaries[1:] + + +@pytest.fixture(scope='function') +def test_node( + tdirWithPoolTxns, + tdirWithDomainTxns, + poolTxnNodeNames, + tdirWithNodeKeepInited, + tdir, + tconf, + allPluginsPath): + node_name = poolTxnNodeNames[0] + config_helper = PNodeConfigHelper(node_name, tconf, chroot=tdir) + node = TestNode( + node_name, + config_helper=config_helper, + config=tconf, + pluginPaths=allPluginsPath) + yield node + node.onStopping() # TODO stop won't call onStopping as we are in Stopped state diff --git a/plenum/test/node/test_api.py b/plenum/test/node/test_api.py index 63c91af42c..da9a521f57 100644 --- a/plenum/test/node/test_api.py +++ b/plenum/test/node/test_api.py @@ -1,33 +1,10 @@ import pytest from common.exceptions import LogicError -from plenum.test.test_node import TestNode from plenum.common.constants import TXN_TYPE -from plenum.common.config_helper import PNodeConfigHelper from plenum.common.request import Request -@pytest.fixture(scope='function') -def test_node( - tdirWithPoolTxns, - tdirWithDomainTxns, - poolTxnNodeNames, - tdirWithNodeKeepInited, - tdir, - tconf, - allPluginsPath): - - node_name = poolTxnNodeNames[0] - config_helper = PNodeConfigHelper(node_name, tconf, chroot=tdir) - node = 
TestNode( - node_name, - config_helper=config_helper, - config=tconf, - pluginPaths=allPluginsPath) - yield node - node.onStopping() # TODO stop won't call onStopping as we are in Stopped state - - def test_on_view_change_complete_fails(test_node): with pytest.raises(LogicError) as excinfo: test_node.on_view_change_complete() diff --git a/plenum/test/view_change/test_client_req_during_view_change.py b/plenum/test/view_change/test_client_req_during_view_change.py index 3684631971..7c78b84196 100644 --- a/plenum/test/view_change/test_client_req_during_view_change.py +++ b/plenum/test/view_change/test_client_req_during_view_change.py @@ -1,12 +1,24 @@ +import functools + import pytest +from plenum.common.constants import NODE, TXN_TYPE, GET_TXN from plenum.common.exceptions import RequestNackedException from plenum.test.helper import sdk_send_random_and_check, \ sdk_send_random_requests, sdk_get_and_check_replies, sdk_gen_request, \ checkDiscardMsg +from plenum.test.pool_transactions.helper import sdk_build_get_txn_request, sdk_sign_and_send_prepared_request +from plenum.test.testing_utils import FakeSomething + + +@pytest.fixture(scope='function') +def test_node(test_node): + test_node.view_changer = FakeSomething(view_change_in_progress=True, + view_no=1) + return test_node -def test_client_msg_discard_in_view_change_integration(txnPoolNodeSet, +def test_client_write_request_discard_in_view_change_integration(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client): @@ -26,24 +38,55 @@ def test_client_msg_discard_in_view_change_integration(txnPoolNodeSet, "change is in progress" in e.args[0] -def test_client_msg_discard_in_view_change_with_dict(txnPoolNodeSet): - node = txnPoolNodeSet[0] - node.view_changer.view_change_in_progress = True - node.send_nack_to_client = check_nack_msg +def test_client_get_request_not_discard_in_view_change_integration(txnPoolNodeSet, + looper, + sdk_pool_handle, + sdk_wallet_client): + ''' + Check that client requests sent in view 
change will discard. + ''' + for node in txnPoolNodeSet: + node.view_changer.view_change_in_progress = True + _, steward_did = sdk_wallet_client + request = sdk_build_get_txn_request(looper, steward_did, 1) + + sdk_request = sdk_sign_and_send_prepared_request(looper, + sdk_wallet_client, + sdk_pool_handle, + request) + sdk_get_and_check_replies(looper, [sdk_request]) + + +def test_client_write_request_discard_in_view_change_with_dict(test_node): + test_node.send_nack_to_client = check_nack_msg + + msg = sdk_gen_request({TXN_TYPE: NODE}).as_dict + test_node.unpackClientMsg(msg, "frm") + checkDiscardMsg([test_node, ], msg, "view change in progress") + + +def test_client_get_request_not_discard_in_view_change_with_dict(test_node): + sender = "frm" + msg = sdk_gen_request({TXN_TYPE: GET_TXN}).as_dict + + def post_to_client_in_box(received_msg, received_frm): + assert received_frm == sender + assert received_msg == msg + test_node.postToClientInBox = post_to_client_in_box + + def discard(received_msg, reason, logLevel): + assert False, "Message {} was discard with '{}'".format(received_msg, reason) + test_node.discard = discard - msg = sdk_gen_request("op").as_dict - node.unpackClientMsg(msg, "frm") - checkDiscardMsg([node, ], msg, "view change in progress") + test_node.unpackClientMsg(msg, sender) -def test_client_msg_discard_in_view_change_with_request(txnPoolNodeSet): - node = txnPoolNodeSet[0] - node.view_changer.view_change_in_progress = True - node.send_nack_to_client = check_nack_msg +def test_client_msg_discard_in_view_change_with_request(test_node): + test_node.send_nack_to_client = check_nack_msg - msg = sdk_gen_request("op") - node.unpackClientMsg(msg, "frm") - checkDiscardMsg([node, ], msg.as_dict, "view change in progress") + msg = sdk_gen_request({TXN_TYPE: NODE}) + test_node.unpackClientMsg(msg, "frm") + checkDiscardMsg([test_node, ], msg.as_dict, "view change in progress") def check_nack_msg(req_key, reason, to_client): From 
32732f9d9a2ed16774bec7d0907f8ecc21e428df Mon Sep 17 00:00:00 2001 From: toktar Date: Thu, 6 Dec 2018 18:54:35 +0300 Subject: [PATCH 048/100] INDY-1896: test refactoring Signed-off-by: toktar --- plenum/test/view_change/test_client_req_during_view_change.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/plenum/test/view_change/test_client_req_during_view_change.py b/plenum/test/view_change/test_client_req_during_view_change.py index 7c78b84196..c45737e1fa 100644 --- a/plenum/test/view_change/test_client_req_during_view_change.py +++ b/plenum/test/view_change/test_client_req_during_view_change.py @@ -1,5 +1,3 @@ -import functools - import pytest from plenum.common.constants import NODE, TXN_TYPE, GET_TXN From 0d86b62a8042c940a06191d87a68c91480b180a4 Mon Sep 17 00:00:00 2001 From: toktar Date: Fri, 7 Dec 2018 14:52:16 +0300 Subject: [PATCH 049/100] INDY-1896: tests refactoring Signed-off-by: toktar --- .../test_client_req_during_view_change.py | 45 +------------------ ...ient_req_during_view_change_integration.py | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+), 44 deletions(-) create mode 100644 plenum/test/view_change/test_client_req_during_view_change_integration.py diff --git a/plenum/test/view_change/test_client_req_during_view_change.py b/plenum/test/view_change/test_client_req_during_view_change.py index c45737e1fa..ea5e3ae02c 100644 --- a/plenum/test/view_change/test_client_req_during_view_change.py +++ b/plenum/test/view_change/test_client_req_during_view_change.py @@ -1,11 +1,7 @@ import pytest from plenum.common.constants import NODE, TXN_TYPE, GET_TXN -from plenum.common.exceptions import RequestNackedException -from plenum.test.helper import sdk_send_random_and_check, \ - sdk_send_random_requests, sdk_get_and_check_replies, sdk_gen_request, \ - checkDiscardMsg -from plenum.test.pool_transactions.helper import sdk_build_get_txn_request, sdk_sign_and_send_prepared_request +from plenum.test.helper import sdk_gen_request, checkDiscardMsg from 
plenum.test.testing_utils import FakeSomething @@ -16,45 +12,6 @@ def test_node(test_node): return test_node -def test_client_write_request_discard_in_view_change_integration(txnPoolNodeSet, - looper, - sdk_pool_handle, - sdk_wallet_client): - ''' - Check that client requests sent in view change will discard. - ''' - sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, - sdk_wallet_client, 4) - - for node in txnPoolNodeSet: - node.view_changer.view_change_in_progress = True - discard_reqs = sdk_send_random_requests(looper, sdk_pool_handle, - sdk_wallet_client, 1) - with pytest.raises(RequestNackedException) as e: - sdk_get_and_check_replies(looper, discard_reqs) - assert "Client request is discarded since view " \ - "change is in progress" in e.args[0] - - -def test_client_get_request_not_discard_in_view_change_integration(txnPoolNodeSet, - looper, - sdk_pool_handle, - sdk_wallet_client): - ''' - Check that client requests sent in view change will discard. - ''' - for node in txnPoolNodeSet: - node.view_changer.view_change_in_progress = True - _, steward_did = sdk_wallet_client - request = sdk_build_get_txn_request(looper, steward_did, 1) - - sdk_request = sdk_sign_and_send_prepared_request(looper, - sdk_wallet_client, - sdk_pool_handle, - request) - sdk_get_and_check_replies(looper, [sdk_request]) - - def test_client_write_request_discard_in_view_change_with_dict(test_node): test_node.send_nack_to_client = check_nack_msg diff --git a/plenum/test/view_change/test_client_req_during_view_change_integration.py b/plenum/test/view_change/test_client_req_during_view_change_integration.py new file mode 100644 index 0000000000..27d67e44b2 --- /dev/null +++ b/plenum/test/view_change/test_client_req_during_view_change_integration.py @@ -0,0 +1,45 @@ +import pytest + +from plenum.common.exceptions import RequestNackedException +from plenum.test.helper import sdk_send_random_and_check, \ + sdk_send_random_requests, sdk_get_and_check_replies +from 
plenum.test.pool_transactions.helper import sdk_build_get_txn_request, sdk_sign_and_send_prepared_request + + +def test_client_write_request_discard_in_view_change_integration(txnPoolNodeSet, + looper, + sdk_pool_handle, + sdk_wallet_client): + ''' + Check that client requests sent in view change will discard. + ''' + sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, + sdk_wallet_client, 4) + + for node in txnPoolNodeSet: + node.view_changer.view_change_in_progress = True + discard_reqs = sdk_send_random_requests(looper, sdk_pool_handle, + sdk_wallet_client, 1) + with pytest.raises(RequestNackedException) as e: + sdk_get_and_check_replies(looper, discard_reqs) + assert "Client request is discarded since view " \ + "change is in progress" in e.args[0] + + +def test_client_get_request_not_discard_in_view_change_integration(txnPoolNodeSet, + looper, + sdk_pool_handle, + sdk_wallet_client): + ''' + Check that client requests sent in view change will discard. + ''' + for node in txnPoolNodeSet: + node.view_changer.view_change_in_progress = True + _, steward_did = sdk_wallet_client + request = sdk_build_get_txn_request(looper, steward_did, 1) + + sdk_request = sdk_sign_and_send_prepared_request(looper, + sdk_wallet_client, + sdk_pool_handle, + request) + sdk_get_and_check_replies(looper, [sdk_request]) From da4b2ae132d6ddd02310455175bf83e43b55bef5 Mon Sep 17 00:00:00 2001 From: toktar Date: Mon, 10 Dec 2018 11:26:30 +0300 Subject: [PATCH 050/100] INDY-1879: add test that read requests are not discarded in view change Signed-off-by: toktar --- plenum/server/node.py | 16 +++++++++----- plenum/test/conftest.py | 15 ++++++------- .../test_client_req_during_view_change.py | 22 ++++++++++++++++++- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/plenum/server/node.py b/plenum/server/node.py index 9f81911ce4..5bbbf8be43 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -1979,13 +1979,11 @@ def unpackClientMsg(self, msg, frm): 
else: msg_dict = msg.as_dict if isinstance(msg, Request) else msg if isinstance(msg_dict, dict): - txn_type = msg_dict.get(OPERATION).get(TXN_TYPE, False) \ + txn_type = msg_dict.get(OPERATION).get(TXN_TYPE, None) \ if OPERATION in msg_dict \ - else False - txn_need_quorum = txn_type and not (txn_type == GET_TXN or - self.is_action(txn_type) or - self.is_query(txn_type)) - if self.view_changer.view_change_in_progress and txn_need_quorum: + else None + + if self.view_changer.view_change_in_progress and self.is_txn_need_quorum(txn_type): self.discard(msg_dict, reason="view change in progress", logMethod=logger.debug) @@ -3694,3 +3692,9 @@ def check_outdated_reqs(self): if outdated: self._clean_req_from_verified(req_state.request) self.requests.pop(req_key) + self.doneProcessingReq(req_key) + + def is_txn_need_quorum(self, txn_type): + return txn_type and not (txn_type == GET_TXN or + self.is_action(txn_type) or + self.is_query(txn_type)) diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index d2518eb150..bde51d9f10 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -1137,14 +1137,13 @@ def one_replica_and_others_in_backup_instance( @pytest.fixture(scope='function') -def test_node( - tdirWithPoolTxns, - tdirWithDomainTxns, - poolTxnNodeNames, - tdirWithNodeKeepInited, - tdir, - tconf, - allPluginsPath): +def test_node(tdirWithPoolTxns, + tdirWithDomainTxns, + poolTxnNodeNames, + tdirWithNodeKeepInited, + tdir, + tconf, + allPluginsPath): node_name = poolTxnNodeNames[0] config_helper = PNodeConfigHelper(node_name, tconf, chroot=tdir) node = TestNode( diff --git a/plenum/test/view_change/test_client_req_during_view_change.py b/plenum/test/view_change/test_client_req_during_view_change.py index ea5e3ae02c..bf030a9e1f 100644 --- a/plenum/test/view_change/test_client_req_during_view_change.py +++ b/plenum/test/view_change/test_client_req_during_view_change.py @@ -1,7 +1,8 @@ import pytest -from plenum.common.constants import NODE, TXN_TYPE, 
GET_TXN +from plenum.common.constants import NODE, TXN_TYPE, GET_TXN, CONFIG_LEDGER_ID from plenum.test.helper import sdk_gen_request, checkDiscardMsg +from plenum.test.test_config_req_handler import READ_CONF, TestConfigReqHandler from plenum.test.testing_utils import FakeSomething @@ -9,6 +10,9 @@ def test_node(test_node): test_node.view_changer = FakeSomething(view_change_in_progress=True, view_no=1) + test_node.getConfigReqHandler = lambda: TestConfigReqHandler(test_node.configLedger, + test_node.states[CONFIG_LEDGER_ID]) + test_node.setup_config_req_handler() return test_node @@ -36,6 +40,22 @@ def discard(received_msg, reason, logLevel): test_node.unpackClientMsg(msg, sender) +def test_client_read_request_not_discard_in_view_change_with_dict(test_node): + sender = "frm" + msg = sdk_gen_request({TXN_TYPE: READ_CONF}).as_dict + + def post_to_client_in_box(received_msg, received_frm): + assert received_frm == sender + assert received_msg == msg + test_node.postToClientInBox = post_to_client_in_box + + def discard(received_msg, reason, logLevel): + assert False, "Message {} was discard with '{}'".format(received_msg, reason) + test_node.discard = discard + + test_node.unpackClientMsg(msg, sender) + + def test_client_msg_discard_in_view_change_with_request(test_node): test_node.send_nack_to_client = check_nack_msg From a9fae3eafc0eebd34405cf2df5feb9fc9ec2a955 Mon Sep 17 00:00:00 2001 From: toktar Date: Mon, 10 Dec 2018 12:26:08 +0300 Subject: [PATCH 051/100] INDY-1896: refactoring is_request_need_quorum Signed-off-by: toktar --- plenum/server/node.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/plenum/server/node.py b/plenum/server/node.py index 5bbbf8be43..90035ec455 100644 --- a/plenum/server/node.py +++ b/plenum/server/node.py @@ -1979,11 +1979,7 @@ def unpackClientMsg(self, msg, frm): else: msg_dict = msg.as_dict if isinstance(msg, Request) else msg if isinstance(msg_dict, dict): - txn_type = 
msg_dict.get(OPERATION).get(TXN_TYPE, None) \ - if OPERATION in msg_dict \ - else None - - if self.view_changer.view_change_in_progress and self.is_txn_need_quorum(txn_type): + if self.view_changer.view_change_in_progress and self.is_request_need_quorum(msg_dict): self.discard(msg_dict, reason="view change in progress", logMethod=logger.debug) @@ -3694,7 +3690,11 @@ def check_outdated_reqs(self): self.requests.pop(req_key) self.doneProcessingReq(req_key) - def is_txn_need_quorum(self, txn_type): + def is_request_need_quorum(self, msg_dict: dict): + txn_type = msg_dict.get(OPERATION).get(TXN_TYPE, None) \ + if OPERATION in msg_dict \ + else None + return txn_type and not (txn_type == GET_TXN or self.is_action(txn_type) or self.is_query(txn_type)) From 3077a8751b5538aa19adbbc220b4f8e55e7e1fff Mon Sep 17 00:00:00 2001 From: toktar Date: Thu, 13 Dec 2018 17:23:52 +0300 Subject: [PATCH 052/100] INDY-1909: add old instance change messages discarding Signed-off-by: toktar --- plenum/config.py | 3 + plenum/server/models.py | 102 +++++++++++------- plenum/server/view_change/view_changer.py | 10 +- .../test_instance_change_from_unknown.py | 4 +- .../test_old_instance_change_discarding.py | 42 ++++++++ .../test_prepare_in_queue_before_vc.py | 3 +- .../test_view_change_happens_post_timeout.py | 2 +- ...ange_with_different_prepare_certificate.py | 4 +- 8 files changed, 121 insertions(+), 49 deletions(-) create mode 100644 plenum/test/view_change/test_old_instance_change_discarding.py diff --git a/plenum/config.py b/plenum/config.py index 9b980c8281..88c4a551cb 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -374,3 +374,6 @@ OUTDATED_REQS_CHECK_INTERVAL = 30 # seconds PROPAGATES_PHASE_REQ_TIMEOUT = 36000 # seconds ORDERING_PHASE_REQ_TIMEOUT = 72000 # seconds + +# Timeout factor after which an InstanceChange message are removed (0 to turn off) +OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = 300 # seconds diff --git a/plenum/server/models.py b/plenum/server/models.py index 
08d843289f..e7c3ed65e1 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -1,7 +1,8 @@ """ Some model objects used in Plenum protocol. """ -from typing import NamedTuple, Set, Optional, Any +import time +from typing import NamedTuple, Set, Optional, Any, Dict from plenum.common.messages.node_messages import Prepare, Commit @@ -12,29 +13,29 @@ class TrackedMsgs(dict): - def getKey(self, msg): + def _get_key(self, msg): raise NotImplementedError - def newVoteMsg(self, msg): + def _new_vote_msg(self, msg): return ThreePhaseVotes(voters=set(), msg=msg) - def addMsg(self, msg, voter: str): - key = self.getKey(msg) + def _add_msg(self, msg, voter: str): + key = self._get_key(msg) if key not in self: - self[key] = self.newVoteMsg(msg) + self[key] = self._new_vote_msg(msg) self[key].voters.add(voter) - def hasMsg(self, msg) -> bool: - key = self.getKey(msg) + def _has_msg(self, msg) -> bool: + key = self._get_key(msg) return key in self - def hasVote(self, msg, voter: str) -> bool: - key = self.getKey(msg) + def _has_vote(self, msg, voter: str) -> bool: + key = self._get_key(msg) return key in self and voter in self[key].voters - def hasEnoughVotes(self, msg, count) -> bool: - key = self.getKey(msg) - return self.hasMsg(msg) and len(self[key].voters) >= count + def _has_enough_votes(self, msg, count) -> bool: + key = self._get_key(msg) + return self._has_msg(msg) and len(self[key].voters) >= count class Prepares(TrackedMsgs): @@ -46,7 +47,7 @@ class Prepares(TrackedMsgs): (viewNo, seqNo) -> (digest, {senders}) """ - def getKey(self, prepare): + def _get_key(self, prepare): return prepare.viewNo, prepare.ppSeqNo # noinspection PyMethodMayBeStatic @@ -58,18 +59,18 @@ def addVote(self, prepare: Prepare, voter: str) -> None: :param prepare: the PREPARE to add to the list :param voter: the name of the node who sent the PREPARE """ - self.addMsg(prepare, voter) + self._add_msg(prepare, voter) # noinspection PyMethodMayBeStatic def hasPrepare(self, prepare: 
Prepare) -> bool: - return super().hasMsg(prepare) + return super()._has_msg(prepare) # noinspection PyMethodMayBeStatic def hasPrepareFrom(self, prepare: Prepare, voter: str) -> bool: - return super().hasVote(prepare, voter) + return super()._has_vote(prepare, voter) def hasQuorum(self, prepare: Prepare, quorum: int) -> bool: - return self.hasEnoughVotes(prepare, quorum) + return self._has_enough_votes(prepare, quorum) class Commits(TrackedMsgs): @@ -80,7 +81,7 @@ class Commits(TrackedMsgs): replica names in case of multiple protocol instances) """ - def getKey(self, commit): + def _get_key(self, commit): return commit.viewNo, commit.ppSeqNo # noinspection PyMethodMayBeStatic @@ -92,18 +93,23 @@ def addVote(self, commit: Commit, voter: str) -> None: :param commit: the COMMIT to add to the list :param voter: the name of the replica who sent the COMMIT """ - super().addMsg(commit, voter) + super()._add_msg(commit, voter) # noinspection PyMethodMayBeStatic def hasCommit(self, commit: Commit) -> bool: - return super().hasMsg(commit) + return super()._has_msg(commit) # noinspection PyMethodMayBeStatic def hasCommitFrom(self, commit: Commit, voter: str) -> bool: - return super().hasVote(commit, voter) + return super()._has_vote(commit, voter) def hasQuorum(self, commit: Commit, quorum: int) -> bool: - return self.hasEnoughVotes(commit, quorum) + return self._has_enough_votes(commit, quorum) + + +InstanceChangesVotes = NamedTuple("InstanceChangesVotes", [ + ("voters", Dict[str, int]), + ("msg", Optional[Any])]) class InstanceChanges(TrackedMsgs): @@ -116,20 +122,42 @@ class InstanceChanges(TrackedMsgs): that can trigger a view change as equal """ - def getKey(self, msg): - return msg if isinstance(msg, int) else msg.viewNo - - # noinspection PyMethodMayBeStatic - def addVote(self, msg: int, voter: str): - super().addMsg(msg, voter) + def __init__(self, config) -> None: + self.outdated_ic_interval = \ + config.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + super().__init__() 
- # noinspection PyMethodMayBeStatic - def hasView(self, viewNo: int) -> bool: - return super().hasMsg(viewNo) + def _new_vote_msg(self, msg): + return InstanceChangesVotes(voters=dict(), msg=msg) - # noinspection PyMethodMayBeStatic - def hasInstChngFrom(self, viewNo: int, voter: str) -> bool: - return super().hasVote(viewNo, voter) + def _get_key(self, msg): + return msg if isinstance(msg, int) else msg.viewNo - def hasQuorum(self, viewNo: int, quorum: int) -> bool: - return self.hasEnoughVotes(viewNo, quorum) + def add_vote(self, msg, voter: str): + # This method can't use _add_message() because + # the voters collection is a dict. + key = self._get_key(msg) + if key not in self: + self[key] = self._new_vote_msg(msg) + self[key].voters[voter] = time.perf_counter() + + def has_view(self, view_no: int) -> bool: + self._update_votes(view_no) + return super()._has_msg(view_no) + + def has_inst_chng_from(self, view_no: int, voter: str) -> bool: + self._update_votes(view_no) + return super()._has_vote(view_no, voter) + + def has_quorum(self, view_no: int, quorum: int) -> bool: + self._update_votes(view_no) + return self._has_enough_votes(view_no, quorum) + + def _update_votes(self, view_no: int): + if self.outdated_ic_interval <= 0 or view_no not in self: + return + for voter, vote_time in dict(self[view_no].voters).items(): + if vote_time < time.perf_counter() - self.outdated_ic_interval: + del self[view_no].voters[voter] + if not self[view_no].voters: + del self[view_no] diff --git a/plenum/server/view_change/view_changer.py b/plenum/server/view_change/view_changer.py index 45332f4bdb..8d73393696 100644 --- a/plenum/server/view_change/view_changer.py +++ b/plenum/server/view_change/view_changer.py @@ -47,7 +47,7 @@ def __init__(self, node): (FutureViewChangeDone, self.process_future_view_vchd_msg) ) - self.instanceChanges = InstanceChanges() + self.instanceChanges = InstanceChanges(node.config) # The quorum of `ViewChangeDone` msgs is different depending on whether 
we're doing a real view change, # or just propagating view_no and Primary from `CurrentState` messages sent to a newly joined Node. @@ -354,7 +354,7 @@ def process_instance_change_msg(self, instChg: InstanceChange, frm: str) -> None # found then change view even if master not degraded self._on_verified_instance_change_msg(instChg, frm) - if self.instanceChanges.hasInstChngFrom(instChg.viewNo, self.name): + if self.instanceChanges.has_inst_chng_from(instChg.viewNo, self.name): logger.info("{} received instance change message {} but has already " "sent an instance change message".format(self, instChg)) elif not self.node.monitor.isMasterDegraded(): @@ -466,8 +466,8 @@ def _create_instance_change_msg(self, view_no, suspicion_code): def _on_verified_instance_change_msg(self, msg, frm): view_no = msg.viewNo - if not self.instanceChanges.hasInstChngFrom(view_no, frm): - self.instanceChanges.addVote(msg, frm) + if not self.instanceChanges.has_inst_chng_from(view_no, frm): + self.instanceChanges.add_vote(msg, frm) if view_no > self.view_no: self.do_view_change_if_possible(view_no) @@ -507,7 +507,7 @@ def _canViewChange(self, proposedViewNo: int) -> (bool, str): """ msg = None quorum = self.quorums.view_change.value - if not self.instanceChanges.hasQuorum(proposedViewNo, quorum): + if not self.instanceChanges.has_quorum(proposedViewNo, quorum): msg = '{} has no quorum for view {}'.format(self, proposedViewNo) elif not proposedViewNo > self.view_no: msg = '{} is in higher view more than {}'.format( diff --git a/plenum/test/view_change/test_instance_change_from_unknown.py b/plenum/test/view_change/test_instance_change_from_unknown.py index fa5c806300..5793ea8654 100644 --- a/plenum/test/view_change/test_instance_change_from_unknown.py +++ b/plenum/test/view_change/test_instance_change_from_unknown.py @@ -6,7 +6,7 @@ def test_instance_change_from_known(fake_view_changer): frm = list(fake_view_changer.node.nodestack.connecteds)[0] 
fake_view_changer.process_instance_change_msg(ic_msg, frm=frm) - assert fake_view_changer.instanceChanges.hasInstChngFrom(proposed_view, frm) + assert fake_view_changer.instanceChanges.has_inst_chng_from(proposed_view, frm) def test_instance_change_from_unknown(fake_view_changer): @@ -17,4 +17,4 @@ def test_instance_change_from_unknown(fake_view_changer): frm = b'SomeUnknownNode' fake_view_changer.process_instance_change_msg(ic_msg, frm=frm) - assert not fake_view_changer.instanceChanges.hasInstChngFrom(proposed_view, frm) + assert not fake_view_changer.instanceChanges.has_inst_chng_from(proposed_view, frm) diff --git a/plenum/test/view_change/test_old_instance_change_discarding.py b/plenum/test/view_change/test_old_instance_change_discarding.py new file mode 100644 index 0000000000..e0afa29db8 --- /dev/null +++ b/plenum/test/view_change/test_old_instance_change_discarding.py @@ -0,0 +1,42 @@ +import pytest + +from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data +from plenum.test.test_node import ensureElectionsDone +from stp_core.loop.eventually import eventually + + +@pytest.fixture(scope="module") +def tconf(tconf): + old_interval = tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = 0 + yield tconf + + tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = old_interval + + +def test_old_instance_change_discarding(txnPoolNodeSet, + looper, + tconf): + view_no = txnPoolNodeSet[0].viewNo + first_nodes = txnPoolNodeSet[:2] + second_nodes = txnPoolNodeSet[2:] + + for node in first_nodes: + node.view_changer.on_master_degradation() + + def chk_ic_discard(): + for n in txnPoolNodeSet: + assert not n.view_changer.instanceChanges.has_view(view_no + 1) + for frm in first_nodes: + assert not n.view_changer.instanceChanges.has_inst_chng_from(view_no + 1, frm.name) + + looper.run(eventually(chk_ic_discard, + timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL)) + + for node in second_nodes: + 
node.view_changer.on_master_degradation() + + ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) + ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet) + for node in txnPoolNodeSet: + assert node.viewNo == view_no diff --git a/plenum/test/view_change/test_prepare_in_queue_before_vc.py b/plenum/test/view_change/test_prepare_in_queue_before_vc.py index f0a5931a6c..5a64b5bd69 100644 --- a/plenum/test/view_change/test_prepare_in_queue_before_vc.py +++ b/plenum/test/view_change/test_prepare_in_queue_before_vc.py @@ -74,8 +74,7 @@ def test_prepare_in_queue_before_vc(looper, """ def chk_quorumed_prepares_count(prepares, count): pp_qourum = slow_node.quorums.prepare.value - assert len([pp for key, pp in prepares.items() if prepares.hasQuorum(pp.msg, pp_qourum)]) == count - + assert len([pp for key, pp in prepares.items() if prepares.has_quorum(pp.msg, pp_qourum)]) == count def patched_startViewChange(self, *args, **kwargs): self.node.processNodeInBox = functools.partial(TestNode.processNodeInBox, self.node) diff --git a/plenum/test/view_change/test_view_change_happens_post_timeout.py b/plenum/test/view_change/test_view_change_happens_post_timeout.py index 9f92381786..8f6d137677 100644 --- a/plenum/test/view_change/test_view_change_happens_post_timeout.py +++ b/plenum/test/view_change/test_view_change_happens_post_timeout.py @@ -15,7 +15,7 @@ def reset(): def is_instance_change_sent_for_view_no(node, view_no): - return node.view_changer.instanceChanges.hasView(view_no) + return node.view_changer.instanceChanges.has_view(view_no) def test_instance_change_happens_post_timeout(tconf, looper, txnPoolNodeSet): diff --git a/plenum/test/view_change_with_delays/test_view_change_with_different_prepare_certificate.py b/plenum/test/view_change_with_delays/test_view_change_with_different_prepare_certificate.py index a993d4ba15..2c93b36237 100644 --- a/plenum/test/view_change_with_delays/test_view_change_with_different_prepare_certificate.py +++ 
b/plenum/test/view_change_with_delays/test_view_change_with_different_prepare_certificate.py @@ -14,8 +14,8 @@ def check_prepare_certificate(nodes, ppSeqNo): for node in nodes: key = (node.viewNo, ppSeqNo) quorum = node.master_replica.quorums.prepare.value - assert node.master_replica.prepares.hasQuorum(ThreePhaseKey(*key), - quorum) + assert node.master_replica.prepares.has_quorum(ThreePhaseKey(*key), + quorum) def test_view_change_with_different_prepare_certificate(looper, txnPoolNodeSet, From 019ca9076db57b3df5e750d053754a7adec1cfde Mon Sep 17 00:00:00 2001 From: toktar Date: Thu, 13 Dec 2018 18:10:48 +0300 Subject: [PATCH 053/100] INDY-1909: add unit tests for models.py Signed-off-by: toktar --- plenum/server/models.py | 6 +- plenum/test/server/__init__.py | 0 plenum/test/server/test_models.py | 71 +++++++++++++++++++ .../test_old_instance_change_discarding.py | 2 +- 4 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 plenum/test/server/__init__.py create mode 100644 plenum/test/server/test_models.py diff --git a/plenum/server/models.py b/plenum/server/models.py index e7c3ed65e1..84f512a2fe 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -123,7 +123,7 @@ class InstanceChanges(TrackedMsgs): """ def __init__(self, config) -> None: - self.outdated_ic_interval = \ + self._outdated_ic_interval = \ config.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL super().__init__() @@ -154,10 +154,10 @@ def has_quorum(self, view_no: int, quorum: int) -> bool: return self._has_enough_votes(view_no, quorum) def _update_votes(self, view_no: int): - if self.outdated_ic_interval <= 0 or view_no not in self: + if self._outdated_ic_interval <= 0 or view_no not in self: return for voter, vote_time in dict(self[view_no].voters).items(): - if vote_time < time.perf_counter() - self.outdated_ic_interval: + if vote_time < time.perf_counter() - self._outdated_ic_interval: del self[view_no].voters[voter] if not self[view_no].voters: del self[view_no] diff --git 
a/plenum/test/server/__init__.py b/plenum/test/server/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plenum/test/server/test_models.py b/plenum/test/server/test_models.py new file mode 100644 index 0000000000..8556f38ac5 --- /dev/null +++ b/plenum/test/server/test_models.py @@ -0,0 +1,71 @@ +import pytest + +from plenum.common.messages.node_messages import InstanceChange +from plenum.server.models import InstanceChanges +from plenum.server.suspicion_codes import Suspicions +from stp_core.loop.eventually import eventually + + +@pytest.fixture(scope="function") +def instance_changes(tconf): + return InstanceChanges(tconf) + + +@pytest.fixture(scope="module") +def tconf(tconf): + old_interval = tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = 10 + yield tconf + + tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = old_interval + + +def test_add_vote(instance_changes): + frm = "Node1" + view_no = 1 + msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) + instance_changes.add_vote(msg, frm) + assert instance_changes[view_no].msg == msg + assert instance_changes[view_no].voters[frm] + + +def test_has_view(instance_changes): + frm = "Node1" + view_no = 1 + msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) + instance_changes.add_vote(msg, frm) + assert instance_changes.has_view(view_no) + + +def test_has_inst_chng_from(instance_changes): + frm = "Node1" + view_no = 1 + msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) + instance_changes.add_vote(msg, frm) + assert instance_changes.has_inst_chng_from(view_no, frm) + + +def test_has_quorum(instance_changes): + quorum = 2 + view_no = 1 + + assert not instance_changes.has_quorum(view_no, quorum) + for i in range(quorum): + instance_changes.add_vote(InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code), + "Node{}".format(i)) + assert instance_changes.has_quorum(view_no, quorum) + + +def 
test_old_ic_discard(instance_changes, looper, tconf): + frm = "Node1" + view_no = 1 + quorum = 2 + msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) + instance_changes.add_vote(msg, frm) + + def chk_ic_discard(): + assert not instance_changes.has_view(view_no) + assert not instance_changes.has_inst_chng_from(view_no, frm) + assert not instance_changes.has_quorum(view_no, quorum) + looper.run(eventually(chk_ic_discard, + timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL)) diff --git a/plenum/test/view_change/test_old_instance_change_discarding.py b/plenum/test/view_change/test_old_instance_change_discarding.py index e0afa29db8..d14545cf70 100644 --- a/plenum/test/view_change/test_old_instance_change_discarding.py +++ b/plenum/test/view_change/test_old_instance_change_discarding.py @@ -8,7 +8,7 @@ @pytest.fixture(scope="module") def tconf(tconf): old_interval = tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL - tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = 0 + tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = 10 yield tconf tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = old_interval From 9f9670fb3fc72fa5518222d339ec478e38ec19f1 Mon Sep 17 00:00:00 2001 From: toktar Date: Fri, 14 Dec 2018 11:53:56 +0300 Subject: [PATCH 054/100] INDY-1909: change InstanceChanges discarding interval to 2 hours. 
Changes: - change an interval - fix tests Signed-off-by: toktar --- plenum/config.py | 2 +- plenum/test/view_change/test_old_instance_change_discarding.py | 2 +- plenum/test/view_change/test_prepare_in_queue_before_vc.py | 2 +- .../test_view_change_with_different_prepare_certificate.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plenum/config.py b/plenum/config.py index 88c4a551cb..a0940c3ea3 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -376,4 +376,4 @@ ORDERING_PHASE_REQ_TIMEOUT = 72000 # seconds # Timeout factor after which an InstanceChange message are removed (0 to turn off) -OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = 300 # seconds +OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = 7200 # seconds diff --git a/plenum/test/view_change/test_old_instance_change_discarding.py b/plenum/test/view_change/test_old_instance_change_discarding.py index d14545cf70..fba9bf5861 100644 --- a/plenum/test/view_change/test_old_instance_change_discarding.py +++ b/plenum/test/view_change/test_old_instance_change_discarding.py @@ -31,7 +31,7 @@ def chk_ic_discard(): assert not n.view_changer.instanceChanges.has_inst_chng_from(view_no + 1, frm.name) looper.run(eventually(chk_ic_discard, - timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL)) + timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 10)) for node in second_nodes: node.view_changer.on_master_degradation() diff --git a/plenum/test/view_change/test_prepare_in_queue_before_vc.py b/plenum/test/view_change/test_prepare_in_queue_before_vc.py index 5a64b5bd69..543ed21815 100644 --- a/plenum/test/view_change/test_prepare_in_queue_before_vc.py +++ b/plenum/test/view_change/test_prepare_in_queue_before_vc.py @@ -74,7 +74,7 @@ def test_prepare_in_queue_before_vc(looper, """ def chk_quorumed_prepares_count(prepares, count): pp_qourum = slow_node.quorums.prepare.value - assert len([pp for key, pp in prepares.items() if prepares.has_quorum(pp.msg, pp_qourum)]) == count + assert len([pp for key, pp in 
prepares.items() if prepares.hasQuorum(pp.msg, pp_qourum)]) == count def patched_startViewChange(self, *args, **kwargs): self.node.processNodeInBox = functools.partial(TestNode.processNodeInBox, self.node) diff --git a/plenum/test/view_change_with_delays/test_view_change_with_different_prepare_certificate.py b/plenum/test/view_change_with_delays/test_view_change_with_different_prepare_certificate.py index 2c93b36237..e00681d7d0 100644 --- a/plenum/test/view_change_with_delays/test_view_change_with_different_prepare_certificate.py +++ b/plenum/test/view_change_with_delays/test_view_change_with_different_prepare_certificate.py @@ -14,7 +14,7 @@ def check_prepare_certificate(nodes, ppSeqNo): for node in nodes: key = (node.viewNo, ppSeqNo) quorum = node.master_replica.quorums.prepare.value - assert node.master_replica.prepares.has_quorum(ThreePhaseKey(*key), + assert node.master_replica.prepares.hasQuorum(ThreePhaseKey(*key), quorum) From fdbdca264cd84e74ec95c4b834141672ef99dff9 Mon Sep 17 00:00:00 2001 From: toktar Date: Fri, 14 Dec 2018 15:42:45 +0300 Subject: [PATCH 055/100] INDY-1909: change time counter in InstanceChanges in models.py Changed: -add tests -change time counter Signed-off-by: toktar --- plenum/server/models.py | 16 +++- plenum/test/helper.py | 11 ++- plenum/test/metrics/helper.py | 8 -- plenum/test/metrics/test_metrics_collector.py | 3 +- plenum/test/metrics/test_metrics_stats.py | 3 +- plenum/test/server/test_models.py | 75 +++++++++++++------ 6 files changed, 78 insertions(+), 38 deletions(-) diff --git a/plenum/server/models.py b/plenum/server/models.py index 84f512a2fe..6d81f5064a 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -2,9 +2,12 @@ Some model objects used in Plenum protocol. 
""" import time -from typing import NamedTuple, Set, Optional, Any, Dict +from typing import NamedTuple, Set, Optional, Any, Dict, Callable from plenum.common.messages.node_messages import Prepare, Commit +from stp_core.common.log import getlogger + +logger = getlogger() ThreePhaseVotes = NamedTuple("ThreePhaseVotes", [ ("voters", Set[str]), @@ -122,9 +125,10 @@ class InstanceChanges(TrackedMsgs): that can trigger a view change as equal """ - def __init__(self, config) -> None: + def __init__(self, config, time_provider: Callable = time.perf_counter) -> None: self._outdated_ic_interval = \ config.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + self.time_provider = time_provider super().__init__() def _new_vote_msg(self, msg): @@ -139,7 +143,7 @@ def add_vote(self, msg, voter: str): key = self._get_key(msg) if key not in self: self[key] = self._new_vote_msg(msg) - self[key].voters[voter] = time.perf_counter() + self[key].voters[voter] = self.time_provider() def has_view(self, view_no: int) -> bool: self._update_votes(view_no) @@ -157,7 +161,11 @@ def _update_votes(self, view_no: int): if self._outdated_ic_interval <= 0 or view_no not in self: return for voter, vote_time in dict(self[view_no].voters).items(): - if vote_time < time.perf_counter() - self._outdated_ic_interval: + now = self.time_provider() + if vote_time < now - self._outdated_ic_interval: + logger.info("Discard InstanceChange from {} " + "because it is out of date (was received {}sec " + "ago)".format(voter, int(now - vote_time))) del self[view_no].voters[voter] if not self[view_no].voters: del self[view_no] diff --git a/plenum/test/helper.py b/plenum/test/helper.py index c49e654ef4..8ba11bf3b3 100644 --- a/plenum/test/helper.py +++ b/plenum/test/helper.py @@ -1,3 +1,4 @@ +from datetime import datetime import itertools import os import random @@ -1033,4 +1034,12 @@ def acc_monitor(tconf, acc_monitor_enabled=True, acc_monitor_timeout=3, acc_moni tconf.ACC_MONITOR_TIMEOUT = old_timeout 
tconf.ACC_MONITOR_TXN_DELTA_K = old_delta - tconf.ACC_MONITOR_ENABLED = old_acc_monitor_enabled \ No newline at end of file + tconf.ACC_MONITOR_ENABLED = old_acc_monitor_enabled + + +class MockTimestamp: + def __init__(self, value=datetime.utcnow()): + self.value = value + + def __call__(self): + return self.value diff --git a/plenum/test/metrics/helper.py b/plenum/test/metrics/helper.py index 08bc3bac30..02b3a5d88a 100644 --- a/plenum/test/metrics/helper.py +++ b/plenum/test/metrics/helper.py @@ -36,14 +36,6 @@ def generate_events(num: int, min_ts=None) -> List[MetricsEvent]: return result -class MockTimestamp: - def __init__(self, value=datetime.utcnow()): - self.value = value - - def __call__(self): - return self.value - - class MockEvent: def __init__(self, name, count, sum): self.name = name diff --git a/plenum/test/metrics/test_metrics_collector.py b/plenum/test/metrics/test_metrics_collector.py index 1c17cf0895..994df65c33 100644 --- a/plenum/test/metrics/test_metrics_collector.py +++ b/plenum/test/metrics/test_metrics_collector.py @@ -8,8 +8,9 @@ from plenum.common.metrics_collector import MetricsName, KvStoreMetricsCollector, KvStoreMetricsFormat, MetricsEvent, \ measure_time, async_measure_time from plenum.common.value_accumulator import ValueAccumulator -from plenum.test.metrics.helper import gen_next_timestamp, gen_metrics_name, generate_events, MockTimestamp, \ +from plenum.test.metrics.helper import gen_next_timestamp, gen_metrics_name, generate_events, \ MockMetricsCollector, MockEvent +from plenum.test.helper import MockTimestamp from storage.kv_store import KeyValueStorage diff --git a/plenum/test/metrics/test_metrics_stats.py b/plenum/test/metrics/test_metrics_stats.py index 40ca11a23c..73719d3889 100644 --- a/plenum/test/metrics/test_metrics_stats.py +++ b/plenum/test/metrics/test_metrics_stats.py @@ -6,7 +6,8 @@ from plenum.common.metrics_collector import KvStoreMetricsCollector, MetricsName from plenum.common.metrics_stats import trunc_ts, 
ValueAccumulator, MetricsStatsFrame, \ MetricsStats, load_metrics_from_kv_store -from plenum.test.metrics.helper import generate_events, MockTimestamp +from plenum.test.metrics.helper import generate_events +from plenum.test.helper import MockTimestamp def _metrics_stats_frame(events): diff --git a/plenum/test/server/test_models.py b/plenum/test/server/test_models.py index 8556f38ac5..067a721581 100644 --- a/plenum/test/server/test_models.py +++ b/plenum/test/server/test_models.py @@ -3,7 +3,7 @@ from plenum.common.messages.node_messages import InstanceChange from plenum.server.models import InstanceChanges from plenum.server.suspicion_codes import Suspicions -from stp_core.loop.eventually import eventually +from plenum.test.helper import MockTimestamp @pytest.fixture(scope="function") @@ -11,38 +11,60 @@ def instance_changes(tconf): return InstanceChanges(tconf) -@pytest.fixture(scope="module") -def tconf(tconf): - old_interval = tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL - tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = 10 - yield tconf - - tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL = old_interval - - -def test_add_vote(instance_changes): +def test_add_first_vote(instance_changes): frm = "Node1" view_no = 1 msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) + + assert view_no not in instance_changes + assert not instance_changes.has_view(view_no) + assert not instance_changes.has_inst_chng_from(view_no, frm) + instance_changes.add_vote(msg, frm) + assert instance_changes[view_no].msg == msg assert instance_changes[view_no].voters[frm] + assert instance_changes.has_view(view_no) + assert instance_changes.has_inst_chng_from(view_no, frm) -def test_has_view(instance_changes): +def test_equal_votes_dont_accumulate_when_added(instance_changes, tconf): frm = "Node1" view_no = 1 + time_provider = MockTimestamp(0) + second_vote_time = 1 + instance_changes = InstanceChanges(tconf, time_provider) msg = InstanceChange(view_no, 
Suspicions.PRIMARY_DEGRADED.code) + + instance_changes.add_vote(msg, frm) + time_provider.value = second_vote_time instance_changes.add_vote(msg, frm) - assert instance_changes.has_view(view_no) + assert instance_changes[view_no].voters[frm] == second_vote_time + assert len(instance_changes[view_no].voters) == 1 + assert len(instance_changes) == 1 -def test_has_inst_chng_from(instance_changes): - frm = "Node1" + +def test_has_no_quorum_if_message_discarded(instance_changes, tconf): + frm1 = "Node1" + frm2 = "Node2" view_no = 1 + quorum = 2 + time_provider = MockTimestamp(0) + instance_changes = InstanceChanges(tconf, time_provider) msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) - instance_changes.add_vote(msg, frm) - assert instance_changes.has_inst_chng_from(view_no, frm) + + instance_changes.add_vote(msg, frm1) + time_provider.value += (tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL/2) + instance_changes.add_vote(msg, frm2) + + time_provider.value += (tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL/2) + 1 + assert not instance_changes.has_quorum(view_no, quorum) + + assert instance_changes.has_view(view_no) + assert instance_changes[view_no].msg == msg + assert not instance_changes.has_inst_chng_from(view_no, frm1) + assert instance_changes.has_inst_chng_from(view_no, frm2) def test_has_quorum(instance_changes): @@ -60,12 +82,19 @@ def test_old_ic_discard(instance_changes, looper, tconf): frm = "Node1" view_no = 1 quorum = 2 + time_provider = MockTimestamp(0) + instance_changes = InstanceChanges(tconf, time_provider) msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) + + time_provider.value = 0 + instance_changes.add_vote(msg, frm) + time_provider.value += tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 1 + assert not instance_changes.has_view(view_no) + instance_changes.add_vote(msg, frm) + time_provider.value += tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 1 + assert not instance_changes.has_inst_chng_from(view_no, frm) - def 
chk_ic_discard(): - assert not instance_changes.has_view(view_no) - assert not instance_changes.has_inst_chng_from(view_no, frm) - assert not instance_changes.has_quorum(view_no, quorum) - looper.run(eventually(chk_ic_discard, - timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL)) + instance_changes.add_vote(msg, frm) + time_provider.value += tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 1 + assert not instance_changes.has_quorum(view_no, quorum) From 42aae27f64ede339d50c7518ca6681aa9b49a4d6 Mon Sep 17 00:00:00 2001 From: toktar Date: Fri, 14 Dec 2018 15:55:09 +0300 Subject: [PATCH 056/100] INDY-1909: add logging Signed-off-by: toktar --- plenum/server/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plenum/server/models.py b/plenum/server/models.py index 6d81f5064a..e94e7654d3 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -163,9 +163,9 @@ def _update_votes(self, view_no: int): for voter, vote_time in dict(self[view_no].voters).items(): now = self.time_provider() if vote_time < now - self._outdated_ic_interval: - logger.info("Discard InstanceChange from {} " + logger.info("Discard InstanceChange from {} for ViewNo {} " "because it is out of date (was received {}sec " - "ago)".format(voter, int(now - vote_time))) + "ago)".format(voter, view_no, int(now - vote_time))) del self[view_no].voters[voter] if not self[view_no].voters: del self[view_no] From b853edfe9603ccfe2ec6212c99f9a428a2935fe0 Mon Sep 17 00:00:00 2001 From: toktar Date: Fri, 14 Dec 2018 18:16:55 +0300 Subject: [PATCH 057/100] INDY-1909: refactoring models.py and tests Signed-off-by: toktar --- plenum/server/models.py | 6 +++--- plenum/test/server/test_models.py | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/plenum/server/models.py b/plenum/server/models.py index e94e7654d3..f2085cabfb 100644 --- a/plenum/server/models.py +++ b/plenum/server/models.py @@ -128,7 +128,7 @@ class 
InstanceChanges(TrackedMsgs): def __init__(self, config, time_provider: Callable = time.perf_counter) -> None: self._outdated_ic_interval = \ config.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL - self.time_provider = time_provider + self._time_provider = time_provider super().__init__() def _new_vote_msg(self, msg): @@ -143,7 +143,7 @@ def add_vote(self, msg, voter: str): key = self._get_key(msg) if key not in self: self[key] = self._new_vote_msg(msg) - self[key].voters[voter] = self.time_provider() + self[key].voters[voter] = self._time_provider() def has_view(self, view_no: int) -> bool: self._update_votes(view_no) @@ -161,7 +161,7 @@ def _update_votes(self, view_no: int): if self._outdated_ic_interval <= 0 or view_no not in self: return for voter, vote_time in dict(self[view_no].voters).items(): - now = self.time_provider() + now = self._time_provider() if vote_time < now - self._outdated_ic_interval: logger.info("Discard InstanceChange from {} for ViewNo {} " "because it is out of date (was received {}sec " diff --git a/plenum/test/server/test_models.py b/plenum/test/server/test_models.py index 067a721581..3b32f8964b 100644 --- a/plenum/test/server/test_models.py +++ b/plenum/test/server/test_models.py @@ -11,15 +11,22 @@ def instance_changes(tconf): return InstanceChanges(tconf) -def test_add_first_vote(instance_changes): +def test_instance_changes_are_empty_when_created(instance_changes): frm = "Node1" view_no = 1 - msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) + assert not instance_changes assert view_no not in instance_changes assert not instance_changes.has_view(view_no) assert not instance_changes.has_inst_chng_from(view_no, frm) + +def test_add_first_vote(instance_changes): + frm = "Node1" + view_no = 1 + msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code) + assert not instance_changes + instance_changes.add_vote(msg, frm) assert instance_changes[view_no].msg == msg @@ -45,7 +52,7 @@ def 
test_equal_votes_dont_accumulate_when_added(instance_changes, tconf): assert len(instance_changes) == 1 -def test_has_no_quorum_if_message_discarded(instance_changes, tconf): +def test_too_old_messages_dont_count_towards_quorum(instance_changes, tconf): frm1 = "Node1" frm2 = "Node2" view_no = 1 @@ -67,7 +74,7 @@ def test_has_no_quorum_if_message_discarded(instance_changes, tconf): assert instance_changes.has_inst_chng_from(view_no, frm2) -def test_has_quorum(instance_changes): +def test_instance_changes_has_quorum_when_enough_distinct_votes_are_added(instance_changes): quorum = 2 view_no = 1 @@ -78,7 +85,7 @@ def test_has_quorum(instance_changes): assert instance_changes.has_quorum(view_no, quorum) -def test_old_ic_discard(instance_changes, looper, tconf): +def test_old_ic_discard(instance_changes, tconf): frm = "Node1" view_no = 1 quorum = 2 From 699ede2326f0652cfc73a0d172c6bdc9e5a72fbe Mon Sep 17 00:00:00 2001 From: Sergey Shilov Date: Thu, 13 Dec 2018 18:23:38 +0300 Subject: [PATCH 058/100] INDY-1836: increase ToleratePrimaryDisconnection and bind re-try time. ToleratePrimaryDisconnection increased up to 60 seconds. The bind re-try time increased up to 10 seconds. 
Signed-off-by: Sergey Shilov --- plenum/config.py | 2 +- plenum/test/conftest.py | 3 ++- stp_zmq/zstack.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/plenum/config.py b/plenum/config.py index a0940c3ea3..a9d84370f9 100644 --- a/plenum/config.py +++ b/plenum/config.py @@ -193,7 +193,7 @@ # A node if finds itself disconnected from primary of the master instance will # wait for `ToleratePrimaryDisconnection` before sending a view change message -ToleratePrimaryDisconnection = 2 +ToleratePrimaryDisconnection = 60 # A node if finds itself disconnected from primary of some backup instance will # wait for `TolerateBackupPrimaryDisconnection` before remove its replica diff --git a/plenum/test/conftest.py b/plenum/test/conftest.py index bde51d9f10..dc4dfaa2ab 100644 --- a/plenum/test/conftest.py +++ b/plenum/test/conftest.py @@ -228,7 +228,8 @@ def getValueFromModule(request, name: str, default: Any = None): }, "VIEW_CHANGE_TIMEOUT": 60, "MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE": 15, - "INITIAL_PROPOSE_VIEW_CHANGE_TIMEOUT": 60 + "INITIAL_PROPOSE_VIEW_CHANGE_TIMEOUT": 60, + "ToleratePrimaryDisconnection": 2 } diff --git a/stp_zmq/zstack.py b/stp_zmq/zstack.py index f903bfbe72..1503c73332 100644 --- a/stp_zmq/zstack.py +++ b/stp_zmq/zstack.py @@ -380,7 +380,7 @@ def open(self): bound = True except zmq.error.ZMQError as zmq_err: bind_retries += 1 - if bind_retries == 5: + if bind_retries == 50: raise zmq_err time.sleep(0.2) From 7f91c850a806970a9fccb76faadfc82e1f4bec25 Mon Sep 17 00:00:00 2001 From: Sergey Shilov Date: Wed, 19 Dec 2018 17:48:16 +0300 Subject: [PATCH 059/100] INDY-1926: add check for None of replica's primary name during logging. 
Signed-off-by: Sergey Shilov --- plenum/server/replicas.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plenum/server/replicas.py b/plenum/server/replicas.py index 85d9ef46d2..4a865fe07d 100644 --- a/plenum/server/replicas.py +++ b/plenum/server/replicas.py @@ -223,7 +223,9 @@ def unordered_request_handler_logging(self, unordereds): 'Received {} valid Prepares from {}. ' 'Received {} valid Commits from {}. ' 'Transaction contents: {}. ' - .format(reqId, duration, replica.primaryName.split(':')[0], prepre_sender, + .format(reqId, duration, + replica.primaryName.split(':')[0] if replica.primaryName is not None else None, + prepre_sender, n_prepares, str_prepares, n_commits, str_commits, content)) def keys(self): From 237faaccf4bfabc810ccdb7b9ac91e820492c5e6 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Tue, 23 Apr 2019 16:27:25 +0300 Subject: [PATCH 060/100] [RC-1.7.0] change to rc version Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 46c9e99eee..00b9713b66 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 7, 0, "dev", 0] +[1, 7, 0, "rc", 1] From 888e34fc548128d3b83da217b3799686253081c0 Mon Sep 17 00:00:00 2001 From: Sovbot Date: Tue, 23 Apr 2019 15:21:42 +0000 Subject: [PATCH 061/100] release 1.7.0 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 00b9713b66..2749a900a6 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 7, 0, "rc", 1] +[1, 7, 0, "", ""] From c119fd4289d3e0eed4cde18ed7dc7ecbc4d94653 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Wed, 24 Apr 2019 18:44:57 +0300 Subject: [PATCH 062/100] [RC-1.7.1.rc1] change version of indy-plenum Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 2749a900a6..f0798d049d 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 7, 0, "", ""] +[1, 7, 1, "rc", 1] From 370c718aa307e4094f677b4b00c3537757ce4cdc Mon Sep 17 00:00:00 2001 From: Sovbot Date: Wed, 24 Apr 2019 17:26:23 +0000 Subject: [PATCH 063/100] release 1.7.1 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index f0798d049d..759e218580 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 7, 1, "rc", 1] +[1, 7, 1, "", ""] From bf317f218fd10d397a53b9d4b5b35ae903c9d61c Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Tue, 28 May 2019 10:29:23 +0300 Subject: [PATCH 064/100] [RC1-1.8.0] change version to rc Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index fb55de5e58..302aaa053e 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 8, 0, "dev", 0] +[1, 8, 0, "rc", 1] From 2d330d4615eb8a4ebf633299ad6fd5f1f01110bb Mon Sep 17 00:00:00 2001 From: Sovbot Date: Tue, 28 May 2019 10:57:46 +0000 Subject: [PATCH 065/100] release 1.8.0 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 302aaa053e..f43e41d157 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 8, 0, "rc", 1] +[1, 8, 0, "", ""] From 9e7d35995d4e41e7952dcf262856a3c82f7de4f4 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Tue, 4 Jun 2019 20:13:28 +0300 Subject: [PATCH 066/100] [HOTFIX-1.8.1] bump version to 1.8.1 Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index f43e41d157..9b34a5d546 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 8, 0, "", ""] +[1, 8, 1, "rc", 1] From 104372944741de67bb5cf022528278ebd923ffad Mon Sep 17 00:00:00 2001 From: Sovbot Date: Wed, 5 Jun 2019 08:07:33 +0000 Subject: [PATCH 067/100] release 1.8.1 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 9b34a5d546..55b9e6627d 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 8, 1, "rc", 1] +[1, 8, 1, "", ""] From 55ace5f2c5335e399e61e1740c87114b7208502d Mon Sep 17 00:00:00 2001 From: Andrey Kononykhin Date: Thu, 27 Jun 2019 14:54:18 +0300 Subject: [PATCH 068/100] bumps version to 1.9.0.rc1 Signed-off-by: Andrey Kononykhin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 55b9e6627d..d038712fea 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 8, 1, "", ""] +[1, 9, 0, "rc", 1] From dc2b6c9895d75e327f6413d1301a2711ce3750b2 Mon Sep 17 00:00:00 2001 From: Sovbot Date: Thu, 27 Jun 2019 14:00:05 +0000 Subject: [PATCH 069/100] release 1.9.0 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index d038712fea..84dd702a5d 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 9, 0, "rc", 1] +[1, 9, 0, "", ""] From dc70af89a516173295a9d4e735e8cb1621969df2 Mon Sep 17 00:00:00 2001 From: Sergey Khoroshavin Date: Mon, 29 Jul 2019 18:28:04 +0300 Subject: [PATCH 070/100] INDY-2162: RC 1.9.1 Signed-off-by: Sergey Khoroshavin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/plenum/__version__.json b/plenum/__version__.json index 8e5c4a26b6..eecf634706 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 9, 1, "dev", 0] +[1, 9, 1, "rc", 1] From 09d1caf9afd3549eedda82978ff97f16f6b3ed10 Mon Sep 17 00:00:00 2001 From: Sergey Khoroshavin Date: Mon, 29 Jul 2019 21:22:06 +0300 Subject: [PATCH 071/100] INDY-2162: 1.9.1.rc2 Signed-off-by: Sergey Khoroshavin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index eecf634706..c7e8a0dc0f 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 9, 1, "rc", 1] +[1, 9, 1, "rc", 2] From 81a0278ccc0d67aca8ed01ad5ab34ba6fc0cab9e Mon Sep 17 00:00:00 2001 From: Sergey Khoroshavin Date: Tue, 30 Jul 2019 00:34:08 +0300 Subject: [PATCH 072/100] INDY-2162: 1.9.1.rc3 Signed-off-by: Sergey Khoroshavin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index c7e8a0dc0f..31f818ee7a 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 9, 1, "rc", 2] +[1, 9, 1, "rc", 3] From 82d023e97993f7a4ff8b47abd5abd764b9582454 Mon Sep 17 00:00:00 2001 From: Sovbot Date: Mon, 29 Jul 2019 22:40:35 +0000 Subject: [PATCH 073/100] release 1.9.1 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 31f818ee7a..7fc906c3ca 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 9, 1, "rc", 3] +[1, 9, 1, "", ""] From cbcfc1e0445141c40e7a2a93ee299976fbb30800 Mon Sep 17 00:00:00 2001 From: ashcherbakov Date: Mon, 26 Aug 2019 15:28:09 +0300 Subject: [PATCH 074/100] Merge pull request #1303 from KitHat/INDY-1954 INDY-1954 -- Implement GET_TXN audit proof read Signed-off-by: Andrew Nikitin --- ci/ubuntu.dockerfile | 2 +- 
ledger/ledger.py | 12 +++ ledger/util.py | 1 + plenum/server/node.py | 48 +--------- plenum/server/node_bootstrap.py | 2 +- .../request_handlers/get_txn_handler.py | 30 +++++- .../pool_transactions/test_get_txn_request.py | 9 +- plenum/test/test_get_txn_state_proof.py | 95 +++++++++++++++++++ plenum/test/test_req_authenticator.py | 1 + setup.py | 2 +- 10 files changed, 150 insertions(+), 52 deletions(-) create mode 100644 plenum/test/test_get_txn_state_proof.py diff --git a/ci/ubuntu.dockerfile b/ci/ubuntu.dockerfile index 8f2a89aaae..b540f2d1c2 100644 --- a/ci/ubuntu.dockerfile +++ b/ci/ubuntu.dockerfile @@ -10,7 +10,7 @@ RUN echo "To invalidate cache" RUN apt-get update -y && apt-get install -y \ python3-nacl \ libindy-crypto=0.4.5 \ - libindy=1.10.0~1198 \ + libindy=1.11.0~1282 \ # rocksdb python wrapper libbz2-dev \ zlib1g-dev \ diff --git a/ledger/ledger.py b/ledger/ledger.py index 49ec34f652..f49c3db148 100644 --- a/ledger/ledger.py +++ b/ledger/ledger.py @@ -204,6 +204,18 @@ def merkleInfo(self, seqNo): F.auditPath.name: [self.hashToStr(h) for h in auditPath] } + def auditProof(self, seqNo): + seqNo = int(seqNo) + if seqNo <= 0: + raise PlenumValueError('seqNo', seqNo, '> 0') + rootHash = self.tree.merkle_tree_hash(0, self.size) + auditPath = self.tree.inclusion_proof(seqNo - 1, self.size) + return { + F.rootHash.name: self.hashToStr(rootHash), + F.auditPath.name: [self.hashToStr(h) for h in auditPath], + F.ledgerSize.name: self.size + } + def start(self, loop=None, ensureDurability=True): if self._transactionLog and not self._transactionLog.closed: logging.debug("Ledger already started.") diff --git a/ledger/util.py b/ledger/util.py index 37281bf54a..aedcd48d34 100644 --- a/ledger/util.py +++ b/ledger/util.py @@ -58,6 +58,7 @@ class F(Enum): leafHash = 9 nodeHash = 10 height = 11 + ledgerSize = 12 STH = namedtuple("STH", ["tree_size", "sha256_root_hash"]) diff --git a/plenum/server/node.py b/plenum/server/node.py index 2a892fbb22..d02839522e 100644 --- 
a/plenum/server/node.py +++ b/plenum/server/node.py @@ -53,7 +53,7 @@ NODE_IP, BLS_PREFIX, NodeHooks, LedgerState, CURRENT_PROTOCOL_VERSION, AUDIT_LEDGER_ID, \ AUDIT_TXN_VIEW_NO, AUDIT_TXN_PP_SEQ_NO, \ TXN_AUTHOR_AGREEMENT_VERSION, AML, TXN_AUTHOR_AGREEMENT_TEXT, TS_LABEL, SEQ_NO_DB_LABEL, NODE_STATUS_DB_LABEL, \ - LAST_SENT_PP_STORE_LABEL + LAST_SENT_PP_STORE_LABEL, AUDIT_TXN_PRIMARIES, MULTI_SIGNATURE from plenum.common.exceptions import SuspiciousNode, SuspiciousClient, \ MissingNodeOp, InvalidNodeOp, InvalidNodeMsg, InvalidClientMsgType, \ InvalidClientRequest, BaseExc, \ @@ -2268,10 +2268,6 @@ def processRequest(self, request: Request, frm: str): if self.is_action(txn_type): self.process_action(request, frm) - elif txn_type == GET_TXN: - self.handle_get_txn_req(request, frm) - self.total_read_request_number += 1 - elif self.is_query(txn_type): self.process_query(request, frm) self.total_read_request_number += 1 @@ -2405,44 +2401,6 @@ def send_ack_to_client(self, req_key, to_client): def send_nack_to_client(self, req_key, reason, to_client): self.transmitToClient(RequestNack(*req_key, reason), to_client) - def handle_get_txn_req(self, request: Request, frm: str): - """ - Handle GET_TXN request - """ - ledger_id = request.operation.get(f.LEDGER_ID.nm, DOMAIN_LEDGER_ID) - if ledger_id not in self.ledger_ids: - self.send_nack_to_client((request.identifier, request.reqId), - 'Invalid ledger id {}'.format(ledger_id), - frm) - return - - seq_no = request.operation.get(DATA) - self.send_ack_to_client((request.identifier, request.reqId), frm) - ledger = self.getLedger(ledger_id) - - try: - txn = self.getReplyFromLedger(ledger, seq_no) - except KeyError: - txn = None - - if txn is None: - logger.debug( - "{} can not handle GET_TXN request: ledger doesn't " - "have txn with seqNo={}".format(self, str(seq_no))) - - result = { - f.IDENTIFIER.nm: request.identifier, - f.REQ_ID.nm: request.reqId, - TXN_TYPE: request.operation[TXN_TYPE], - DATA: None - } - - if txn: - 
result[DATA] = txn.result - result[f.SEQ_NO.nm] = get_seq_no(txn.result) - - self.transmitToClient(Reply(result), frm) - @measure_time(MetricsName.PROCESS_ORDERED_TIME) def processOrdered(self, ordered: Ordered): """ @@ -3367,12 +3325,12 @@ def getReplyFromLedgerForRequest(self, request): else: return None - def getReplyFromLedger(self, ledger, seq_no): + def getReplyFromLedger(self, ledger, seq_no, write=True): # DoS attack vector, client requesting already processed request id # results in iterating over ledger (or its subset) txn = ledger.getBySeqNo(int(seq_no)) if txn: - txn.update(ledger.merkleInfo(seq_no)) + txn.update(ledger.merkleInfo(seq_no) if write else ledger.auditProof(seq_no)) txn = self.update_txn_with_extra_data(txn) return Reply(txn) else: diff --git a/plenum/server/node_bootstrap.py b/plenum/server/node_bootstrap.py index fc885f8a31..e2eb78b305 100644 --- a/plenum/server/node_bootstrap.py +++ b/plenum/server/node_bootstrap.py @@ -182,7 +182,7 @@ def register_ts_store_batch_handlers(self): self.node.write_manager.register_batch_handler(ts_store_b_h, ledger_id=lid) def register_common_handlers(self): - get_txn_handler = GetTxnHandler(self, self.node.db_manager) + get_txn_handler = GetTxnHandler(self.node, self.node.db_manager) for lid in self.node.ledger_ids: self.node.read_manager.register_req_handler(get_txn_handler, ledger_id=lid) self.register_ts_store_batch_handlers() diff --git a/plenum/server/request_handlers/get_txn_handler.py b/plenum/server/request_handlers/get_txn_handler.py index a88e3722f7..d16b938bea 100644 --- a/plenum/server/request_handlers/get_txn_handler.py +++ b/plenum/server/request_handlers/get_txn_handler.py @@ -1,4 +1,6 @@ -from plenum.common.constants import DOMAIN_LEDGER_ID, DATA, TXN_TYPE, GET_TXN +from plenum.common.constants import DOMAIN_LEDGER_ID, DATA, TXN_TYPE, GET_TXN, MULTI_SIGNATURE, AUDIT_LEDGER_ID, \ + AUDIT_TXN_STATE_ROOT, TXN_PAYLOAD +from plenum.common.exceptions import InvalidClientRequest from 
plenum.common.messages.node_messages import RequestNack, Reply from plenum.common.request import Request from plenum.common.txn_util import get_seq_no @@ -16,6 +18,17 @@ def __init__(self, node, database_manager: DatabaseManager): super().__init__(database_manager, GET_TXN, None) self.node = node + def static_validation(self, request: Request): + ledger_id = request.operation.get(f.LEDGER_ID.nm, DOMAIN_LEDGER_ID) + if ledger_id not in self.node.ledger_ids: + raise InvalidClientRequest(request.identifier, request.reqId, + 'Invalid ledger id {}'.format(ledger_id)) + seq_no = request.operation.get(DATA) + if seq_no < 1: + raise InvalidClientRequest(request.identifier, request.reqId, + "Invalid sequence number: {} is smaller than 1".format(seq_no)) + super().static_validation(request) + def get_result(self, request: Request): ledger_id = request.operation.get(f.LEDGER_ID.nm, DOMAIN_LEDGER_ID) db = self.database_manager.get_database(ledger_id) @@ -26,7 +39,18 @@ def get_result(self, request: Request): seq_no = request.operation.get(DATA) try: - txn = self.node.getReplyFromLedger(db.ledger, seq_no) + txn = self.node.getReplyFromLedger(db.ledger, seq_no, write=False) + audit_ledger = self.database_manager.get_ledger(AUDIT_LEDGER_ID) + audit_seq_no = audit_ledger.size + state_root = None + for seq_no in reversed(range(1, audit_seq_no + 1)): + audit_txn = audit_ledger.getBySeqNo(seq_no) + state_root = audit_txn[TXN_PAYLOAD][DATA][AUDIT_TXN_STATE_ROOT].get(ledger_id, None) + if state_root: + break + if state_root is not None: + multi_sig = self.database_manager.bls_store.get(state_root) + txn.result[MULTI_SIGNATURE] = multi_sig.as_dict() except KeyError: txn = None @@ -46,4 +70,4 @@ def get_result(self, request: Request): result[DATA] = txn.result result[f.SEQ_NO.nm] = get_seq_no(txn.result) - return Reply(result) + return result diff --git a/plenum/test/pool_transactions/test_get_txn_request.py b/plenum/test/pool_transactions/test_get_txn_request.py index 
c150a23213..76359ec7b8 100644 --- a/plenum/test/pool_transactions/test_get_txn_request.py +++ b/plenum/test/pool_transactions/test_get_txn_request.py @@ -20,6 +20,7 @@ INVALID_LEDGER_ID = 5908 INVALID_SEQ_NO = -23 +whitelist = ["GET_TXN has no seq_no, skip AuditProof logic", "Given signature is not for current root hash, aborting"] def test_get_txn_for_invalid_ledger_id(looper, txnPoolNodeSet, @@ -133,4 +134,10 @@ def test_get_txn_response_as_expected(looper, txnPoolNodeSet, request) result2 = sdk_get_and_check_replies(looper, [request_couple])[0][1]['result'] - assert result1 == result2['data'] + + assert result1['reqSignature'] == result2['data']['reqSignature'] + assert result1['txn'] == result2['data']['txn'] + assert result1['txnMetadata'] == result2['data']['txnMetadata'] + assert result1['rootHash'] == result2['data']['rootHash'] + assert result1['ver'] == result2['data']['ver'] + assert result1['auditPath'] == result2['data']['auditPath'] diff --git a/plenum/test/test_get_txn_state_proof.py b/plenum/test/test_get_txn_state_proof.py new file mode 100644 index 0000000000..c7be247377 --- /dev/null +++ b/plenum/test/test_get_txn_state_proof.py @@ -0,0 +1,95 @@ +import json + +import pytest +import random + +from plenum.common.constants import TXN_METADATA, TXN_METADATA_SEQ_NO, OP_FIELD_NAME +from plenum.test.delayers import req_delay +from plenum.test.stasher import delay_rules +from indy.did import create_and_store_my_did +from indy.ledger import build_nym_request, build_get_txn_request, sign_and_submit_request, submit_request, build_attrib_request, build_acceptance_mechanisms_request + + +def nym_on_ledger(looper, sdk_pool_handle, sdk_wallet_client, sdk_wallet_steward, seed=None): + did_future = create_and_store_my_did(sdk_wallet_client[0], json.dumps({"seed": seed}) if seed else "{}") + did, vk = looper.loop.run_until_complete(did_future) + nym_req_future = build_nym_request(sdk_wallet_steward[1], did, vk, None, None) + nym_req = 
looper.loop.run_until_complete(nym_req_future) + nym_resp_future = sign_and_submit_request(sdk_pool_handle, sdk_wallet_steward[0], sdk_wallet_steward[1], nym_req) + nym_resp = looper.loop.run_until_complete(nym_resp_future) + nym = json.loads(nym_resp) + assert nym["result"] + assert nym["result"][TXN_METADATA] + assert nym["result"][TXN_METADATA][TXN_METADATA_SEQ_NO] + return nym["result"][TXN_METADATA][TXN_METADATA_SEQ_NO] + + +def attrib_on_ledger(looper, sdk_pool_handle, sdk_wallet_steward, sdk_client_wallet): + attrib_req_future = build_attrib_request(sdk_wallet_steward[1], sdk_client_wallet[1], None, "{}", None) + attrib_req = looper.loop.run_until_complete(attrib_req_future) + attrib_resp_future = sign_and_submit_request(sdk_pool_handle, sdk_wallet_steward[0], sdk_wallet_steward[1], attrib_req) + attrib_resp = looper.loop.run_until_complete(attrib_resp_future) + attrib = json.loads(attrib_resp) + print(attrib) + assert attrib["result"] + assert attrib["result"][TXN_METADATA] + assert attrib["result"][TXN_METADATA][TXN_METADATA_SEQ_NO] + return attrib["result"][TXN_METADATA][TXN_METADATA_SEQ_NO] + + +def aml_on_ledger(looper, sdk_pool_handle, sdk_wallet_trustee): + ver = random.randint(1, 10000) + aml_req_future = build_acceptance_mechanisms_request(sdk_wallet_trustee[1], "{\"test\":\"aml\"}", str(ver), None) + aml_req = looper.loop.run_until_complete(aml_req_future) + aml_resp_future = sign_and_submit_request(sdk_pool_handle, sdk_wallet_trustee[0], sdk_wallet_trustee[1], aml_req) + aml_resp = looper.loop.run_until_complete(aml_resp_future) + aml = json.loads(aml_resp) + assert aml["result"] + assert aml["result"][TXN_METADATA] + assert aml["result"][TXN_METADATA][TXN_METADATA_SEQ_NO] + return aml["result"][TXN_METADATA][TXN_METADATA_SEQ_NO] + + +@pytest.fixture(scope="function", params=[ + (['NYM'], 0, "DOMAIN"), + (['NYM', 'NYM', 'NYM'], 1, "DOMAIN"), + (['NYM', 'AML', 'NYM'], 1, "CONFIG") +]) +def transactions(request, looper, sdk_pool_handle, 
sdk_wallet_client, sdk_wallet_steward, sdk_wallet_trustee): + txns, for_get, id = request.param + res = [] + + for txn in txns: + seq_no = -1 + if txn == 'NYM': + seq_no = nym_on_ledger(looper, sdk_pool_handle, sdk_wallet_client, sdk_wallet_steward) + elif txn == 'AML': + seq_no = aml_on_ledger(looper, sdk_pool_handle, sdk_wallet_trustee) + res.append(seq_no) + + return res[for_get], id + + +@pytest.fixture(scope='function', params=['all_responding', 'one_responding']) +def nodeSetAlwaysResponding(request, txnPoolNodeSet, transactions): + if request.param == 'all_responding': + yield txnPoolNodeSet + else: + stashers = [node.clientIbStasher for node in txnPoolNodeSet[1:]] + with delay_rules(stashers, req_delay()): + yield txnPoolNodeSet + + +def sdk_get_txn(looper, sdk_pool_handle, seq_no, ledger_id): + get_txn_request_future = build_get_txn_request(None, ledger_id, seq_no) + get_txn_request = looper.loop.run_until_complete(get_txn_request_future) + get_txn_response_future = submit_request(sdk_pool_handle, get_txn_request) + get_txn_response = looper.loop.run_until_complete(get_txn_response_future) + return get_txn_response + + +def test_get_txn_audit_proof(nodeSetAlwaysResponding, looper, sdk_pool_handle, transactions): + seq_no, ledger = transactions + response = sdk_get_txn(looper, sdk_pool_handle, seq_no, ledger) + resp_json = json.loads(response) + assert resp_json[OP_FIELD_NAME] == "REPLY" diff --git a/plenum/test/test_req_authenticator.py b/plenum/test/test_req_authenticator.py index 901ffccd38..5da032093e 100644 --- a/plenum/test/test_req_authenticator.py +++ b/plenum/test/test_req_authenticator.py @@ -14,6 +14,7 @@ from plenum.test.stasher import delay_rules from stp_core.loop.eventually import eventually +whitelist = ["Given signature is not for current root hash, aborting"] @pytest.fixture(scope='module') def pre_reqs(): diff --git a/setup.py b/setup.py index f41a6f39a4..84f90d62b2 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ with 
open(metadata['__file__'], 'r') as f: exec(f.read(), metadata) -tests_require = ['pytest==3.3.1', 'pytest-xdist==1.22.1', 'python3-indy==1.10.0-dev-1198', 'pytest-asyncio==0.8.0'] +tests_require = ['pytest==3.3.1', 'pytest-xdist==1.22.1', 'python3-indy==1.11.0-dev-1282', 'pytest-asyncio==0.8.0'] setup( name=metadata['__title__'], From 36d9af0e2baa4104b79c8cf9a254116b744478d8 Mon Sep 17 00:00:00 2001 From: toktar Date: Tue, 27 Aug 2019 16:14:44 +0300 Subject: [PATCH 075/100] Merge pull request #1308 from KitHat/INDY-1954 INDY-1954 Add multisig check Signed-off-by: Andrew Nikitin --- plenum/server/request_handlers/get_txn_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plenum/server/request_handlers/get_txn_handler.py b/plenum/server/request_handlers/get_txn_handler.py index d16b938bea..2a99b92452 100644 --- a/plenum/server/request_handlers/get_txn_handler.py +++ b/plenum/server/request_handlers/get_txn_handler.py @@ -50,7 +50,8 @@ def get_result(self, request: Request): break if state_root is not None: multi_sig = self.database_manager.bls_store.get(state_root) - txn.result[MULTI_SIGNATURE] = multi_sig.as_dict() + if multi_sig: + txn.result[MULTI_SIGNATURE] = multi_sig.as_dict() except KeyError: txn = None From ccfa7576de677870534c571b6e36991abd3f87f6 Mon Sep 17 00:00:00 2001 From: toktar Date: Mon, 26 Aug 2019 18:31:40 +0300 Subject: [PATCH 076/100] Merge pull request #1305 from Toktar/bug-2215-ls-reask INDY-2215: add re-ask LedgerStatuses for the init catchup Signed-off-by: Andrew Nikitin --- plenum/common/timer.py | 14 +++++- plenum/server/catchup/cons_proof_service.py | 30 ++++++------ ...st_node_catchup_with_connection_problem.py | 46 ++++++++++--------- ...hing_3pc_while_catchup_only_checkpoints.py | 3 +- 4 files changed, 55 insertions(+), 38 deletions(-) diff --git a/plenum/common/timer.py b/plenum/common/timer.py index 4a288595ec..40a025d570 100644 --- a/plenum/common/timer.py +++ b/plenum/common/timer.py @@ -1,11 +1,14 @@ from 
abc import ABC, abstractmethod from functools import wraps +from logging import getLogger from typing import Callable, NamedTuple import time from sortedcontainers import SortedListWithKey +logger = getLogger() + class TimerService(ABC): @abstractmethod @@ -64,14 +67,15 @@ def wrapped_callback(): self._timer.schedule(self._interval, self._callback) self._timer = timer - self._interval = interval + self._interval = None + self.update_interval(interval) self._callback = wrapped_callback self._active = False if active: self.start() def start(self): - if self._active: + if self._active or not self._interval: return self._active = True self._timer.schedule(self._interval, self._callback) @@ -81,3 +85,9 @@ def stop(self): return self._active = False self._timer.cancel(self._callback) + + def update_interval(self, interval): + if interval <= 0: + logger.debug("RepeatingTimer - incorrect interval {}".format(interval)) + return + self._interval = interval diff --git a/plenum/server/catchup/cons_proof_service.py b/plenum/server/catchup/cons_proof_service.py index 82ee9097b7..e6e15dc3e8 100644 --- a/plenum/server/catchup/cons_proof_service.py +++ b/plenum/server/catchup/cons_proof_service.py @@ -48,8 +48,16 @@ def __init__(self, self._cons_proofs = {} self._already_asked_for_cons_proofs_without_timeout = False self._last_txn_3PC_key = {} - self._ledger_status_timer = None - self._consistency_proof_timer = None + self._ledger_status_timer = \ + RepeatingTimer(self._timer, + self._config.LedgerStatusTimeout * (len(self._provider.all_nodes_names()) - 1), + self._reask_for_ledger_status, + active=False) + self._consistency_proof_timer = \ + RepeatingTimer(self._timer, + self._config.ConsistencyProofsTimeout * (len(self._provider.all_nodes_names()) - 1), + self._reask_for_last_consistency_proof, + active=False) def __repr__(self) -> str: return "{}:ConsProofService:{}".format(self._provider.node_name(), self._ledger_id) @@ -66,7 +74,7 @@ def start(self, request_ledger_statuses: 
bool): if request_ledger_statuses: self._request_ledger_status_from_nodes() - self._schedule_reask_ledger_status() + self._schedule_reask_ledger_status() def process_ledger_status(self, ledger_status: LedgerStatus, frm: str): if not self._can_process_ledger_status(ledger_status): @@ -432,22 +440,18 @@ def _schedule_reask_cons_proof(self): ) def _schedule_reask_ledger_status(self): - self._ledger_status_timer = \ - RepeatingTimer(self._timer, - self._config.LedgerStatusTimeout * (len(self._provider.all_nodes_names()) - 1), - self._reask_for_ledger_status) + self._ledger_status_timer.update_interval( + self._config.LedgerStatusTimeout * (len(self._provider.all_nodes_names()) - 1)) + self._ledger_status_timer.start() def _schedule_reask_last_cons_proof(self): - if self._consistency_proof_timer is None: - self._consistency_proof_timer = \ - RepeatingTimer(self._timer, - self._config.ConsistencyProofsTimeout * (len(self._provider.all_nodes_names()) - 1), - self._reask_for_last_consistency_proof) + self._consistency_proof_timer.update_interval( + self._config.ConsistencyProofsTimeout * (len(self._provider.all_nodes_names()) - 1)) + self._consistency_proof_timer.start() def _cancel_reask(self): if self._consistency_proof_timer: self._consistency_proof_timer.stop() - self._consistency_proof_timer = None if self._ledger_status_timer: self._ledger_status_timer.stop() self._timer.cancel(self._request_CPs_if_needed) diff --git a/plenum/test/node_catchup/test_node_catchup_with_connection_problem.py b/plenum/test/node_catchup/test_node_catchup_with_connection_problem.py index 8a50fd55e2..19daaf8330 100644 --- a/plenum/test/node_catchup/test_node_catchup_with_connection_problem.py +++ b/plenum/test/node_catchup/test_node_catchup_with_connection_problem.py @@ -1,6 +1,8 @@ import pytest from plenum.common.config_helper import PNodeConfigHelper +from plenum.common.messages.node_messages import LedgerStatus, ConsistencyProof from plenum.common.util import getCallableName +from 
plenum.server.router import Route from plenum.test.helper import sdk_send_random_and_check from plenum.test.node_catchup.helper import waitNodeDataEquality from plenum.test.pool_transactions.helper import \ @@ -12,7 +14,7 @@ call_count = 0 -@pytest.fixture(scope='function', params=range(1, 4)) +@pytest.fixture(scope='function', params=range(1, 5)) def lost_count(request): return request.param @@ -31,14 +33,6 @@ def test_catchup_with_lost_ledger_status(txnPoolNodeSet, node_to_disconnect = txnPoolNodeSet[-1] - def unpatch_after_call(status, frm): - global call_count - call_count += 1 - if call_count >= lost_count: - # unpatch processLedgerStatus after lost_count calls - monkeypatch.undo() - call_count = 0 - sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5) @@ -60,9 +54,17 @@ def unpatch_after_call(status, frm): config=tconf, ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) + + def unpatch_after_call(status, frm): + global call_count + call_count += 1 + if call_count >= lost_count: + # unpatch processLedgerStatus after lost_count calls + node_to_disconnect.nodeMsgRouter.add((LedgerStatus, node_to_disconnect.ledgerManager.processLedgerStatus)) + call_count = 0 + # patch processLedgerStatus - monkeypatch.setattr(node_to_disconnect.ledgerManager, 'processLedgerStatus', - unpatch_after_call) + node_to_disconnect.nodeMsgRouter.add((LedgerStatus, unpatch_after_call)) # add node_to_disconnect to pool looper.add(node_to_disconnect) @@ -88,14 +90,6 @@ def test_catchup_with_lost_first_consistency_proofs(txnPoolNodeSet, Test makes sure that the node eventually finishes catchup''' node_to_disconnect = txnPoolNodeSet[-1] - def unpatch_after_call(proof, frm): - global call_count - call_count += 1 - if call_count >= lost_count: - # unpatch processConsistencyProof after lost_count calls - monkeypatch.undo() - call_count = 0 - sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5) @@ -117,10 +111,18 @@ def 
unpatch_after_call(proof, frm): config=tconf, ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath) + + def unpatch_after_call(proof, frm): + global call_count + call_count += 1 + if call_count >= lost_count: + # unpatch processConsistencyProof after lost_count calls + node_to_disconnect.nodeMsgRouter.add((ConsistencyProof, + node_to_disconnect.ledgerManager.processConsistencyProof)) + call_count = 0 + # patch processConsistencyProof - monkeypatch.setattr(node_to_disconnect.ledgerManager, - 'processConsistencyProof', - unpatch_after_call) + node_to_disconnect.nodeMsgRouter.add((ConsistencyProof, unpatch_after_call)) # add node_to_disconnect to pool looper.add(node_to_disconnect) txnPoolNodeSet[-1] = node_to_disconnect diff --git a/plenum/test/node_catchup_with_3pc/test_stashing_3pc_while_catchup_only_checkpoints.py b/plenum/test/node_catchup_with_3pc/test_stashing_3pc_while_catchup_only_checkpoints.py index cb83b59c3a..637e802231 100644 --- a/plenum/test/node_catchup_with_3pc/test_stashing_3pc_while_catchup_only_checkpoints.py +++ b/plenum/test/node_catchup_with_3pc/test_stashing_3pc_while_catchup_only_checkpoints.py @@ -1,6 +1,7 @@ from logging import getLogger import pytest +from plenum.common.constants import LEDGER_STATUS from plenum.common.messages.node_messages import Checkpoint, LedgerStatus from plenum.common.startable import Mode @@ -87,7 +88,7 @@ def test_3pc_while_catchup_with_chkpoints_only(tdir, tconf, lagging_node.nodeIbStasher.delay(pDelay()) lagging_node.nodeIbStasher.delay(cDelay()) - with delay_rules(lagging_node.nodeIbStasher, lsDelay(), cr_delay()): + with delay_rules(lagging_node.nodeIbStasher, lsDelay(), cr_delay(), msg_rep_delay(types_to_delay=[LEDGER_STATUS])): looper.add(lagging_node) txnPoolNodeSet[-1] = lagging_node looper.run(checkNodesConnected(txnPoolNodeSet)) From 99da7a49029c821662f5fb9a1506da344f803085 Mon Sep 17 00:00:00 2001 From: anikitinDSR Date: Wed, 28 Aug 2019 11:30:03 +0300 Subject: [PATCH 077/100] Merge pull request #1309 
from anikitinDSR/public/indy-2218 [INDY-2218] Add check for identifier decoding Signed-off-by: Andrew Nikitin --- stp_zmq/test/conftest.py | 94 ++++++++++++++++++++ stp_zmq/test/test_stashed_client_messages.py | 63 +------------ stp_zmq/test/test_zstack.py | 15 ++++ stp_zmq/zstack.py | 6 ++ 4 files changed, 116 insertions(+), 62 deletions(-) diff --git a/stp_zmq/test/conftest.py b/stp_zmq/test/conftest.py index abdab4d593..bbf75e245e 100644 --- a/stp_zmq/test/conftest.py +++ b/stp_zmq/test/conftest.py @@ -3,12 +3,18 @@ import pytest import zmq.asyncio +from plenum.common.stacks import ClientZStack +from plenum.test.helper import MockTimer from stp_core.common.log import getlogger from stp_core.common.config.util import getConfig from stp_core.common.temp_file_util import SafeTemporaryDirectory +from stp_core.crypto.util import randomSeed from stp_core.loop.looper import Looper from stp_core.network.port_dispenser import genHa +from stp_core.test.helper import SMotor +from stp_zmq.simple_zstack import SimpleZStack +from stp_zmq.test.helper import genKeys @pytest.fixture() @@ -69,3 +75,91 @@ def set_info_log_level(): logger.setLevel(logging.INFO) yield logger.setLevel(lvl) + + +@pytest.fixture() +def alpha_handler(tdir, looper): + return Handler() + + +@pytest.fixture() +def stacks(tdir, looper, alpha_handler): + names = ['Alpha', 'Beta'] + genKeys(tdir, names) + aseed = randomSeed() + bseed = randomSeed() + + def bHandler(m): + msg, a = m + beta.send(msg, a) + + stackParams = { + "name": names[0], + "ha": genHa(), + "auto": 2, + "basedirpath": tdir + } + timer = MockTimer(0) + alpha = SimpleZStack(stackParams, alpha_handler.handle, aseed, False, + timer=timer) + + stackParams = { + "name": names[1], + "ha": genHa(), + "auto": 2, + "basedirpath": tdir + } + timer = MockTimer(0) + beta = SimpleZStack(stackParams, bHandler, bseed, True, + timer=timer) + + amotor = SMotor(alpha) + looper.add(amotor) + + bmotor = SMotor(beta) + looper.add(bmotor) + return alpha, beta + + 
+@pytest.fixture() +def clientstack(tdir, looper, alpha_handler): + names = ['ClientA', 'Alpha'] + genKeys(tdir, names) + aseed = randomSeed() + cseed = randomSeed() + + stackParams = { + "name": names[0], + "ha": genHa(), + "auto": 2, + "basedirpath": tdir + } + timer = MockTimer(0) + client = ClientZStack(stackParams, alpha_handler.handle, cseed, False, + timer=timer) + + stackParams = { + "name": names[1], + "ha": genHa(), + "auto": 2, + "basedirpath": tdir + } + timer = MockTimer(0) + alpha = SimpleZStack(stackParams, alpha_handler, aseed, True, + timer=timer) + + amotor = SMotor(alpha) + looper.add(amotor) + + cmotor = SMotor(client) + looper.add(cmotor) + return alpha, client + + +class Handler: + def __init__(self) -> None: + self.received_messages = [] + + def handle(self, m): + d, msg = m + self.received_messages.append(d) diff --git a/stp_zmq/test/test_stashed_client_messages.py b/stp_zmq/test/test_stashed_client_messages.py index dc8ee8e924..2b89eac0d4 100644 --- a/stp_zmq/test/test_stashed_client_messages.py +++ b/stp_zmq/test/test_stashed_client_messages.py @@ -1,74 +1,13 @@ -import json - -import pytest import zmq -from plenum.test.helper import assertExp, MockTimer, MockTimestamp -from stp_core.crypto.util import randomSeed +from plenum.test.helper import assertExp from stp_core.loop.eventually import eventually -from stp_core.network.port_dispenser import genHa -from stp_core.test.helper import SMotor -from stp_zmq.test.helper import genKeys -from stp_zmq.simple_zstack import SimpleZStack - - -@pytest.fixture() -def alpha_handler(tdir, looper): - return Handler() - - -@pytest.fixture() -def stacks(tdir, looper, alpha_handler): - names = ['Alpha', 'Beta'] - genKeys(tdir, names) - aseed = randomSeed() - bseed = randomSeed() - - def bHandler(m): - msg, a = m - beta.send(msg, a) - - stackParams = { - "name": names[0], - "ha": genHa(), - "auto": 2, - "basedirpath": tdir - } - timer = MockTimer(0) - alpha = SimpleZStack(stackParams, alpha_handler.handle, 
aseed, False, - timer=timer) - - stackParams = { - "name": names[1], - "ha": genHa(), - "auto": 2, - "basedirpath": tdir - } - timer = MockTimer(0) - beta = SimpleZStack(stackParams, bHandler, bseed, True, - timer=timer) - - amotor = SMotor(alpha) - looper.add(amotor) - - bmotor = SMotor(beta) - looper.add(bmotor) - return alpha, beta def create_msg(i): return {'msg': 'msg{}'.format(i)} -class Handler: - def __init__(self) -> None: - self.received_messages = [] - - def handle(self, m): - d, msg = m - self.received_messages.append(d) - - def test_stash_msg_to_unknown(tdir, looper, stacks, alpha_handler): alpha, beta = stacks pending_client_messages = beta._client_message_provider._pending_client_messages diff --git a/stp_zmq/test/test_zstack.py b/stp_zmq/test/test_zstack.py index d3a8ec3625..d4767b670e 100644 --- a/stp_zmq/test/test_zstack.py +++ b/stp_zmq/test/test_zstack.py @@ -127,6 +127,21 @@ def test_zstack_non_utf8(tdir, looper, tconf): looper.run(eventually(chkPrinted, betaP, {"k3": "v3"})) +def test_ident_not_utf_8_decoded(clientstack): + """ + ZStack gets a non utf-8 identifier and does not hand it over to the + processing method + :return: + """ + _, client = clientstack + wrong_identifier = b'\xd1 \xf8\x16\x9a]~~\x14\x94CF\xc1\x89n\xd5\tL\x1b\xe8J+\xa5\xbe\x17\xf6\xe6J@\xa1\xd0#' + + msg = b'{"msg": "msg"}' + assert not client.rxMsgs + assert not client._verifyAndAppend(msg, wrong_identifier) + assert not client.rxMsgs + + def test_zstack_creates_keys_with_secure_permissions(tdir): any_seed = b'0' * 32 stack_name = 'aStack' diff --git a/stp_zmq/zstack.py b/stp_zmq/zstack.py index 52296c1449..10f5f0eded 100644 --- a/stp_zmq/zstack.py +++ b/stp_zmq/zstack.py @@ -472,6 +472,12 @@ async def service(self, limit=None, quota: Optional[Quota] = None) -> int: return 0 def _verifyAndAppend(self, msg, ident): + try: + ident.decode() + except ValueError: + logger.error("Identifier {} is not decoded into UTF-8 string. 
" + "Request will not be processed".format(ident)) + return False try: self.metrics.add_event(self.mt_incoming_size, len(msg)) self.msgLenVal.validate(msg) From 1ab57b3cb7374527a03abde7f228ffe7198b1762 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Wed, 28 Aug 2019 15:21:06 +0300 Subject: [PATCH 078/100] [rc-1.9.2.rc1] bump package version Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 7fc906c3ca..ebef9ef5cc 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 9, 1, "", ""] +[1, 9, 2, "rc", 1] From dcff8ae789a5ef2fb7b478141a3f1470660d90c8 Mon Sep 17 00:00:00 2001 From: Sovbot Date: Wed, 28 Aug 2019 15:08:14 +0000 Subject: [PATCH 079/100] release 1.9.2 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index ebef9ef5cc..3f81a88a98 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 9, 2, "rc", 1] +[1, 9, 2, "", ""] From 1b94cbdb563398d18dd312fadcf740c1bfb39172 Mon Sep 17 00:00:00 2001 From: ashcherbakov Date: Wed, 25 Sep 2019 12:02:55 +0300 Subject: [PATCH 080/100] Merge pull request #1348 from donqui/master [INDY-2213]: Bump pyzmq version Signed-off-by: Andrew Nikitin --- setup.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 84f90d62b2..f40169ce1b 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,11 @@ import os import sys +import subprocess +import distutils.cmd from setuptools import setup, find_packages +from setuptools.command.install import install +from setuptools.command.develop import develop v = sys.version_info if sys.version_info < (3, 5): @@ -25,7 +29,45 @@ tests_require = ['pytest==3.3.1', 'pytest-xdist==1.22.1', 
'python3-indy==1.11.0-dev-1282', 'pytest-asyncio==0.8.0'] + +class PyZMQCommand(distutils.cmd.Command): + description = 'pyzmq install target' + + version = 'pyzmq==18.1.0' + options = '--install-option=--zmq=bundled' + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + command = ['pip', 'install', self.version, self.options] + subprocess.check_call(command) + + +class InstallCommand(install): + description = 'install target' + + def run(self): + install.run_command(self, command='pyzmq') + install.run(self) + + +class DevelopCommand(develop): + description = 'develop target' + + def run(self): + develop.run_command(self, command='pyzmq') + develop.run(self) + setup( + cmdclass={ + 'install': InstallCommand, + 'develop': DevelopCommand, + 'pyzmq': PyZMQCommand, + }, name=metadata['__title__'], version=metadata['__version__'], author=metadata['__author__'], @@ -46,16 +88,33 @@ '': ['*.txt', '*.md', '*.rst', '*.json', '*.conf', '*.html', '*.css', '*.ico', '*.png', 'LICENSE', 'LEGAL', 'plenum']}, include_package_data=True, - install_requires=['jsonpickle==0.9.6', 'ujson==1.33', - 'prompt_toolkit==0.57', 'pygments==2.2.0', - 'rlp==0.5.1', 'sha3==0.2.1', 'leveldb', - 'ioflo==1.5.4', 'semver==2.7.9', 'base58==1.0.0', 'orderedset==2.0', - 'sortedcontainers==1.5.7', 'psutil==5.4.3', 'pip<10.0.0', - 'portalocker==0.5.7', 'pyzmq==17.0.0', 'libnacl==1.6.1', - 'six==1.11.0', 'psutil==5.4.3', 'intervaltree==2.1.0', - 'msgpack-python==0.4.6', 'indy-crypto==0.4.5', - 'python-rocksdb==0.6.9', 'python-dateutil==2.6.1', - 'pympler==0.5', 'packaging==19.0'], + install_requires=[ + 'jsonpickle==0.9.6', + 'ujson==1.33', + 'prompt_toolkit==0.57', + 'pygments==2.2.0', + 'rlp==0.5.1', + 'sha3==0.2.1', + 'leveldb', + 'ioflo==1.5.4', + 'semver==2.7.9', + 'base58==1.0.0', + 'orderedset==2.0', + 'sortedcontainers==1.5.7', + 'psutil==5.4.3', + 'pip<10.0.0', + 'portalocker==0.5.7', + 'libnacl==1.6.1', + 'six==1.11.0', + 'psutil==5.4.3', + 
'intervaltree==2.1.0', + 'msgpack-python==0.4.6', + 'indy-crypto==0.4.5', + 'python-rocksdb==0.6.9', + 'python-dateutil==2.6.1', + 'pympler==0.5', + 'packaging==19.0', + ], setup_requires=['pytest-runner'], extras_require={ 'tests': tests_require, From 76dba258a572ed234d0cab008592d47d63ce2a33 Mon Sep 17 00:00:00 2001 From: anikitinDSR Date: Thu, 26 Sep 2019 12:30:45 +0300 Subject: [PATCH 081/100] Merge pull request #1350 from donqui/master [INDY-2213]: Fix FPM dependencies Signed-off-by: Andrew Nikitin --- .../ubuntu-1604/build-3rd-parties.sh | 45 +++++++++++++------ .../ubuntu-1604/build-indy-plenum.sh | 1 + setup.py | 1 + 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/build-scripts/ubuntu-1604/build-3rd-parties.sh b/build-scripts/ubuntu-1604/build-3rd-parties.sh index 99608799d2..cbee7750d8 100755 --- a/build-scripts/ubuntu-1604/build-3rd-parties.sh +++ b/build-scripts/ubuntu-1604/build-3rd-parties.sh @@ -42,19 +42,36 @@ function build_from_pypi { sed -i 's/{package_name}/python3-'${PACKAGE_NAME_TMP}'/' ${POSTINST_TMP} sed -i 's/{package_name}/python3-'${PACKAGE_NAME_TMP}'/' ${PREREM_TMP} - fpm --input-type "python" \ - --output-type "deb" \ - --architecture "amd64" \ - --verbose \ - --python-package-name-prefix "python3"\ - --python-bin "/usr/bin/python3" \ - --exclude "*.pyc" \ - --exclude "*.pyo" \ - --maintainer "Hyperledger " \ - --after-install ${POSTINST_TMP} \ - --before-remove ${PREREM_TMP} \ - --package ${OUTPUT_PATH} \ - ${PACKAGE_NAME}${PACKAGE_VERSION} + if [ -z $3 ]; then + fpm --input-type "python" \ + --output-type "deb" \ + --architecture "amd64" \ + --verbose \ + --python-package-name-prefix "python3"\ + --python-bin "/usr/bin/python3" \ + --exclude "*.pyc" \ + --exclude "*.pyo" \ + --maintainer "Hyperledger " \ + --after-install ${POSTINST_TMP} \ + --before-remove ${PREREM_TMP} \ + --package ${OUTPUT_PATH} \ + ${PACKAGE_NAME}${PACKAGE_VERSION} + else + fpm --input-type "python" \ + --output-type "deb" \ + --architecture "amd64" 
\ + --python-setup-py-arguments "--zmq=bundled" \ + --verbose \ + --python-package-name-prefix "python3"\ + --python-bin "/usr/bin/python3" \ + --exclude "*.pyc" \ + --exclude "*.pyo" \ + --maintainer "Hyperledger " \ + --after-install ${POSTINST_TMP} \ + --before-remove ${PREREM_TMP} \ + --package ${OUTPUT_PATH} \ + ${PACKAGE_NAME}${PACKAGE_VERSION} + fi rm ${POSTINST_TMP} rm ${PREREM_TMP} @@ -80,7 +97,7 @@ build_from_pypi python-dateutil 2.6.1 build_from_pypi semver 2.7.9 build_from_pypi pygments 2.2.0 build_from_pypi psutil 5.4.3 -build_from_pypi pyzmq 17.0.0 +build_from_pypi pyzmq 18.1.0 bundled build_from_pypi intervaltree 2.1.0 build_from_pypi jsonpickle 0.9.6 # TODO: add libsnappy dependency for python-rocksdb package diff --git a/build-scripts/ubuntu-1604/build-indy-plenum.sh b/build-scripts/ubuntu-1604/build-indy-plenum.sh index 4c0ef1632c..e8c2a7e290 100755 --- a/build-scripts/ubuntu-1604/build-indy-plenum.sh +++ b/build-scripts/ubuntu-1604/build-indy-plenum.sh @@ -21,6 +21,7 @@ sed -i 's/{package_name}/'${PACKAGE_NAME}'/' "prerm" fpm --input-type "python" \ --output-type "deb" \ --architecture "amd64" \ + --depends "python3-pyzmq (= 18.1.0)" \ --verbose \ --python-package-name-prefix "python3"\ --python-bin "/usr/bin/python3" \ diff --git a/setup.py b/setup.py index f40169ce1b..1ea9601c11 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ def run(self): develop.run_command(self, command='pyzmq') develop.run(self) + setup( cmdclass={ 'install': InstallCommand, From e39ac08771fff99c84e66f7d42f4beaff8292543 Mon Sep 17 00:00:00 2001 From: Nemanja Patrnogic Date: Thu, 26 Sep 2019 13:58:21 +0200 Subject: [PATCH 082/100] [INDY-2213]: Update pyzmq for Jenkins CD Signed-off-by: Nemanja Patrnogic --- Jenkinsfile.cd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile.cd b/Jenkinsfile.cd index c41d9dc464..8105068a63 100644 --- a/Jenkinsfile.cd +++ b/Jenkinsfile.cd @@ -259,7 +259,7 @@ options.setBuiltPkgs([ 'python3-semver': '2.7.9', 
'python3-pygments': '2.2.0', 'python3-psutil': '5.4.3', - 'python3-pyzmq': '17.0.0', + 'python3-pyzmq': '18.1.0', 'python3-intervaltree': '2.1.0', 'python3-jsonpickle': '0.9.6', 'python3-rocksdb': '0.6.9', From feacbab7c173663f6a76e3d570181ec70ff2c997 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Mon, 30 Sep 2019 15:31:17 +0300 Subject: [PATCH 083/100] [RC-1.10.0] bump package version Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index be535be52e..59adf7df7b 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 10, 0, "dev", 0] +[1, 10, 0, "rc", 1] From 816bb22e91733715ceec0297015805144a1904da Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Tue, 1 Oct 2019 11:11:37 +0300 Subject: [PATCH 084/100] [RC-1.10.0] MC Signed-off-by: Andrew Nikitin --- plenum/test/view_change/test_client_req_during_view_change.py | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/plenum/test/view_change/test_client_req_during_view_change.py b/plenum/test/view_change/test_client_req_during_view_change.py index ffde6f37d0..296b3f7b39 100644 --- a/plenum/test/view_change/test_client_req_during_view_change.py +++ b/plenum/test/view_change/test_client_req_during_view_change.py @@ -70,4 +70,4 @@ def test_client_msg_discard_in_view_change_with_request(test_node): def check_nack_msg(req_key, reason, to_client): assert "Client request is discarded since view " \ - "change is in progress" == reason +"change is in progress" == reason diff --git a/setup.py b/setup.py index 1ea9601c11..f7964b6420 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,6 @@ tests_require = ['pytest==3.3.1', 'pytest-xdist==1.22.1', 'python3-indy==1.11.0-dev-1282', 'pytest-asyncio==0.8.0'] - class PyZMQCommand(distutils.cmd.Command): description = 'pyzmq install target' From 98b51c9567c234bec497a506e2fcb510659fab6a Mon Sep 17 
00:00:00 2001 From: ashcherbakov Date: Wed, 2 Oct 2019 16:27:41 +0300 Subject: [PATCH 085/100] Merge pull request #1356 from KitHat/INDY-2233 INDY-2233 -- Fix audit proof for GET_TXN Signed-off-by: Andrew Nikitin --- ci/ubuntu.dockerfile | 2 +- .../request_handlers/get_txn_handler.py | 13 +++-- plenum/test/bls/helper.py | 15 +++-- plenum/test/client/test_protocol_version.py | 6 +- plenum/test/pool_transactions/helper.py | 6 +- .../test_get_txn_after_bls_key_rotation.py | 56 +++++++++++++++++++ .../test_get_empty_txn_author_agreement.py | 7 +-- setup.py | 2 +- 8 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 plenum/test/test_get_txn_after_bls_key_rotation.py diff --git a/ci/ubuntu.dockerfile b/ci/ubuntu.dockerfile index b540f2d1c2..7271d0b11c 100644 --- a/ci/ubuntu.dockerfile +++ b/ci/ubuntu.dockerfile @@ -10,7 +10,7 @@ RUN echo "To invalidate cache" RUN apt-get update -y && apt-get install -y \ python3-nacl \ libindy-crypto=0.4.5 \ - libindy=1.11.0~1282 \ + libindy=1.11.1~1343 \ # rocksdb python wrapper libbz2-dev \ zlib1g-dev \ diff --git a/plenum/server/request_handlers/get_txn_handler.py b/plenum/server/request_handlers/get_txn_handler.py index 2a99b92452..a3b01ef53f 100644 --- a/plenum/server/request_handlers/get_txn_handler.py +++ b/plenum/server/request_handlers/get_txn_handler.py @@ -1,5 +1,5 @@ from plenum.common.constants import DOMAIN_LEDGER_ID, DATA, TXN_TYPE, GET_TXN, MULTI_SIGNATURE, AUDIT_LEDGER_ID, \ - AUDIT_TXN_STATE_ROOT, TXN_PAYLOAD + AUDIT_TXN_STATE_ROOT, TXN_PAYLOAD, STATE_PROOF from plenum.common.exceptions import InvalidClientRequest from plenum.common.messages.node_messages import RequestNack, Reply from plenum.common.request import Request @@ -38,6 +38,8 @@ def get_result(self, request: Request): seq_no = request.operation.get(DATA) + multi_sig = None + try: txn = self.node.getReplyFromLedger(db.ledger, seq_no, write=False) audit_ledger = self.database_manager.get_ledger(AUDIT_LEDGER_ID) @@ -50,8 +52,6 @@ def 
get_result(self, request: Request): break if state_root is not None: multi_sig = self.database_manager.bls_store.get(state_root) - if multi_sig: - txn.result[MULTI_SIGNATURE] = multi_sig.as_dict() except KeyError: txn = None @@ -64,9 +64,14 @@ def get_result(self, request: Request): f.IDENTIFIER.nm: request.identifier, f.REQ_ID.nm: request.reqId, TXN_TYPE: request.operation[TXN_TYPE], - DATA: None + DATA: None, } + if multi_sig: + result[STATE_PROOF] = { + MULTI_SIGNATURE: multi_sig.as_dict() + } + if txn: result[DATA] = txn.result result[f.SEQ_NO.nm] = get_seq_no(txn.result) diff --git a/plenum/test/bls/helper.py b/plenum/test/bls/helper.py index ac90a05a8c..10f771a12a 100644 --- a/plenum/test/bls/helper.py +++ b/plenum/test/bls/helper.py @@ -124,7 +124,8 @@ def sdk_change_bls_key(looper, txnPoolNodeSet, add_wrong=False, new_bls=None, new_key_proof=None, - check_functional=True): + check_functional=True, + pool_refresh=True): if add_wrong: _, new_blspk, key_proof = create_default_bls_crypto_factory().generate_bls_keys() else: @@ -139,11 +140,13 @@ def sdk_change_bls_key(looper, txnPoolNodeSet, None, None, bls_key=key_in_txn, services=None, - key_proof=bls_key_proof) + key_proof=bls_key_proof, + pool_refresh=pool_refresh) poolSetExceptOne = list(txnPoolNodeSet) poolSetExceptOne.remove(node) waitNodeDataEquality(looper, node, *poolSetExceptOne) - sdk_pool_refresh(looper, sdk_pool_handle) + if pool_refresh: + sdk_pool_refresh(looper, sdk_pool_handle) if check_functional: sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle) return new_blspk @@ -173,7 +176,8 @@ def check_update_bls_key(node_num, saved_multi_sigs_count, sdk_wallet_stewards, sdk_wallet_client, sdk_pool_handle, - add_wrong=False): + add_wrong=False, + pool_refresh=True): # 1. 
Change BLS key for a specified NODE node = txnPoolNodeSet[node_num] sdk_wallet_steward = sdk_wallet_stewards[node_num] @@ -181,7 +185,8 @@ def check_update_bls_key(node_num, saved_multi_sigs_count, node, sdk_pool_handle, sdk_wallet_steward, - add_wrong) + add_wrong, + pool_refresh=pool_refresh) # 2. Check that all Nodes see the new BLS key value check_bls_key(new_blspk, node, txnPoolNodeSet, add_wrong) diff --git a/plenum/test/client/test_protocol_version.py b/plenum/test/client/test_protocol_version.py index 6ade012e22..996efc71b1 100644 --- a/plenum/test/client/test_protocol_version.py +++ b/plenum/test/client/test_protocol_version.py @@ -6,7 +6,7 @@ from plenum.server.node import Node from plenum.common.constants import CURRENT_PROTOCOL_VERSION -from plenum.common.exceptions import RequestNackedException +from plenum.common.exceptions import RequestNackedException, CommonSdkIOException from plenum.test.helper import sdk_send_signed_requests, \ sdk_get_and_check_replies, sdk_random_request_objects, \ sdk_sign_request_objects, sdk_get_bad_response, sdk_send_random_and_check @@ -118,8 +118,8 @@ def test_request_with_invalid_version(looper, signed_reqs = sdk_sign_request_objects(looper, sdk_wallet_client, reqs_obj) reqs = sdk_send_signed_requests(sdk_pool_handle, signed_reqs) - sdk_get_bad_response(looper, reqs, RequestNackedException, - 'Unknown protocol version value. 
' + error_msg) + sdk_get_bad_response(looper, reqs, CommonSdkIOException, + 'Got an error with code 113') def test_request_with_correct_version(looper, diff --git a/plenum/test/pool_transactions/helper.py b/plenum/test/pool_transactions/helper.py index cb02e2a9c4..ba0dc41896 100644 --- a/plenum/test/pool_transactions/helper.py +++ b/plenum/test/pool_transactions/helper.py @@ -317,7 +317,8 @@ def sdk_send_update_node(looper, sdk_submitter_wallet, client_ip, client_port, services=[VALIDATOR], bls_key=None, - key_proof=None): + key_proof=None, + pool_refresh=True): _, submitter_did = sdk_submitter_wallet # filling node request node_request = looper.loop.run_until_complete( @@ -338,7 +339,8 @@ def sdk_send_update_node(looper, sdk_submitter_wallet, # waitng for replies reply = sdk_get_and_check_replies(looper, [request_couple])[0][1] - sdk_pool_refresh(looper, sdk_pool_handle) + if pool_refresh: + sdk_pool_refresh(looper, sdk_pool_handle) return reply diff --git a/plenum/test/test_get_txn_after_bls_key_rotation.py b/plenum/test/test_get_txn_after_bls_key_rotation.py new file mode 100644 index 0000000000..0d8424af86 --- /dev/null +++ b/plenum/test/test_get_txn_after_bls_key_rotation.py @@ -0,0 +1,56 @@ +from plenum.test.bls.helper import check_update_bls_key +from indy import ledger +from indy.did import create_and_store_my_did + +from plenum.test.delayers import cDelay +from plenum.test.stasher import delay_rules, delay_rules_without_processing + +nodeCount = 4 + + +def test_get_txn_after_bls_key_rotation(looper, txnPoolNodeSet, + sdk_wallet_stewards, + sdk_wallet_trustee, + sdk_wallet_client, + sdk_pool_handle): + check_update_bls_key(node_num=0, saved_multi_sigs_count=4, + looper=looper, txnPoolNodeSet=txnPoolNodeSet, + sdk_wallet_stewards=sdk_wallet_stewards, + sdk_wallet_client=sdk_wallet_client, + sdk_pool_handle=sdk_pool_handle, + pool_refresh=False) + check_update_bls_key(node_num=1, saved_multi_sigs_count=4, + looper=looper, txnPoolNodeSet=txnPoolNodeSet, + 
sdk_wallet_stewards=sdk_wallet_stewards, + sdk_wallet_client=sdk_wallet_client, + sdk_pool_handle=sdk_pool_handle, + pool_refresh=False) + check_update_bls_key(node_num=2, saved_multi_sigs_count=4, + looper=looper, txnPoolNodeSet=txnPoolNodeSet, + sdk_wallet_stewards=sdk_wallet_stewards, + sdk_wallet_client=sdk_wallet_client, + sdk_pool_handle=sdk_pool_handle, + pool_refresh=False) + check_update_bls_key(node_num=3, saved_multi_sigs_count=4, + looper=looper, txnPoolNodeSet=txnPoolNodeSet, + sdk_wallet_stewards=sdk_wallet_stewards, + sdk_wallet_client=sdk_wallet_client, + sdk_pool_handle=sdk_pool_handle, + pool_refresh=False) + + # Stop receiving of commits in a circle, so all nodes will have different sets of multi signatures + with delay_rules_without_processing(txnPoolNodeSet[0].nodeIbStasher, cDelay(delay=1200, sender_filter=txnPoolNodeSet[3].name)): + with delay_rules_without_processing(txnPoolNodeSet[1].nodeIbStasher, cDelay(delay=1200, sender_filter=txnPoolNodeSet[0].name)): + with delay_rules_without_processing(txnPoolNodeSet[2].nodeIbStasher, cDelay(delay=1200, sender_filter=txnPoolNodeSet[1].name)): + with delay_rules_without_processing(txnPoolNodeSet[3].nodeIbStasher, cDelay(delay=1200, sender_filter=txnPoolNodeSet[2].name)): + did_future = create_and_store_my_did(sdk_wallet_client[0], "{}") + did, verkey = looper.loop.run_until_complete(did_future) + nym_request_future = ledger.build_nym_request(sdk_wallet_trustee[1], did, verkey, None, None) + nym_request = looper.loop.run_until_complete(nym_request_future) + nym_response_future = ledger.sign_and_submit_request(sdk_pool_handle, sdk_wallet_trustee[0], sdk_wallet_trustee[1], nym_request) + looper.loop.run_until_complete(nym_response_future) + + get_txn_request_future = ledger.build_get_txn_request(sdk_wallet_client[1], "DOMAIN", 1) + get_txn_request = looper.loop.run_until_complete(get_txn_request_future) + get_txn_response_future = ledger.submit_request(sdk_pool_handle, get_txn_request) + 
looper.loop.run_until_complete(get_txn_response_future) diff --git a/plenum/test/txn_author_agreement/test_get_empty_txn_author_agreement.py b/plenum/test/txn_author_agreement/test_get_empty_txn_author_agreement.py index dac292d849..310aadda0e 100644 --- a/plenum/test/txn_author_agreement/test_get_empty_txn_author_agreement.py +++ b/plenum/test/txn_author_agreement/test_get_empty_txn_author_agreement.py @@ -1,7 +1,8 @@ import pytest +from indy.error import CommonInvalidParam3 from plenum.common.constants import REPLY, CONFIG_LEDGER_ID -from plenum.common.exceptions import RequestNackedException +from plenum.common.exceptions import RequestNackedException, CommonSdkIOException from plenum.common.util import get_utc_epoch from plenum.test.delayers import req_delay from plenum.test.stasher import delay_rules @@ -59,7 +60,5 @@ def test_get_txn_author_agreement_works_on_clear_state(params, state_key, looper ]) def test_get_txn_author_agreement_cannot_have_more_than_one_parameter(params, looper, nodeSetWithoutTaa, sdk_pool_handle, sdk_wallet_client): - with pytest.raises(RequestNackedException) as e: + with pytest.raises(CommonInvalidParam3) as e: sdk_get_txn_author_agreement(looper, sdk_pool_handle, sdk_wallet_client, **params) - assert e.match("GET_TXN_AUTHOR_AGREEMENT request can have at most one " - "of the following parameters: version, digest, timestamp") diff --git a/setup.py b/setup.py index 9db1ed3c01..885ed7bb73 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ with open(metadata['__file__'], 'r') as f: exec(f.read(), metadata) -tests_require = ['pytest==3.3.1', 'pytest-xdist==1.22.1', 'python3-indy==1.11.0-dev-1282', 'pytest-asyncio==0.8.0', 'attrs==19.1.0'] +tests_require = ['attrs==19.1.0', 'pytest==3.3.1', 'pytest-xdist==1.22.1', 'python3-indy==1.11.1-dev-1343', 'pytest-asyncio==0.8.0'] class PyZMQCommand(distutils.cmd.Command): From 46e20795a9da4cffd2a34c669843bc99cc6a92a5 Mon Sep 17 00:00:00 2001 From: Sovbot Date: Thu, 3 Oct 2019 06:50:28 +0000 
Subject: [PATCH 086/100] release 1.10.0 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 59adf7df7b..0400650bd1 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 10, 0, "rc", 1] +[1, 10, 0, "", ""] From de28748c37a46e5e9da0db7ccaaff117cbdc53b1 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Tue, 29 Oct 2019 17:04:34 +0300 Subject: [PATCH 087/100] [RC-1.11.0.rc1] bump package version Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 8984406e57..3795aa3cf2 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 11, 0, "dev", 0] +[1, 11, 0, "rc", 1] From 5c7788c461c820a49e8583aeb566df49baf5773a Mon Sep 17 00:00:00 2001 From: Sovbot Date: Wed, 30 Oct 2019 06:47:47 +0000 Subject: [PATCH 088/100] release 1.11.0 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 3795aa3cf2..1f591976a6 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 11, 0, "rc", 1] +[1, 11, 0, "", ""] From d7e0b45c2343fc5f66f4a95d6b19421fad911106 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Tue, 26 Nov 2019 17:02:33 +0300 Subject: [PATCH 089/100] [RC-1.12.0.rc1] bump package version to 1.12.0.rc1 Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index a5530c59f2..dd13031d49 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 0, "dev", 0] +[1, 12, 0, "rc", 1] From 0c314a1f9c61a36e220587719c10d184c46f1b90 Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Thu, 28 Nov 2019 
10:31:49 +0300 Subject: [PATCH 090/100] [RC-1.12.0.rc1] add new seed to exception set Signed-off-by: Andrew Nikitin --- plenum/test/consensus/view_change/test_sim_view_change.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/test/consensus/view_change/test_sim_view_change.py b/plenum/test/consensus/view_change/test_sim_view_change.py index 64ef54d0b7..954b665af0 100644 --- a/plenum/test/consensus/view_change/test_sim_view_change.py +++ b/plenum/test/consensus/view_change/test_sim_view_change.py @@ -38,7 +38,7 @@ def default_random(request): def random_random(request): seed = request.param # TODO: Remove after starting processing INSTANCE_CHANGE messages in simulation tests - if seed in {290370, 749952, 348636, 919685, 674863, 378187, 20271}: + if seed in {290370, 749952, 348636, 919685, 674863, 378187, 20271, 968600}: return DefaultSimRandom(0) return DefaultSimRandom(seed) From fd3f5b55069562a0ba00d122a2defc55cb878c08 Mon Sep 17 00:00:00 2001 From: Sovbot Date: Thu, 28 Nov 2019 08:57:58 +0000 Subject: [PATCH 091/100] release 1.12.0 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index dd13031d49..59c9a0b5ed 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 0, "rc", 1] +[1, 12, 0, "", ""] From 318106f48dbcfa641b231313b4cdbcaa1b2b2443 Mon Sep 17 00:00:00 2001 From: ashcherbakov Date: Thu, 26 Dec 2019 10:17:36 +0300 Subject: [PATCH 092/100] bump release version Signed-off-by: ashcherbakov --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index a5530c59f2..84cc7a87f0 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 0, "dev", 0] +[1, 12, 1, "rc", 1] From 6120d7cb9009abee346f7c876ff246466c33960e Mon Sep 17 00:00:00 2001 From: Sovbot Date: Thu, 26 Dec 2019 
11:47:27 +0000 Subject: [PATCH 093/100] release 1.12.1 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 84cc7a87f0..5037ae0556 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 1, "rc", 1] +[1, 12, 1, "", ""] From 5d8705906c925378148b9c0a92a25f0d0642e94a Mon Sep 17 00:00:00 2001 From: Andrew Nikitin Date: Mon, 27 Jan 2020 10:55:51 +0300 Subject: [PATCH 094/100] [RC1-1.12.2] bump package version Signed-off-by: Andrew Nikitin --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 1c0a1a3a8c..aca01bd3af 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 2, "dev", 0] +[1, 12, 2, "rc", 1] From 5b16b9a2fe047b70193a00911e9713fb0139f043 Mon Sep 17 00:00:00 2001 From: Sovbot Date: Mon, 27 Jan 2020 10:07:14 +0000 Subject: [PATCH 095/100] release 1.12.2 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index aca01bd3af..643deacef0 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 2, "rc", 1] +[1, 12, 2, "", ""] From 1c41f38df712afa66ced1e0b9e1d4ba2a4fbd101 Mon Sep 17 00:00:00 2001 From: toktar Date: Sun, 24 May 2020 06:41:43 +0300 Subject: [PATCH 096/100] Add validations for TAA_ACCEPTANCE_TIME and timestamps in the TAA txn Signed-off-by: toktar (cherry picked from commit dbbc0caa8f6c065f9248d2c1deeb990268247c1e) Signed-off-by: toktar --- .../txn_author_agreement_handler.py | 17 +++++++++++++- .../request_managers/write_request_manager.py | 7 +++++- .../test_taa_acceptance_validation.py | 15 ++++++++++++ .../test_txn_author_agreement.py | 23 +++++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) diff --git 
a/plenum/server/request_handlers/txn_author_agreement_handler.py b/plenum/server/request_handlers/txn_author_agreement_handler.py index 5c8a829e8c..c7fb6d11c2 100644 --- a/plenum/server/request_handlers/txn_author_agreement_handler.py +++ b/plenum/server/request_handlers/txn_author_agreement_handler.py @@ -1,3 +1,4 @@ +from datetime import datetime from typing import Optional from common.exceptions import LogicError @@ -19,7 +20,12 @@ def __init__(self, database_manager: DatabaseManager): super().__init__(database_manager, TXN_AUTHOR_AGREEMENT, CONFIG_LEDGER_ID) def static_validation(self, request: Request): - pass + self._validate_request_type(request) + operation, identifier, req_id = request.operation, request.identifier, request.reqId + self._validate_ts(operation.get(TXN_AUTHOR_AGREEMENT_RETIREMENT_TS), + identifier, req_id, TXN_AUTHOR_AGREEMENT_RETIREMENT_TS) + self._validate_ts(operation.get(TXN_AUTHOR_AGREEMENT_RATIFICATION_TS), + identifier, req_id, TXN_AUTHOR_AGREEMENT_RATIFICATION_TS) def dynamic_validation(self, request: Request, req_pp_time: Optional[int]): self._validate_request_type(request) @@ -134,3 +140,12 @@ def _validate_update_taa(self, request, digest): if last_taa_digest == digest: raise InvalidClientRequest(request.identifier, request.reqId, "The latest transaction author agreement cannot be retired.") + + def _validate_ts(self, ts, identifier, req_id, field_name): + if not ts: + return + try: + datetime.utcfromtimestamp(ts) + except ValueError: + raise InvalidClientRequest(identifier, req_id, + "{} = {} is out of range.".format(field_name, ts)) diff --git a/plenum/server/request_managers/write_request_manager.py b/plenum/server/request_managers/write_request_manager.py index 37c6768377..bf2c796703 100644 --- a/plenum/server/request_managers/write_request_manager.py +++ b/plenum/server/request_managers/write_request_manager.py @@ -346,7 +346,12 @@ def do_taa_validation(self, request: Request, req_pp_time: int, config): ) r_taa_a_ts = 
request.taaAcceptance[f.TAA_ACCEPTANCE_TIME.nm] - datetime_r_taa = datetime.utcfromtimestamp(r_taa_a_ts) + try: + datetime_r_taa = datetime.utcfromtimestamp(r_taa_a_ts) + except ValueError: + raise InvalidClientTaaAcceptanceError( + request.identifier, request.reqId, + "TAA_ACCEPTANCE_TIME = {} is out of range".format(r_taa_a_ts)) if datetime_r_taa.time() != time(0): raise InvalidClientTaaAcceptanceError( request.identifier, request.reqId, diff --git a/plenum/test/txn_author_agreement/acceptance/test_taa_acceptance_validation.py b/plenum/test/txn_author_agreement/acceptance/test_taa_acceptance_validation.py index 68ed60ca41..2a400b4638 100644 --- a/plenum/test/txn_author_agreement/acceptance/test_taa_acceptance_validation.py +++ b/plenum/test/txn_author_agreement/acceptance/test_taa_acceptance_validation.py @@ -106,6 +106,21 @@ def test_taa_acceptance_mechanism_inappropriate( validate_taa_acceptance(request_dict) +def test_taa_acceptance_with_incorrect_time( + validate_taa_acceptance, validation_error, + request_dict +): + request_dict[f.TAA_ACCEPTANCE.nm][f.TAA_ACCEPTANCE_TIME.nm] *= 1000 + with pytest.raises( + validation_error, + match=( + r"TAA_ACCEPTANCE_TIME = {} is " + r"out of range.".format(request_dict[f.TAA_ACCEPTANCE.nm][f.TAA_ACCEPTANCE_TIME.nm]) + ) + ): + validate_taa_acceptance(request_dict) + + def test_taa_acceptance_time_near_lower_threshold( tconf, txnPoolNodeSet, validate_taa_acceptance, validation_error, turn_off_freshness_state_update, max_last_accepted_pre_prepare_time, diff --git a/plenum/test/txn_author_agreement/test_txn_author_agreement.py b/plenum/test/txn_author_agreement/test_txn_author_agreement.py index 31d32dd91e..c0f36ae484 100644 --- a/plenum/test/txn_author_agreement/test_txn_author_agreement.py +++ b/plenum/test/txn_author_agreement/test_txn_author_agreement.py @@ -97,6 +97,29 @@ def test_create_txn_author_agreement_with_ratified_from_future_fails(looper, set ratified=get_utc_epoch() + 600) +def 
test_create_txn_author_agreement_with_milliseconds_ratified_fails(looper, set_txn_author_agreement_aml, + sdk_pool_handle, sdk_wallet_trustee): + ratified = get_utc_epoch() * 1000 + with pytest.raises(RequestNackedException, + match="{} = {} is out of range.".format(TXN_AUTHOR_AGREEMENT_RATIFICATION_TS, ratified)): + sdk_send_txn_author_agreement(looper, sdk_pool_handle, sdk_wallet_trustee, + version=randomString(16), + text=randomString(1024), + ratified=ratified) + + +def test_create_txn_author_agreement_with_milliseconds_retired_fails(looper, set_txn_author_agreement_aml, + sdk_pool_handle, sdk_wallet_trustee): + retired = get_utc_epoch() * 1000 + with pytest.raises(RequestNackedException, + match="{} = {} is out of range.".format(TXN_AUTHOR_AGREEMENT_RETIREMENT_TS, retired)): + sdk_send_txn_author_agreement(looper, sdk_pool_handle, sdk_wallet_trustee, + version=randomString(16), + text=randomString(1024), + ratified=get_utc_epoch() - 600, + retired=retired) + + @pytest.mark.parametrize('retired_offset', [-600, 600]) def test_create_txn_author_agreement_with_retired_date_fails(looper, set_txn_author_agreement_aml, sdk_pool_handle, sdk_wallet_trustee, From be90d21b277c0c55c1969965784ea718918a2dd4 Mon Sep 17 00:00:00 2001 From: toktar Date: Sun, 24 May 2020 23:43:39 +0300 Subject: [PATCH 097/100] [RC1-1.12.3] bump package version Signed-off-by: toktar --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 643deacef0..70d610a677 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 2, "", ""] +[1, 12, 3, "rc", 1] From a8443ff247d3d9e0e23e357ddac818d097b36640 Mon Sep 17 00:00:00 2001 From: Sovbot Date: Tue, 26 May 2020 15:14:20 +0000 Subject: [PATCH 098/100] release 1.12.3 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json 
index 70d610a677..e750151d05 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 3, "rc", 1] +[1, 12, 3, "", ""] From 52735f3fb3c36a80713bcd7fd4a37b055ce828d2 Mon Sep 17 00:00:00 2001 From: Wade Barnes Date: Tue, 11 Aug 2020 08:54:39 -0700 Subject: [PATCH 099/100] [RC1-1.12.4] bump package version Signed-off-by: Wade Barnes --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index e750151d05..05b57ab7f7 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 3, "", ""] +[1, 12, 4, "rc", 1] From cbc4f74d286988ef66583e012f36d97c48d9272d Mon Sep 17 00:00:00 2001 From: Sovbot Date: Wed, 12 Aug 2020 10:05:08 +0000 Subject: [PATCH 100/100] release 1.12.4 Signed-off-by: Sovbot --- plenum/__version__.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plenum/__version__.json b/plenum/__version__.json index 05b57ab7f7..4e1f762dbc 100644 --- a/plenum/__version__.json +++ b/plenum/__version__.json @@ -1 +1 @@ -[1, 12, 4, "rc", 1] +[1, 12, 4, "", ""]