forked from IntersectMBO/cardano-ledger
-
Notifications
You must be signed in to change notification settings - Fork 0
/
binary.tex
582 lines (476 loc) · 17.1 KB
/
binary.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
\documentclass{article}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{appendix}
\usepackage{float}
\usepackage{enumitem}
\usepackage{syntax}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{microtype}
\usepackage[a4paper]{geometry}
%% Figure styling (float package): every figure is drawn inside a box.
\floatstyle{boxed}
\restylefloat{figure}
%% Indentation used by the `grammar' environment (syntax package).
\setlength{\grammarindent}{7em}
%% Define a `field' separator for records: start a new line and overlap
%% a comma (followed by a quad of space) to the left of the field name.
\def\fld{\\\llap{,\quad}}%
%% A description-style list named `record' (enumitem), nestable three
%% levels deep.  No `record' environment appears in the body of this file.
\newlist{record}{description}{3}
\setlist[record]{itemsep=0pt, format=$-$\bfseries}
%% Generic notation for type constructors.
%% Partial-function arrow, used for maps A \rhu B.
\newcommand{\rhu}{\rightharpoonup}
%% Natural numbers.
\newcommand{\N}{\mathbb{N}}
%% Lists, non-empty lists and sets of #1.
\newcommand{\lists}[1]{\left[{#1}\right]}
\newcommand{\nelists}[1]{\left[{#1}\right]^+}
\newcommand{\sets}[1]{\{{#1}\}}
%% Optional value of type #1.  Written to be called from inside $...$:
%% the leading $ closes the caller's math mode so that () is set as text,
%% and the second $ re-opens math mode for `| #1'.
\newcommand{\maybe}[1]{$()$ | {#1}}
%% Identifier set of #1.  The subscript is braced so that arguments which
%% are themselves macros (e.g. \txs, \mathcal{E}) form a single subscript
%% group instead of relying on how the font command happens to expand.
\newcommand{\idsof}[1]{\mathcal{I}\!_{#1}}
%% --- Identifier sets --------------------------------------------------
\newcommand*{\txids}{\idsof{\txs}}                 % transactions
\newcommand*{\blockids}{\idsof{\blocks}}           % blocks
\newcommand*{\agentids}{\idsof{\agents}}           % addresses (agents)
\newcommand*{\epochids}{\idsof{\mathcal{E}}}       % epochs
\newcommand*{\slotids}{\idsof{\mathcal{S}}}        % slots
\newcommand*{\updids}{\idsof{\upd}}                % updates
\newcommand*{\stakeholderids}{\idsof{\mathcal{K}}} % stakeholders
%% --- Keys and signatures ----------------------------------------------
\newcommand*{\pubkey}{\chi_{\textbf{pub}}}
\newcommand*{\signature}{\chi_{\textbf{sig}}}
%% --- VSS symbols -------------------------------------------------------
\newcommand*{\vsspubkey}{\mathcal{VSS}_{\pubkey}}
\newcommand*{\vssproof}{\mathcal{VSS}_{\textbf{proof}}}
\newcommand*{\vsssec}{\mathcal{VSS}_{\textbf{sec}}}
\newcommand*{\vssencshare}{\mathcal{VSS}_{\textbf{enc}}}
\newcommand*{\vssdecshare}{\mathcal{VSS}_{\textbf{dec}}}
%% --- Addresses and transactions ----------------------------------------
\newcommand*{\agents}{\mathcal{A}}
\newcommand*{\txs}{\mathcal{T}}
\newcommand*{\txins}{\txs_{\textbf{in}}}
\newcommand*{\txfeepol}{\txs_{\textbf{feepol}}}
\newcommand*{\txpf}{\txs_{\textbf{proof}}}
%% --- Transaction witnesses ----------------------------------------------
\newcommand*{\twit}{\mathcal{TW}}
\newcommand*{\twitpk}{\twit_{\textbf{pk}}}
\newcommand*{\twitscript}{\twit_{\textbf{script}}}
\newcommand*{\twitredeem}{\twit_{\textbf{redeem}}}
%% --- Shared seed computation --------------------------------------------
\newcommand*{\ssc}{\mathcal{SSC}}
\newcommand*{\ssccomm}{\ssc_{\textbf{comm}}}
\newcommand*{\sscopen}{\ssc_{\textbf{open}}}
\newcommand*{\sscshare}{\ssc_{\textbf{share}}}
\newcommand*{\ssccert}{\ssc_{\textbf{cert}}}
\newcommand*{\sscpf}{\ssc_{\textbf{proof}}}
%% --- Delegation (superscript l marks the light variants) ----------------
\newcommand*{\dlg}{\mathcal{DLG}}
\newcommand*{\dlglight}{\dlg^{\textbf{l}}}
\newcommand*{\proxysig}{\dlg_{\textbf{sig}}}
\newcommand*{\proxysiglight}{\dlg^{\textbf{l}}_{\textbf{sig}}}
%% --- Updates --------------------------------------------------------------
\newcommand*{\upd}{\mathcal{UP}}
\newcommand*{\updprop}{\upd_{\textbf{prop}}}
\newcommand*{\upddata}{\upd_{\textbf{data}}}
\newcommand*{\updvote}{\upd_{\textbf{vote}}}
%% --- Blocks -----------------------------------------------------------
\newcommand*{\blocks}{\mathcal{B}}
\newcommand*{\blockver}{\blocks_{\textbf{ver}}}
\newcommand*{\blockvermod}{\blocks_{\textbf{ver}\Delta}}
\newcommand*{\bbodies}{\blocks_{\textbf{body}}}
\newcommand*{\bheads}{\blocks_{\textbf{head}}}
%% \bheadpf and \blockpf expand to the same symbol.
\newcommand*{\bheadpf}{\blocks_{\textbf{proof}}}
\newcommand*{\bheadcons}{\blocks_{\textbf{cons}}}
\newcommand*{\bheadex}{\blocks_{\textbf{headex}}}
\newcommand*{\blocksig}{\blocks_{\textbf{sig}}}
\newcommand*{\blockpf}{\blocks_{\textbf{proof}}}
%% --- Epoch boundary blocks (primed variants of the block symbols) ------
\newcommand*{\ebb}{\mathcal{B}^\prime}
\newcommand*{\ebbhead}{\ebb_{\textbf{head}}}
\newcommand*{\ebbheadcons}{\ebb_{\textbf{cons}}}
%% --- Primitive types --------------------------------------------------
%% Names set in bold via \hstype; it is built on \textbf, so these work in
%% both text and math mode.
\newcommand*{\hstype}[1]{\textbf{#1}}
\newcommand*{\String}{\hstype{String}}
%% \Word{n}: the argument must be braced when it has more than one digit.
\newcommand*{\Word}[1]{\hstype{Word#1}}
\newcommand*{\hash}{\hstype{Hash}}
%% Blackboard-bold symbols; math mode only.
\newcommand*{\Integer}{\mathbb{I}}
\newcommand*{\Bool}{\mathbb{B}}
\newcommand*{\Bytes}{\hstype{Bytes}}
\title{Cardano on-the-wire specification}
\author{Nicholas Clarke}
\begin{document}
\maketitle
\tableofcontents
\section{Introduction}
This document describes the binary serialisation formats used for the Cardano
blockchain.
It proceeds as follows: section~\ref{sec:types} describes the core types which
must be serialised for the purposes of Cardano communication. We then detail the
requirements on the binary format in section~\ref{sec:reqs}. In section~\ref{sec:binfmt}
we describe the explicit binary serialisation of the core types satisfying these
requirements.
Appendix~\ref{sec:currentfmt} details the current binary format.
\subsection{Notation}
\begin{description}
\item[Lists] Lists of type $A$ are written as $\lists{A}$.
\item[Non-empty lists] Non-empty lists of type $A$ are written as $\nelists{A}$.
\item[Sets] Sets of type $A$ are written as $\sets{A}$.
\item[Maps] A map from items of type $A$ to items of type $B$ is written as
$A\rhu B$.\footnote{We use this terminology for consistency with
\cite{spiwack}, where these are partial functions.}
\item[Records] We denote records as:
\begin{grammar}
<record> = \{
\fld fieldName :: type
\fld fieldName2 :: type2
\}
\end{grammar}
\item[Variants] We denote variants as:
\begin{grammar}
<variant> = Option1 \alt Option2 \alt Option3.
\end{grammar}
\item[Optional values] We denote an optional value of type $A$ as $\maybe{A}$.
\end{description}
We will use capitalised words for variant constructors and lower-case
terms for field names, as per standard Haskell syntax.
\subsection{Relation to the abstract specification and to the Cardano implementation}
This document straddles the middle-ground between the rule-based specification
given in \cite{spiwack} and the concrete implementation at \cite{cardano}. As
such, it tries to compromise between the differing presentations of how the
blockchain is constructed.
In general, we take the following approach:
\begin{itemize}
\item Where possible, we take our nomenclature from \cite{spiwack}. So, for
example, we refer to the type of transactions as $\txs$ rather than
$\textbf{Tx}$. Where there is no name given in \cite{spiwack} we generally
invent a new identifier in a similar style.
\item Structure, on the other hand, typically derives from the code in
\cite{cardano}, with some exceptions:
\begin{itemize}
\item We drop \verb|newtype| wrappers.
\item We flatten nested records where doing so would not lead to significant
confusion (for example, by requiring convoluted or lengthy names to
contextualise fields).
\item In general, we tend to be fairly free with translating between
isomorphic representations where it increases the readability of this spec.
\item We monomorphize any polymorphic variables to the type they are
instantiated to when they are serialised.
\item We ignore \verb|AsBinary| and its ilk. These provide `typed' wrappers
around \verb|ByteString| in order to defer encoding/decoding. They only
confuse an abstract definition.
\end{itemize}
\end{itemize}
\section{Core datatypes}
\label{sec:types}
The types in this section are derived from a combination of the blockchain spec
and Haskell datatypes in the codebase.
\subsection{Primitives}
Primitive types:
\begin{description}
\item [$\Bool$] Boolean value
\item [$\Word{8}$] 8-bit word
\item [$\Word{32}$] 32-bit word
\item [$\Word{64}$] 64-bit word
\item [$\String$] Arbitrary UTF8-encoded string type. We do not draw any
distinctions as to whether this is implemented as a $\hstype{String}$ or
$\hstype{Text}$.
\item [$\Bytes$] Arbitrary string of bytes.
\item [$\Integer$] Arbitrary-precision integers.
\end{description}
\subsection{Basic Cardano Types}
We start with sets of identifiers. These are represented in code as Blake2b-256
hashes. We also sometimes deal with these hashes directly.
\begin{itemize}
\item{Transaction identifiers $\txids$}
\item{Block identifiers $\blockids$}
\item Update identifiers $\updids$
\item Concrete hashes $\hash$
\end{itemize}
We also have addresses, which are represented as Blake2b-224 hashes.
\begin{itemize}
\item{Address (agent) identifiers $\agentids$}
\item Stakeholder identifiers $\stakeholderids$
\end{itemize}
Epochs are identified by their index as a $\Word{64}$. Slots are identified as a
pair of epoch identifier and a $\Word{16}$ index within the epoch.
\begin{itemize}
\item Epoch indices $\epochids$
\item Slot indices $\slotids$
\end{itemize}
Cardano public keys are elliptic curve DH keys on curve 25519.
\begin{itemize}
\item Public keys $\pubkey$
\item Signatures $\signature$
\end{itemize}
\subsection{Addresses}
We have a set of addresses $\agents$. An address has an identifier, a type, and
some additional data held in an \verb|attributes| field. In particular, each
address contains a stake distribution $\agents_{\textbf{distr}}$.
\begin{figure}[H]
\begin{grammar}
<$\agents_{\textbf{distr}}$> = Bootstrap
\alt SingleKey $\agentids$
\alt MultiKey $\agentids \rhu \Word{64}$.
<$\agents_{\textbf{type}}$> = PubKey \alt Script \alt Redeem \alt Unknown $\Word{8}$.
<$\agents_{\textbf{attr}}$> = \{
\fld pkDerivationPath :: $\maybe{\Bytes}$
\fld stakeDistr :: $\agents_{\textbf{distr}}$
\fld unparsed :: $\Word{8}\rhu\Bytes$
\}.
<$\agents$> = \{
\fld root :: $\agentids$
\fld attributes :: $\agents_{\textbf{attr}}$
\fld type :: $\agents_{\textbf{type}}$
\}.
\end{grammar}
\caption{Address Types}
\end{figure}
\subsection{Transactions}
We have a set of transactions $\txs$.
\begin{figure}[H]
\caption{Transactions}
\label{fig:txin}
\begin{grammar}
<$\txins$> = Valid $\txids\times\Word{32}$
\alt Invalid $\Word{8}\times\Bytes$.
<$\txs$> = \{
\fld inputs :: $\lists{\txins}$
\fld outputs :: $\nelists{\agentids \times \Word{64}}$
\}
<$\txpf$> = \{
\fld txCount :: $\Word{32}$
\fld merkleRoot :: $\hash$
\fld witnessHash :: $\hash$
\}
\end{grammar}
\end{figure}
We also have transaction witnesses, which provide proof that the transaction has
authority to spend its inputs.
\begin{figure}[H]
\caption{Transaction witnesses}
\begin{grammar}
<$\twitpk$> = \{
\fld key :: $\pubkey$
\fld signature :: $\signature$
\}
<$\twitscript$> = \{
\fld validator :: $\Word{16}\times\Bytes$
\fld redeemer :: $\Word{16}\times\Bytes$
\}
<$\twitredeem$> = \{
\fld key :: $\pubkey$
\fld signature :: $\signature$
\}
<$\twit$> = PublicKey $\twitpk$
\alt Script $\twitscript$
\alt Redeem $\twitredeem$
\alt Unknown $\Word{8}$ \Bytes
\end{grammar}
\end{figure}
\subsection{Shared Seed Computation}
Shared seed computation deals with commitments, openings, shares and
certificates.
We start with some basic types used for VSS. VSS uses its own public key
cryptography scheme.
\begin{itemize}
\item VSS public keys $\vsspubkey$
\item VSS secret $\vsssec$
\item VSS encrypted and decrypted shares $\vssencshare$ and $\vssdecshare$
\item VSS secret proof $\vssproof$
\end{itemize}
\begin{figure}[H]
\caption{Shared seed computation}
\begin{grammar}
<$\ssccomm$> = $\pubkey \times ((\vsspubkey \rhu \vssencshare) \times \vssproof)
\times \signature$
<$\sscopen$> = $\stakeholderids \rhu \vsssec$
<$\sscshare$> = $\stakeholderids \rhu (\stakeholderids \rhu \nelists{\vssdecshare})$
<$\ssccert$> = \{
\fld vsskey :: $\vsspubkey$
\fld signingKey :: $\pubkey$
\fld expiry :: $\epochids$
\fld signature :: $\signature$
\}
<$\ssc$> = Commitments $\lists{\ssccomm} \times \lists{\ssccert}$
\alt Openings $\sscopen \times \lists{\ssccert}$
\alt Shares $\sscshare \times \lists{\ssccert}$
\alt Certificates $\lists{\ssccert}$
<$\sscpf$> = CommitmentsProof $\hash \times \hash$
\alt OpeningsProof $\hash \times \hash$
\alt SharesProof $\hash \times \hash$
\alt CertificatesProof $\hash$
\end{grammar}
\end{figure}
\subsection{Delegation}
The heavyweight delegation system is used to allow stakeholders to authorise
other parties to issue blocks on their behalf.
\begin{figure}[H]
\begin{grammar}
<$\dlg$> = \{
\fld epoch :: $\epochids$
\fld issuer :: $\pubkey$
\fld delegate :: $\pubkey$
\fld certificate :: $\signature$
\}
<$\proxysig$> = \{
\fld psk :: $\dlg$
\fld sig :: $\signature$
\}
<$\dlglight$> = \{
\fld epoch :: $\epochids\times\epochids$
\fld issuer :: $\pubkey$
\fld delegate :: $\pubkey$
\fld certificate :: $\signature$
\}
<$\proxysiglight$> = \{
\fld psk :: $\dlglight$
\fld sig :: $\signature$
\}
\end{grammar}
\caption{Delegation}
\label{fig:dlgtypes}
\end{figure}
\subsection{Updates}
This section covers the types used to orchestrate updates of the system. We also
introduce the block version and transaction fee policy in this section, since
they are only serialised as part of the update system.
\begin{figure}[H]
\begin{grammar}
<$\blockver$> = $\Word{16}\times\Word{16}\times\Word{8}$
<$\txfeepol$> = Linear $\Integer$ $\Integer$
\alt Unknown \Word{8} \Bytes
<$\blockvermod$> = \{
\fld scriptVersion :: $\maybe{\Word{16}}$
\fld slotDuration :: $\maybe{\Integer}$
\fld maxBlockSize :: $\maybe{\Integer}$
\fld maxHeaderSize :: $\maybe{\Integer}$
\fld maxTxSize :: $\maybe{\Integer}$
\fld maxProposalSize :: $\maybe{\Integer}$
\fld mpcThd :: $\maybe{\Word{64}}$
\fld heavyDelThd :: $\maybe{\Word{64}}$
\fld updateVoteThd :: $\maybe{\Word{64}}$
\fld updateProposalThd :: $\maybe{\Word{64}}$
\fld updateImplicit :: $\maybe{\Word{64}}$
\fld softforkRule :: $\maybe{(\Word{64}\times\Word{64}\times\Word{64})}$
\fld txFeePolicy :: $\maybe{\txfeepol}$
\fld unlockStakeEpoch :: $\maybe{\epochids}$
\}
<$\upddata$> = \{
\fld appDiffHash :: \hash
\fld pkgHash :: \hash
\fld updaterHash :: \hash
\fld mdHash :: \hash
\}
<$\updprop$> = \{
\fld blockVersion :: $\blockver$
\fld blockVersionMod :: $\blockvermod$
\fld softwareVersion :: $\String \times \Word{32}$
\fld data :: $\String \rhu \upddata$
\fld attributes :: $\Word{8} \rhu \Bytes$
\fld from :: $\pubkey$
\fld signature :: $\signature$
\}
<$\updvote$> = \{
\fld voter :: $\pubkey$
\fld proposalId :: $\updids$
\fld vote :: $\Bool$
\fld signature :: $\signature$
\}
<$\upd$> = \{
\fld proposal :: $\maybe{\updprop}$
\fld votes :: $\lists{\updvote}$
\}
\end{grammar}
\caption{Updates}
\label{fig:updtypes}
\end{figure}
\subsection{Blocks}
A block consists of a block header and a block body. The block header consists
of verification for the various components in the block body.
\begin{figure}[H]
\caption{Blocks}
\label{fig:block}
\begin{grammar}
<$\blocksig$> = Signature $\signature$
\alt ProxySigLight $\proxysiglight$
\alt ProxySigHeavy $\proxysig$
<$\bheadcons$> = \{
\fld slotId :: $\slotids$
\fld leaderKey :: $\pubkey$
\fld difficulty :: $\Word{64}$
\fld signature :: $\blocksig$
\}
<$\bheadex$> = \{
\fld blockVersion :: $\blockver$
\fld softwareVersion :: $\String \times \Word{32}$
\fld extraProof :: $\hash$
\fld attributes :: $\String \rhu \emptyset$
\}
<$\blockpf$> = \{
\fld txProof :: $\txpf$
\fld sscProof :: $\sscpf$
\fld dlgProof :: $\hash$
\fld updProof :: $\hash$
\}
<$\bheads$> = \{
\fld prevBlock :: $\blockids$
\fld bodyProof :: $\blockpf$
\fld consensusData :: $\bheadcons$
\fld extraData :: $\bheadex$
\}
<$\bbodies$> = \{
\fld txPayload :: $\lists{\txs\times\lists{\twit}}$
\fld sscPayload :: $\ssc$
\fld dlgPayload :: $\lists{\dlg}$
\fld updPayload :: $\upd$
\}
<$\blocks$> = \{
\fld header :: $\bheads$
\fld body :: $\bbodies$
\fld extra :: $\Word{8} \rhu \Bytes$
\}
\end{grammar}
\end{figure}
\subsubsection{Epoch Boundary Blocks}
In addition to regular blocks, epoch boundary blocks contain a list of slot
leaders for a given epoch. They are not conventionally distributed as part of
the blockchain, but can be requested as part of catch-up and as such form part
of the on-the-wire protocol.
\begin{figure}[H]
\begin{grammar}
<$\ebbheadcons$> = \{
\fld epoch :: $\epochids$
\fld chainDifficulty :: $\Word{64}$
\}
<$\ebbhead$> = \{
\fld prevBlock :: $\blockids$
\fld bodyProof :: $\hash$
\fld consensusData :: $\ebbheadcons$
\fld extraData :: $\Word{8} \rhu \Bytes$
\}
<$\ebb$> = \{
\fld header :: $\ebbhead$
\fld body :: $\nelists{\stakeholderids}$
\fld extra :: $\Word{8} \rhu \Bytes$
\}
\end{grammar}
\caption{Epoch Boundary Blocks}
\label{fig:ebb}
\end{figure}
\section{Requirements on the binary format}
\label{sec:reqs}
\subsection{Cryptographic properties}
\subsection{Dependencies on the binary format}
\section{Binary specification}
\label{sec:binfmt}
\begin{thebibliography}{9}
\bibitem{spiwack}
\textit{Rule-based specification of the blockchain logic}.
Erik de Castro Lopo, Nicholas Clarke \& Arnaud Spiwack.
\bibitem{cardano}
\textit{Cryptographic currency implementing Ouroboros PoS protocol}.
\url{https://github.com/input-output-hk/cardano-sl/}
\bibitem{cbor}
\textit{RFC 7049 Concise Binary Object Representation}.
\url{http://cbor.io}
\bibitem{cddl}
\textit{Concise data definition language (CDDL): a notational
convention to express CBOR data structures}.
\url{https://tools.ietf.org/html/draft-ietf-cbor-cddl-00}
\end{thebibliography}
\begin{appendices}
\section{Current binary format}
\label{sec:currentfmt}
This section documents the current binary serialisation format, as of
2018-06-03.
The current blockchain is serialized using CBOR~\cite{cbor}. Consequently we
present the current description as a CDDL~\cite{cddl} document. Terms in use
below should be interpreted in that context and with reference to Appendix E
of \cite{cddl}, which defines a standard prelude available to such documents.
\lstinputlisting{byron.cddl}
\end{appendices}
\end{document}