From 1d774e7545c729adff4b3bd3798996ee61755a77 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Thu, 9 Feb 2023 18:03:54 +0100 Subject: [PATCH] =?UTF-8?q?Add=20chapter=20on=20tick=20rule=20=F0=9F=94=A2?= =?UTF-8?q?=20(#152)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses #10 --- references/obsidian/.obsidian/graph.json | 2 +- references/obsidian/.obsidian/workspace.json | 110 +++--------------- references/obsidian/IW-Queues/IW-Queue.md | 10 +- ...lection of supervised approaches notes.md" | 3 - .../\360\237\224\242Basic rules notes.md" | 4 + .../\360\237\224\242CLNV method notes.md" | 16 +++ .../\360\237\224\242Hybrid rules notes.md" | 15 --- .../\360\237\224\242Tick test notes.md" | 23 +++- .../\360\237\224\242CLNV method.md" | 4 +- .../\360\237\224\242Depth rule.md" | 4 +- .../\360\237\224\242EMO rule.md" | 4 +- .../\360\237\224\242LR algorithm.md" | 85 ++++++++++++++ .../\360\237\224\242Quote rule.md" | 3 + .../\360\237\224\242Tick test.md" | 37 +++++- .../\360\237\224\242Trade Size Rule.md" | 2 +- .../\360\237\244\226Transformer.md" | 2 + "references/obsidian/\360\237\227\277TOC.md" | 10 +- reports/Content/main.tex | 107 ++++++----------- reports/thesis.tex | 41 ------- 19 files changed, 230 insertions(+), 252 deletions(-) diff --git a/references/obsidian/.obsidian/graph.json b/references/obsidian/.obsidian/graph.json index 7d802530..433448b9 100644 --- a/references/obsidian/.obsidian/graph.json +++ b/references/obsidian/.obsidian/graph.json @@ -17,6 +17,6 @@ "repelStrength": 10, "linkStrength": 1, "linkDistance": 250, - "scale": 0.3823010968769974, + "scale": 1.476986098068651, "close": true } \ No newline at end of file diff --git a/references/obsidian/.obsidian/workspace.json b/references/obsidian/.obsidian/workspace.json index 05caed77..9f7173e1 100644 --- a/references/obsidian/.obsidian/workspace.json +++ b/references/obsidian/.obsidian/workspace.json @@ -3,111 +3,29 @@ "id": "550952c7dea0cc23", "type": 
"split", "children": [ - { - "id": "9d9a45cd3aa297af", - "type": "tabs", - "dimension": 35.10593220338983, - "children": [ - { - "id": "6a18cf819fe4635b", - "type": "leaf", - "pinned": true, - "state": { - "type": "markdown", - "state": { - "file": "📑notes/🍪Selection of supervised approaches notes.md", - "mode": "source", - "source": false - }, - "pinned": true - } - }, - { - "id": "106d26afbe6ac5ca", - "type": "leaf", - "state": { - "type": "markdown", - "state": { - "file": "📖chapters/🔢EMO rule.md", - "mode": "source", - "source": false - } - } - }, - { - "id": "cfcc451141d62be0", - "type": "leaf", - "state": { - "type": "markdown", - "state": { - "file": "📖chapters/🔢CLNV method.md", - "mode": "source", - "source": false - } - } - } - ], - "currentTab": 2 - }, { "id": "2785db0e51818bf2", "type": "tabs", - "dimension": 35.10593220338983, + "dimension": 49.568965517241374, "children": [ { - "id": "0b5c7cf912d57d27", - "type": "leaf", - "pinned": true, - "state": { - "type": "markdown", - "state": { - "file": "📑notes/🔢CLNV method notes.md", - "mode": "source", - "source": false - }, - "pinned": true - } - }, - { - "id": "45c897f094afeb47", - "type": "leaf", - "state": { - "type": "markdown", - "state": { - "file": "📑notes/🔢Tick test notes.md", - "mode": "source", - "source": false - } - } - }, - { - "id": "1934968841d72664", + "id": "94415710fad4ab13", "type": "leaf", "state": { "type": "markdown", "state": { - "file": "📑notes/🔢Hybrid rules notes.md", + "file": "📖chapters/🔢Tick Test.md", "mode": "source", "source": false } } - }, - { - "id": "94415710fad4ab13", - "type": "leaf", - "state": { - "type": "image", - "state": { - "file": "🖼️Media/pseudocode-of-algorithms.png" - } - } } ] }, { "id": "98f847228f79bc8c", "type": "tabs", - "dimension": 29.78813559322034, + "dimension": 50.431034482758626, "children": [ { "id": "f35a250acbe1a2dc", @@ -178,7 +96,7 @@ "state": { "type": "backlink", "state": { - "file": "📖chapters/🔢CLNV method.md", + "file": "📖chapters/🔢Tick 
Test.md", "collapseAll": false, "extraContext": false, "sortOrder": "alphabetical", @@ -206,7 +124,7 @@ "state": { "type": "outline", "state": { - "file": "📖chapters/🔢CLNV method.md" + "file": "📖chapters/🔢Tick Test.md" } } }, @@ -237,17 +155,17 @@ "markdown-importer:Open format converter": false } }, - "active": "cfcc451141d62be0", + "active": "94415710fad4ab13", "lastOpenFiles": [ - "📑notes/🔢CLNV method notes.md", + "📑notes/🍪Selection of supervised approaches notes.md", + "📖chapters/🔢Tick Test.md", "📑notes/🔢Tick test notes.md", "📑notes/🔢Hybrid rules notes.md", + "📑notes/🔢Basic rules notes.md", + "📖chapters/🔢Basic rules.md", + "📖chapters/🔢Quote Rule.md", + "📑notes/🔢LR algorithm notes.md", "🖼️Media/pseudocode-of-algorithms.png", - "📖chapters/🔢CLNV method.md", - "📖chapters/🔢EMO rule.md", - "📑notes/🍪Selection of supervised approaches notes.md", - "📖chapters/🔢Tick test.md", - "📖chapters/🔢Trade Size Rule.md", - "📑notes/🔢Trade Size Rule Notes.md" + "📖chapters/🔢Hybrid rules.md" ] } \ No newline at end of file diff --git a/references/obsidian/IW-Queues/IW-Queue.md b/references/obsidian/IW-Queues/IW-Queue.md index c6928972..f1e62d20 100644 --- a/references/obsidian/IW-Queues/IW-Queue.md +++ b/references/obsidian/IW-Queues/IW-Queue.md @@ -10,7 +10,7 @@ interval: 1 | [[🤖Transformer]] | 12.5 | | 2 | 2022-12-15 | | [[👨‍🍳Tain-Test-split]] | 15 | | 2 | 2022-12-15 | | [[🔢Trade Size Rule]] | 17.5 | | 1 | 1970-01-01 | -| [[🔢Tick test]] | 20 | | 1 | 1970-01-01 | +| [[🔢Tick Test]] | 20 | | 1 | 1970-01-01 | | [[🍪Selection Of Semisupervised Approaches]] | 22.5 | | 1 | 1970-01-01 | | [[🍪Selection Of Supervised Approaches]] | 25 | | 1 | 1970-01-01 | | [[🏅Robustness]] | 27.5 | | 1 | 1970-01-01 | @@ -18,7 +18,7 @@ interval: 1 | [[🏅Results of semi-supervised]] | 30 | | 1 | 1970-01-01 | | [[🏅Results]] | 32.5 | | 1 | 1970-01-01 | | [[👪Related Work]] | 35 | | 1 | 1970-01-01 | -| [[🔢Quote rule]] | 37.5 | | 1 | 1970-01-01 | +| [[🔢Quote Rule]] | 37.5 | | 1 | 1970-01-01 | | [[💡Training and tuning]] 
| 40 | | 1 | 1970-01-01 | | [[👨‍🍳Pre-Processing]] | 40 | | 1 | 1970-01-01 | | [[💡Tuning of models (semi-supervised)]] | 41 | | 1 | 1970-01-01 | @@ -36,11 +36,11 @@ interval: 1 | [[🚏Exploratory Data Analysis]] | 72.5 | | 1 | 1970-01-01 | | [[🧭Evaluation metric]] | 75 | | 1 | 1970-01-01 | | [[🌏Environment]] | 77.5 | | 1 | 1970-01-01 | -| [[🔢EMO rule]] | 80 | | 1 | 1970-01-01 | +| [[🔢EMO Rule]] | 80 | | 1 | 1970-01-01 | | [[🛌Token Embedding]] | 82.5 | | 1 | 1970-01-01 | | [[🤖Pre-training of Transformers]] | 85 | | 1 | 1970-01-01 | | [[🧓Discussion]] | 87.5 | | 1 | 1970-01-01 | -| [[🔢Depth rule]] | 90 | | 1 | 1970-01-01 | -| [[🔢CLNV method]] | 95 | | 1 | 1970-01-01 | +| [[🔢Depth Rule]] | 90 | | 1 | 1970-01-01 | +| [[🔢CLNV Method]] | 95 | | 1 | 1970-01-01 | | [[🔢Basic rules]] | 97.5 | | 1 | 1970-01-01 | | [[🐈Gradient Boosting]] | 5 | | 8 | 2023-02-07 | \ No newline at end of file diff --git "a/references/obsidian/\360\237\223\221notes/\360\237\215\252Selection of supervised approaches notes.md" "b/references/obsidian/\360\237\223\221notes/\360\237\215\252Selection of supervised approaches notes.md" index a290299d..e808f24d 100644 --- "a/references/obsidian/\360\237\223\221notes/\360\237\215\252Selection of supervised approaches notes.md" +++ "b/references/obsidian/\360\237\223\221notes/\360\237\215\252Selection of supervised approaches notes.md" @@ -1,9 +1,6 @@ #gbm #transformer #supervised-learning #deep-learning - - - **Why probabilistic classification:** - Due to a unsatisfactory research situation, for trade classification (see chapter [[👪Related Work]]) we base - Use classification methods (*probabilistic classifier*) that can return probabilities instead of class-only for better analysis. 
Using probabilistic trade classification rules might have been studied in [[@easleyDiscerningInformationTrade2016]] diff --git "a/references/obsidian/\360\237\223\221notes/\360\237\224\242Basic rules notes.md" "b/references/obsidian/\360\237\223\221notes/\360\237\224\242Basic rules notes.md" index 8d209b21..b84ab28a 100644 --- "a/references/obsidian/\360\237\223\221notes/\360\237\224\242Basic rules notes.md" +++ "b/references/obsidian/\360\237\223\221notes/\360\237\224\242Basic rules notes.md" @@ -5,6 +5,10 @@ Denote sells with $0$ and buys with $1$. “The trade indicator is a binary variable stating whether the buyer or seller of an asset has initiated the trade by submitting a market order or an immediately executed limit order.” (Frömmel et al., 2021, p. 4) +“Methods of inferring trade direction can be classified as: tick tests, which use changes in trade prices; the quote method, which compares trade prices to quotes;” (Finucane, 2000, p. 557) + + + **Synonyms** 1. Broader term is **trade site classification** = assign the side to a to a transaction and differentiate between buyer- and seller-initiated transactions 2. 
It's also sometimes called trade sign classification diff --git "a/references/obsidian/\360\237\223\221notes/\360\237\224\242CLNV method notes.md" "b/references/obsidian/\360\237\223\221notes/\360\237\224\242CLNV method notes.md" index 5ae0a2c2..6416deec 100644 --- "a/references/obsidian/\360\237\223\221notes/\360\237\224\242CLNV method notes.md" +++ "b/references/obsidian/\360\237\223\221notes/\360\237\224\242CLNV method notes.md" @@ -1,5 +1,21 @@ Tags: #trade-classification #CLVN +Long form: +$$ +  \begin{equation} + +    \text{Trade}_{i,t}= +    \begin{cases} +      \operatorname{tick}(), & \text{if}\ P_{i, t} \in \left(A_{i, t}, \infty\right) \\ +      1, & \text{if}\ P_{i, t} \in \left[\frac{3}{10} B_{i,t} + \frac{7}{10} A_{i,t}, A_{i, t}\right] \\ +      \operatorname{tick}(), & \text{if}\ P_{i, t} \in \left(\frac{7}{10} B_{i,t} + \frac{3}{10} A_{i,t}, \frac{3}{10} B_{i,t} + \frac{7}{10} A_{i,t}\right) \\ +      0, & \text{if} P_{i, t} \in \left[ B_{i,t}, \frac{7}{10} B_{i,t} + \frac{3}{10} A_{i,t}\right] \\ + \operatorname{tick}(), & \text{if} \ P_{i, t} \in \left(-\infty, B_{i, t}\right) \\ +    \end{cases} +  \end{equation} +$$ + + - “We compare the accuracy rates of various algorithms in classifying ECN trades. We divide trades according to their price distribution relative to quotes. We expect the classification rules to perform better when trades occur at the ask or bid. When trades receive price improvement, buys and sells will execute at prices inside the quotes. In these instances, not only does the quote rule meet with some challenge, the tick rule will also be more difficult as buys (sells) will more likely occur on downticks (upticks).” (Chakrabarty et al., 2007, p. 3811) - “We find that ECN trades are difficult to classify as the overall success rates of the LR, EMO, and tick rules are 74.42%, 75.80%, and 75.40%, respectively. 
Our algorithm modestly outperforms these three, with an overall success rate of 76.52%, but we show that our algorithm substantially outperforms the others for trades inside the quotes. For trades inside 1 Our primary sample is from the INET ECN. We use ArcaEx data to confirm the robustness of our results across another ECN. Table 3 reports results using the ArcaEx data. All other tables use the INET sample. B. Chakrabarty et al. / Journal of Banking & Finance 31 (2007) 3806–3821 380” (Chakrabarty et al., 2007, p. 3807) “the quotes our success rate is 76.32% compared to 71.85% for the LR rule and 71.35% for both the tick and EMO rules. We also find that a significant proportion of trades execute outside the quotes. This phenomenon, called a ‘‘trade-through’’, is prevalent for NASDAQ markets due to the absence of the trade-through rule in our study period.2” (Chakrabarty et al., 2007, p. 3808) diff --git "a/references/obsidian/\360\237\223\221notes/\360\237\224\242Hybrid rules notes.md" "b/references/obsidian/\360\237\223\221notes/\360\237\224\242Hybrid rules notes.md" index 45bdf280..e39f637e 100644 --- "a/references/obsidian/\360\237\223\221notes/\360\237\224\242Hybrid rules notes.md" +++ "b/references/obsidian/\360\237\223\221notes/\360\237\224\242Hybrid rules notes.md" @@ -1,20 +1,5 @@ Tags: #trade-classification -Long form: -$$ -  \begin{equation} - -    \text{Trade}_{i,t}= -    \begin{cases} -      \operatorname{tick}(), & \text{if}\ P_{i, t} \in \left(A_{i, t}, \infty\right) \\ -      1, & \text{if}\ P_{i, t} \in \left[\frac{3}{10} B_{i,t} + \frac{7}{10} A_{i,t}, A_{i, t}\right] \\ -      \operatorname{tick}(), & \text{if}\ P_{i, t} \in \left(\frac{7}{10} B_{i,t} + \frac{3}{10} A_{i,t}, \frac{3}{10} B_{i,t} + \frac{7}{10} A_{i,t}\right) \\ -      0, & \text{if} P_{i, t} \in \left[ B_{i,t}, \frac{7}{10} B_{i,t} + \frac{3}{10} A_{i,t}\right] \\ - \operatorname{tick}(), & \text{if} \ P_{i, t} \in \left(-\infty, B_{i, t}\right) \\ -    \end{cases} -  
\end{equation} -$$ - - Why is there a need for hybrid classification rules? What are the problems of the tick and quote rule? - The previous trade classification rules are applicable to certain trades or come with their own drawbacks e. g., they perform poorly for a greater distance to quotes. To mitigate the disadvantages, rules are combined to hybrids through ensembeling. In certain cases through stacking. diff --git "a/references/obsidian/\360\237\223\221notes/\360\237\224\242Tick test notes.md" "b/references/obsidian/\360\237\223\221notes/\360\237\224\242Tick test notes.md" index c6d49094..a798f46c 100644 --- "a/references/obsidian/\360\237\223\221notes/\360\237\224\242Tick test notes.md" +++ "b/references/obsidian/\360\237\223\221notes/\360\237\224\242Tick test notes.md" @@ -2,6 +2,12 @@ Tags: #trade-classification #tick-rule One of the first works who mention the tick test is [[@holthausenEffectLargeBlock1987]] (referred to as tick classification rule) or [[@hasbrouckTradesQuotesInventories1988]] (referred to as transaction rule) +“The simplest approximation is the tick rule, which assigns a trade to be a buy if the trade price was an uptick relative to the previous trade and to be a sell if it was a downtick. (In the case of a zero-tick, the signing relies on the movement relative to the last price change.)” (Easley et al., 2016, p. 271) + +“his approach eschews any distributional assumptions and relies instead on the basic notion that buys raise prices and sells lower them. But how well this approximation works to infer trades, or underlying information, is debatable,” ([[@easleyDiscerningInformationTrade2016]], p. 271) + +“his approach eschews any distributional assumptions and relies instead on the basic notion that buys raise prices and sells lower them. But how well this approximation works to infer trades, or underlying information, is debatable,” (Easley et al., 2016, p. 
271) + **Algorithm:** Formal description in [[@olbrysEvaluatingTradeSide2018]] and [[@carrionTradeSigningFast2020]] (see below) and [[@jurkatisInferringTradeDirections2022]]: Formally denoting the trade price of security $i$ at time $t$ as $P_{i, t}$ and $\Delta P_{i, t}$ as the price change between two successive trades and the assigned trade direction at time $t$ as Trade, we have: If $\Delta P_{i, t}>0$, Trade $_{i, t}=$ Buy, @@ -10,19 +16,30 @@ If $\Delta P_{i, t}=0$, Trade $_{i, t}=$ Trade $_{i, t-1}$. **Informal description:** Tick tests use changes in trade prices and look at previous trade prices to infer trade direction. If the trade occurs at a higher price, hence uptick, as the previous trade its classified as as buyer-initiated. If the trade occurs at a lower price its seller-initiated. If the price change is zero, the last price is taken, that is different from the current price. (see e. g., [[@grauerOptionTradeClassification2022]] or [[@finucaneDirectTestMethods2000]] or [[@leeInferringTradeDirection1991]] for similar framing) +“The tick test is a technique which infers the direction of a trade by comparing its price to the price of the preceding trade. The test classifies each trade into four categories: an uptick, a downtick, a zero-uptick, and a zero-downtick. A trade is an uptick (downtick) if the price is higher (lower) than the price of the previous trade. When the price is the same as the previous trade (a zero tick), if the last price change was an uptick, then the trade is a zero-uptick. Similarly, if the last price change was a downtick, then the trade is a zero-downtick” ([[@leeInferringTradeDirection1991]], p. 3) + +“The tick rule is the simplest classification algorithm due to its neglect of quote data. Trades with a higher price than the previous (upticks) are classified as buys; trades with a lower price (downticks) are sells. In the case of equal prices, the last preceding price change is used.
The tick rule is part of every complex trade-by-trade classification algorithm” ([[@poppeSensitivityVPINChoice2016]] p. 166) + **Variant:** +- “The reverse tick test is similar, but uses the next trade price to classify the current trade. If the next trade occurs on an uptick or zero uptick, the current trade is classified as a sell. If the next trade occurs on a downtick or zero downtick, the current trade is classified as a buy.” (Finucane, 2000, p. 557) - A variant of the tick test is the *reverse tick test* as popularized by [[@leeInferringTradeDirection1991]]. Instead of using the previous distinguishable trade price, the subsequent trade price, that is different from the current trade is used. - Instead of the previous trade, the reverse tick rule uses the subsequent trade price to classify the current trade. - If the next trade price that is different from the current price, is below the current price the trade (on a down tick or zero down tick) is classified as buyer-initiated. If the next distinguishable price is above the current price (up tick or zero up tick), the current price the trade is seller-initiated. (loosely adapted from [[@grauerOptionTradeClassification2022]]) (see also [[@leeInferringTradeDirection1991]]) - “When quote data is not present, the TR yields a considerably lower accuracy.” (Frömmel et al., 2021, p. 9) **Lower bound:** [[@perlinPerformanceTickTest2014]] proved that the tick test performs better than random chance. **Data efficiency:** low data requirements, as only transaction data is needed. (see [[@theissenTestAccuracyLee2000]]). Could be good enough though. -**Data efficiency:** “The advantages of the tick method are that it requires only transaction data (quotes are not necessary) and that no trades are left unclassified. The disadvantage is that the tick method incorporates less information than the quote method since it does not use the posted quotes.” ([[@odders-whiteOccurrenceConsequencesInaccurate2000]], 2000, p.
264) **Data efficiency:** “The tick rule is the simplest classification algorithm due to its neglect of quote data. Trades with a higher price than the previous (upticks) are classified as buys; trades with a lower price (downticks) are sells. In the case of equal prices, the last preceding price change is used. The tick rule is part of every complex trade-by-trade classification algorithm” ([Pöppe et al., 2016, p. 166](zotero://select/library/items/5A83SDDB)) ([pdf](zotero://open-pdf/library/items/4XIK47X6?page=2&annotation=SRLPERF8)) +**Data efficiency:** “The advantages of the tick method are that it requires only transaction data (quotes are not necessary) and that no trades are left unclassified. The disadvantage is that the tick method incorporates less information than the quote method since it does not use the posted quotes.” ([[@odders-whiteOccurrenceConsequencesInaccurate2000]], 2000, p. 264) + **Why the limitation:** “We compare the accuracy rates of various algorithms in classifying ECN trades. We divide trades according to their price distribution relative to quotes. We expect the classification rules to perform better when trades occur at the ask or bid. When trades receive price improvement, buys and sells will execute at prices inside the quotes. In these instances, not only does the quote rule meet with some challenge, the tick rule will also be more difficult as buys (sells) will more likely occur on downticks (upticks).” ([[@chakrabartyTradeClassificationAlgorithms2007]]., 2007, p. 3811) -**Strength:** “The advantages of the tick method are that it requires only transaction data (quotes are not necessary) and that no trades are left unclassifed. The disadvantage is that the tick method incorporates less information than the quote method since it does not use the posted quotes.” (Odders-White, 2000, p. 264) + +**Limitations:** “Figure 1 illustrates how trades can be misclassified when quotes change.
When quotes rise between trades, sales at the bid on upticks and zero upticks will be misclassified as buys by the tick test, but should be correctly classified using quote-based methods. Trade 6a in Panel A of Figure 1 illustrates the case of a sell (at the bid) being misclassified by the tick test on a zero uptick. If quotes are falling, as is the case for trade 6b in Panel B, buys at the ask on downticks and zero downticks will be misclassified as sells by the tick test, and should be correctly classified by quote-based methods. Figure 1 also illustrates the tendency for mid-spread trades to be misclassified by the tick test, even when quotes do not” (Finucane, 2000, p. 557) + +“change. Trade 4a, a sale that occurs on an uptick, is misclassified as a buy, and trade 5b, a buy that occurs on a downtick with constant quotes, is misclassified as a sell. FIGURE 1 lllustrative Trade Sequences for Mid-Spread Trades Panel A - - - Ask Bid ?*? - ?X? - Sell Buy Sell Sell Cross Sell (1a) (2a) (3a) (4a) (5a) (6a) Panel B Bid ?*? ?*? - - - - Bid Sell Sell Buy Buy Buy Buy (1b) (2b) (3b) (4b) (5b) (6b) Solid lines represent ask and bid prices and Xs denote trades. Trade direction is indicated below each trade. Panel A illustrates the case where sells on upticks and zero upticks are misclassified by the tick test when quotes are increasing. Panel B illustrates the case where buys on downticks and zero downticks are misclassified when quotes are falling. Panel B also demonstrates how m” (Finucane, 2000, p. 558) + +**Limitation:** “Additionally, trade direction may not always be unambiguously determined. While LR assume that trades generally occur only when a market buy or sell order arrives, trades that do not involve market orders also can occur, such as when two limit orders are crossed. Although the trade can be classified by the tick test or LR's algorithm, the true direction of the trade is ambiguous. 
Classifying such trades as buys or sells may lead to erroneous conclusions in empirical studies.” (Finucane, 2000, p. 559) **Limitations:** 👩‍🚒“In theory, all trades can be classified as either a buy or a sell order by using a tick test.5In practice, certain trades are not classifiable because they are either reported out of sequence or are sold with special conditions attached” ([[@leeInferringTradeDirection1991]], p. 3) **Limitations:** 👩‍🚒“The primary limitation of the tick test is its relative imprecision when compared to a quote-based approach, particularly if the prevailing quote has changed or it has been a long time since the last trade.” ([[@leeInferringTradeDirection1991]], p. 3) @@ -43,7 +60,7 @@ If $\Delta P_{i, t}=0$, Trade $_{i, t}=$ Trade $_{i, t-1}$. **Results:** 💸 “This implies that the sample used for assessing the performance of the tick rule will consist of a higher proportion of index option trades (because some equity options trade infrequently and cannot be classified by the tick rule) and will be significantly smaller. Therefore, the performance measure will be biased downward for the tick rule.” (Savickas and Wilson, 2003, p. 887) **Results:** 💸 “When the tick rule is not “reset” at the beginning of each day, the number of trades classified by the rule increases (as expected), but the classification precision does not improve: only 55.73% of all 1,404,365 classifiable trades are labeled correctly by the tick rule.” (Savickas and Wilson, 2003, p. 886) **Results:** 💸 “Specifically, the only difference between the tick and the EMO rules is that the latter uses the quote rule for at-the-quote trades. Consequently, the EMO method outperforms the tick rule only for those trades. Similarly, the LR and EMO methods treat at-the-quote and at-midspread trades exactly the same, but the LR approach applies the quote rule to all other trades.” (Savickas and Wilson, 2003, p. 891) -**Results: 💸 “Generally, quote rules outperform tick rules by far. 
For the tick rule, a higher success rate can be achieved using prices across all exchanges and information from subsequent trades” (Grauer et al., 2022, p. 3) -> lead them to propose the [[🔢Depth rule]]. +**Results: 💸 “Generally, quote rules outperform tick rules by far. For the tick rule, a higher success rate can be achieved using prices across all exchanges and information from subsequent trades” (Grauer et al., 2022, p. 3) -> lead them to propose the [[🔢Depth Rule]]. **Results:** 💸 “For example, Easley, Lopez de Prado, and O’Hara (2012) show that the tick test works reasonably well for e-mini S&P 500 Futures, correctly classifying 86% of the trades, but it is less accurate in gold futures (79%) and in oil futures (67%).” (Grauer et al., 2022, p. 5) **Effective spread:** “The tick rule severely underestimates effective spread. This is a consequence of the method’s classifying correctly just slightly more than half of all trades.” ([[@savickasInferringDirectionOption2003]], 2003, p. 896) diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242CLNV method.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242CLNV method.md" index 32b22bd4..a4d77e26 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242CLNV method.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242CLNV method.md" @@ -1,6 +1,6 @@ (What is the intuition?) -Like the previous two algorithms, the CLVN method ([[@chakrabartyTradeClassificationAlgorithms2012]] 3809) is a hybrid of the quote and tick rule and extends the EMO rule by a fragmented treatment of trades inside the quotes, which are notoriously hard to classify. ([[@chakrabartyTradeClassificationAlgorithms2012]] 3809) segment the spread into ten equal-width bins and classify trades around the midpoint (4th - 7th decile) by the tick rule and trades close to the quotes (1st-3rd, 8th-10th decile) by the quote rule. 
Like in the [[🔢EMO rule]] trades outside the quotes are categorized by the tick rule. +Like the previous two algorithms, the CLVN method ([[@chakrabartyTradeClassificationAlgorithms2012]] 3809) is a hybrid of the quote and tick rule and extends the EMO rule by a fragmented treatment of trades inside the quotes, which are notoriously hard to classify. ([[@chakrabartyTradeClassificationAlgorithms2012]] 3809) segment the spread into ten equal-width bins and classify trades around the midpoint (4th - 7th decile) by the tick rule and trades close to the quotes (1st-3rd, 8th-10th decile) by the quote rule. Like in the [[🔢EMO Rule]] trades outside the quotes are categorized by the tick rule. $$   \begin{equation}     \text{Trade}_{i,t}= @@ -13,7 +13,7 @@ $$ $$ The algorithm, as summarized in Equation $(11)$, is derived from a performance comparison of the tick rule / EMO rule against the quote rule / LR algorithm on stock data, whereby the accuracy was assessed separately for each decile. The classical CLVN method uses the backward-looking tick rule. In the spirit of ([[@leeInferringTradeDirection1991]] 735), the tick test could be replaced for the reverse tick test.[^1] -([[@chakrabartyTradeClassificationAlgorithms2012]]) test their algorithm NASDAQ stocks traded at INET and ArcaEX modestly outperform the [[🔢Tick test]], [[🔢EMO rule]], and [[🔢LR algorithm]] in terms of out-of-sample accuracy. The method has not yet been tested with option trades. Part of this might be due to the stronger reliance on the tick test, which has lead to a deteriorating performance in past studies (cp. [[@savickasInferringDirectionOption2003]]), ([[@grauerOptionTradeClassification2022]]1--39). +([[@chakrabartyTradeClassificationAlgorithms2012]]) test their algorithm NASDAQ stocks traded at INET and ArcaEX modestly outperform the [[🔢Tick Test]], [[🔢EMO Rule]], and [[🔢LR Algorithm]] in terms of out-of-sample accuracy. The method has not yet been tested with option trades. 
Part of this might be due to the stronger reliance on the tick test, which has lead to a deteriorating performance in past studies (cp. [[@savickasInferringDirectionOption2003]]), ([[@grauerOptionTradeClassification2022]]1--39). ([[@chakrabartyTradeClassificationAlgorithms2007]] 3811) continue the trend for more complex classification rules, leading to a higher fragmented decision surface, and eventually resulting in a improved classification accuracy. Since the decision, which of the base rules is applied, is inferred from *static* cut-off points at the decile boundaries of the spread -- including the midspread and the quotes -- current classification rules may not realize their full potential. A obvious question is, if classifiers, *learned* on price and quote data, can adapt to the data and thereby improve over traditional trade classification rules. diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242Depth rule.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242Depth rule.md" index 0acaf498..50266979 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242Depth rule.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242Depth rule.md" @@ -1,4 +1,4 @@ -As the chapter [[🔢Tick test]] unveils, the tick rule yields significantly lower success rates than the quote rule. For midspread trades, that can otherwise not be classified by the advantageous [[🔢Quote rule]], ([[@grauerOptionTradeClassification2022]] p.14) propose the *depth rule*. +As the chapter [[🔢Tick Test]] unveils, the tick rule yields significantly lower success rates than the quote rule. For midspread trades, that can otherwise not be classified by the advantageous [[🔢Quote Rule]], ([[@grauerOptionTradeClassification2022]] p.14) propose the *depth rule*. The depth rule infers the trade initiator from the quoted size at the best bid and ask. 
Based on the observation that an exceeding bid or ask size relates to higher liquidity on one trade side, trades are classified as a buy for a larger ask size and sell for a higher bid size ([[@grauerOptionTradeClassification2022]] 14). @@ -17,7 +17,7 @@ $$ $$ As shown in Equation $(5)$, the depth rule classifies midspread trades only, if the ask size is different from the bid size, as the ratio between the ask and bid size is the sole criterion for inferring the trade's aggressor. Due to these restrictive conditions, the depth rule can only sign a small fraction of all trades and is best stacked with others rules. -Like the [[🔢Quote rule]], the depth rule has additional data requirements being dependent on quote data. Despite being applied to midspread trades only, ([[@grauerOptionTradeClassification2022]] p.4) report an improvement in the overall accuracy $1.2~\%$ for the CBOE data set and by $0.8~\%$ on the ISE sample merely through the depth rule. The rule has not yet been tested in other markets. +Like the [[🔢Quote Rule]], the depth rule has additional data requirements being dependent on quote data. Despite being applied to midspread trades only, ([[@grauerOptionTradeClassification2022]] p.4) report an improvement in the overall accuracy $1.2~\%$ for the CBOE data set and by $0.8~\%$ on the ISE sample merely through the depth rule. The rule has not yet been tested in other markets. 
**Notes:** [[🔢Depth rule notes]] \ No newline at end of file diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242EMO rule.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242EMO rule.md" index d98b0ba4..6bb7ffb5 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242EMO rule.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242EMO rule.md" @@ -1,6 +1,6 @@ Ellis, Michaely and O’Hara (2000) study the accuracy of the quote, tick and Lee and Ready methods using NASDAQ data that contain 313 stocks traded between September 27, 1996, and September 29, 1997. -([[@ellisAccuracyTradeClassification2000]] 536) examine the performance of the previous algorithms for stocks traded at NASDAQ. By analysing miss-classified trades with regard to the proximity of the trade to the quotes, they observe, that the [[🔢Quote rule]] and by extension of the [[🔢LR algorithm]] performs particularly well at classifying trades executed at the bid and ask price but trail the performance of the tick rule for trades inside or outside the spread ([[@ellisAccuracyTradeClassification2000]] 535-536). The authors combine these observations into a single rule, known as the EMO algorithm. +([[@ellisAccuracyTradeClassification2000]] 536) examine the performance of the previous algorithms for stocks traded at NASDAQ. By analysing miss-classified trades with regard to the proximity of the trade to the quotes, they observe, that the [[🔢Quote Rule]] and by extension of the [[🔢LR Algorithm]] performs particularly well at classifying trades executed at the bid and ask price but trail the performance of the tick rule for trades inside or outside the spread ([[@ellisAccuracyTradeClassification2000]] 535-536). The authors combine these observations into a single rule, known as the EMO algorithm. 
As such, the EMO algorithm ([[@ellisAccuracyTradeClassification2000]] 540) extends the tick rule by classifying trades at the quotes using the quote rule, and all other trades with the tick test. Formally, the classification rule is given by: $$ @@ -15,7 +15,7 @@ $$     \end{cases}   \end{equation} $$ -Equation (...) embeds both the quote and tick rule. As trades off the quotes are classified by the tick rule, the algorithm's overall success rate is dominated by the tick test. For option markets this dependence caused the performance to lag behind quote-based approaches ([[@savickasInferringDirectionOption2003]] 891), ([[@grauerOptionTradeClassification2022]] 21), contrary to the successful adaption in the stock market ([[@ellisAccuracyTradeClassification2000]] 541)([[@chakrabartyTradeClassificationAlgorithms2007]] 3818). In ([[@grauerOptionTradeClassification2022]] 31-35) the authors achieve minor improvements in classification accuracy on option exchange data by applying the reverse tick test (see chapter [[🔢Tick test]]) as a proxy for the tick test. +Equation (...) embeds both the quote and tick rule. As trades off the quotes are classified by the tick rule, the algorithm's overall success rate is dominated by the tick test. For option markets this dependence caused the performance to lag behind quote-based approaches ([[@savickasInferringDirectionOption2003]] 891), ([[@grauerOptionTradeClassification2022]] 21), contrary to the successful adaption in the stock market ([[@ellisAccuracyTradeClassification2000]] 541)([[@chakrabartyTradeClassificationAlgorithms2007]] 3818). In ([[@grauerOptionTradeClassification2022]] 31-35) the authors achieve minor improvements in classification accuracy on option exchange data by applying the reverse tick test (see chapter [[🔢Tick Test]]) as a proxy for the tick test. 
**Notes:** [[🔢EMO rule notes]] diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242LR algorithm.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242LR algorithm.md" index 672b0301..22fff019 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242LR algorithm.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242LR algorithm.md" @@ -5,5 +5,90 @@ - Use the problems of the single tick test to motivate extended rules like EMO / LR? - What lead to a fine-grained fragmentation? + + + + +``` +% ' + +% \begin{algorithm} + + + +%   % input/ouput names + +%   \SetKwInOut{Input}{Input} + +%   \SetKwInOut{Output}{Output} + + + +%   % caption + +%   % TODO: set input and output: e. g., $\hat{e} \leftarrow$ layer_norm $(e \mid \gamma, \beta)$ + +%   \caption{$\operatorname{\mathtt{lee-ready}}{(t_i, a_i, b_i)}$ \label{sec:alg:lee-ready-algorithm}} + + + +%   \Input{% + +%     $t_i$ trade price at $i$, $a_i$ ask price at $i$, and $b_i$ bid price at $i$. + +%   } + +%   \Output{% + +%     $o_i \in\{-1,1\}$ trade initiator at $i$. 
\\ + +%   } + + + +%   \BlankLine % blank line for spacing + + + +%   % start of the pseudocode + +%   $m_i \leftarrow \frac{1}{2}(a_i + b_i)$ \tcc*{mid spread at $i$} + + + +%   \For{$1, \cdots, I$}{ + +%     \uIf{$t_i > m_i$}{ + +%       \Return{$o_i = 1$} + +%     } + +%     \uElseIf{$t_i < m_i$}{ + +%       \Return{$o_i = -1$} + +%     } + +%     \Else{ + +%       \Return{$o_i = \operatorname{\mathtt{tick}}{(t_i, a_i, b_i)}$} \tcc*{see Section \ref{sec:tick-test}.} + +%     } + +%   } % end for i + +%   % TODO: set input and output params + +% \end{algorithm} + + + +% \subsubsection{Reverse Lee and Ready + +%   Algorithm (0.5~p)}\label{sec:reverse-lee-and-ready-algorithm} +``` + + **Notes:** [[🔢LR algorithm notes]] \ No newline at end of file diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242Quote rule.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242Quote rule.md" index 84db8316..28fce08d 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242Quote rule.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242Quote rule.md" @@ -1,3 +1,6 @@ + +“Methods of inferring trade direction can be classified as: tick tests, which use changes in trade prices; the quote method, which compares trade prices to quotes;” (Finucane, 2000, p. 557) + The quote rule compares the trade price against the corresponding quotes at the time of the trade. (Intuition?) If the trade price $P_{i,t}$ is above the midpoint of the bid-ask spread, denoted by $m_{i,t}$, the trade is classified as a buy and if it is below the midpoint, as a sell ([[@harrisDayEndTransactionPrice1989]] p.41). 
Thus, the classification rule, is formally given by: $$ %\tag{10} diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242Tick test.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242Tick test.md" index 6b8b8616..948475c9 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242Tick test.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242Tick test.md" @@ -1,7 +1,36 @@ -- Why was it proposed? -- Why is it included in this work? -- What is the central idea? -- How does it perform? What do other sources write about it? +Based on the rationale that buys increase the trade price and sells lower them, the *tick test* classifies trades by the change in trade price ([[@easleyDiscerningInformationTrade2016]] 271). Its first use is documented in (cp. [[@holthausenEffectLargeBlock1987]] 244) and ([[@hasbrouckTradesQuotesInventories1988]] 240). + +We denote the trade price of the $i$-th security at time $t$ as $P_{i,t}$ and the price change between two successive trades. The tick test is formally defined as + +$$ +  \begin{equation} + +    \text{Trade}_{i,t}= + +    \begin{cases} +      1, & \text{if}\ P_{i, t} > P_{i, t-1}\\ +      0, & \text{if}\ P_{i, t} < P_{i, t-1}\\ + P_{i,t-1} = P_{i,t-2},& \text{else}. +    \end{cases} +  \end{equation} +$$ +If the trade price is higher than the previous price (uptick) the trade is classified as a buy. Reversely, if it is below the previous price (downtick), the trade is classified as a sell. If the price change is zero (zero tick), the signing uses the last price different from the current price. ([[@leeInferringTradeDirection1991]] 3) + +By this means, the tick rule can sign all trades as long as there is a last distinguishable trade price. Being only dependent on transaction data makes the tick rule highly data efficient ([[@odders-whiteOccurrenceConsequencesInaccurate2000]] 264) or ([[@theissenTestAccuracyLee2000]] 1). 
Waiving any quote data for classification contributes to this efficiency, but also poses a major limitation with regard to trades at the bid or ask, as discussed in ([[@finucaneDirectTestMethods2000]] 557--558). For instance, if quotes rise between trades, then a sale at the bid on an uptick or zero uptick, is misclassified as a buy by the tick test due to the overall increased trade price. Similarly for falling quotes, buys at the ask on downticks or zero downticks will be erroneously classified as a sell. + +A variant of the tick test is the *reverse tick test* as applied in ([[@hasbrouckTradesQuotesInventories1988]] 241). It is similar to the tick rule but classifies based on the next distinguishable trade price. As denoted in Equation ... the trade is classified as seller-initiated, if the next trade is on an uptick or a zero uptick, and classified as buyer-initiated for trades at a downtick or a zero downtick. ([[@leeInferringTradeDirection1991]] 735--736). + +$$ +  \begin{equation} +    \text{Trade}_{i,t}= +    \begin{cases} +      1, & \text{if}\ P_{i, t} > P_{i, t+1}\\ +      0, & \text{if}\ P_{i, t} < P_{i, t+1}\\ + \text{Trade}_{i,t+1},& \text{else} +    \end{cases} +  \end{equation} +$$ +Both the tick test and reverse tick test result in the same classification, if the current trade is bracketed by a price reversal and the price change after the trade is opposite from the change before the trade, but differ for price continuations when price changes are in the same direction ([[@leeInferringTradeDirection1991]] 736). In practice, ([[@grauerOptionTradeClassification2022]] 29--32) observe higher accuracies for the reverse tick test on a sample of option trades recorded at the ISE and CBOE. This result contradicts results in the stock market ([[@leeInferringTradeDirection1991]] 737). 
**Notes:** [[🔢Tick test notes]] \ No newline at end of file diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242Trade Size Rule.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242Trade Size Rule.md" index d181be39..96c5ef03 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\224\242Trade Size Rule.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\224\242Trade Size Rule.md" @@ -1,5 +1,5 @@ -As the chapter [[🔢Quote rule]] derives, quote-based approaches are generally preferred due to the improved performance relative to the [[🔢Tick test]]. (More general background on the problem?) ([[@grauerOptionTradeClassification2022]] 13) stress that the quote rule, however, systematically misclassifies limit orders and propose an alternative procedure to identify and override predictions for them. +As the chapter [[🔢Quote Rule]] derives, quote-based approaches are generally preferred due to the improved performance relative to the [[🔢Tick Test]]. (More general background on the problem?) ([[@grauerOptionTradeClassification2022]] 13) stress that the quote rule, however, systematically misclassifies limit orders and propose an alternative procedure to identify and override predictions for them. 
$$ \tag{10} diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\244\226Transformer.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\244\226Transformer.md" index d7178269..e5f6001d 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\244\226Transformer.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\244\226Transformer.md" @@ -1,5 +1,7 @@ ## TOC +- Check own explanations against🧨: https://www.youtube.com/watch?v=8zAP2qWAsKg&t=2410s -> Multi-head attention allows the model to learn different representatins w/o expanding the vector's (embeddings) dimension + ![[🗼Overview Transformer]] diff --git "a/references/obsidian/\360\237\227\277TOC.md" "b/references/obsidian/\360\237\227\277TOC.md" index 0e3fdd2a..37be2ed7 100644 --- "a/references/obsidian/\360\237\227\277TOC.md" +++ "b/references/obsidian/\360\237\227\277TOC.md" @@ -26,11 +26,11 @@ The following section introduces common rules for signing option trades. We star [[🔢Basic rules]] ### Quote Rule -[[🔢Quote rule]] +[[🔢Quote Rule]] ### Tick Test -[[🔢Tick test]] +[[🔢Tick Test]] ### Depth Rule 🟢 -[[🔢Depth rule]] +[[🔢Depth Rule]] ### Trade Size Rule [[🔢Trade Size Rule]] @@ -46,9 +46,9 @@ The following section introduces common rules for signing option trades. 
We star [[🔢LR algorithm notes]] ### Ellis-Michaely-O’Hara Rule -[[🔢EMO rule]] +[[🔢EMO Rule]] ### Chakrabarty-Li-Nguyen-Van-Ness Method -[[🔢CLNV method]] +[[🔢CLNV Method]] # 🧠 Supervised Approaches ## Selection of Approaches diff --git a/reports/Content/main.tex b/reports/Content/main.tex index c237fe45..70787c42 100644 --- a/reports/Content/main.tex +++ b/reports/Content/main.tex @@ -8,7 +8,7 @@ \section{Related Work (3~p)}\label{sec:related-work} While classical trade classification algorithms are extensively tested in the stock markets (e.g., \textcite{chakrabartyTradeClassificationAlgorithms2012}; \textcite{odders-whiteOccurrenceConsequencesInaccurate2000}), few works have examined trade classification in option markets. -\textcite[\pno~882f.]{savickasInferringDirectionOption2003} are the first to compare the tick rule, quote rule, the Lee and Ready algorithm and the EMO rule for options traded at the CBOE. The data set spans a period from July 3, 1995 - December 31, 1995 consisting of $869{,}217$ matched trades. The authors report the highest accuracies for the quote rule ($78.98~\%$) and find that all rules perform worst when applied to index options. In general, the trade classification rules exhibit significantly lower classification accuracies on options data than with stock data, urging the need for improved classifiers. +\textcite[882--883]{savickasInferringDirectionOption2003} are the first to compare the tick rule, quote rule, the Lee and Ready algorithm and the EMO rule for options traded at the CBOE. The data set spans a period from July 3, 1995 - December 31, 1995 consisting of $869{,}217$ matched trades. The authors report the highest accuracies for the quote rule ($78.98~\%$) and find that all rules perform worst when applied to index options. In general, the trade classification rules exhibit significantly lower classification accuracies on options data than with stock data, urging the need for improved classifiers. 
The most exhaustive study is the one of \textcite[1--39]{grauerOptionTradeClassification2022}. The authors test the accuracy of the classical quote rule and tick rule, and hybrids thereof on two large-scale data sets spanning a period from 2005 - 2017. Consistently for options traded at the ISE and CBOE classical rules like the popular Lee and Ready algorithm only achieve accuracies of $62.53~\%$ or $62.03~\%$ and are thus significantly smaller than in the stock market. In line with the research of \textcite[886]{savickasInferringDirectionOption2003}, the reported accuracies are inversely proportional to the rule's reliance on past transaction prices. In particular, the tick rule performs worst with accuracies marginally different from a random guess. Overall, the reported success rates deteriorate between both studies and over time. As a remedy, \textcite[14--17]{grauerOptionTradeClassification2022} introduce two additional rules based on the trade and quote sizes. The \textit{depth rule} is an alternative to the tick rule for classifying mid spread trades in the Lee and Ready algorithm and EMO rule. It assigns the aggressor of the trade based on the depth at the bid or ask. Together with the \textit{trade size rule}, their second rule, which classifies trades with a trade size matching the size of the bid or ask quote, can substantially improve the performance of classical algorithms. The ensemble of rules achieves an accuracy between $73~\%$ and $75~\%$ surpassing previous rules by more than $10~\%$, at the cost of data efficiency. @@ -57,11 +57,42 @@ \subsubsection{Quote Rule (0.75~p)}\label{sec:quote-rule} \subsubsection{Tick Test (0.75~p)}\label{sec:tick-test} +Based on the rationale that buys increase the trade price and sells lower them, the \emph{tick test} classifies trades by the change in trade price \textcite[][271]{easleyDiscerningInformationTrade2016}. 
Its first use is documented in \textcites[cp.][244]{holthausenEffectLargeBlock1987}[][240]{hasbrouckTradesQuotesInventories1988}. +We denote the trade price of the $i$-th security at time $t$ as $P_{i,t}$ and the price change between two successive trades. The tick test is formally defined as + +\begin{equation} + \text{Trade}_{i,t}= + \begin{cases} + 1, & \text{if}\ P_{i, t} > P_{i, t-1} \\ + 0, & \text{if}\ P_{i, t} < P_{i, t-1} \\ + \text{Trade}_{i,t-1}, & \text{else}. + \end{cases} + \label{eq:tick-test} +\end{equation} + + +If the trade price is higher than the previous price (uptick) the trade is classified as a buy. Reversely, if it is below the previous price (downtick), the trade is classified as a sell. If the price change is zero (zero tick), the signing uses the last price different from the current price \autocite[][3]{leeInferringTradeDirection1991}. + +By this means, the tick rule can sign all trades as long as there is a last distinguishable trade price. Being only dependent on transaction data makes the tick rule highly data-efficient. Waiving any quote data for classification contributes to this efficiency, but also poses a major limitation with regard to trades at the bid or ask, as discussed in \textcite[][557--558]{finucaneDirectTestMethods2000}. For instance, if quotes rise between trades, then a sale at the bid on an uptick or zero uptick, is misclassified as a buy by the tick test due to the overall increased trade price. Similarly for falling quotes, buys at the ask on downticks or zero downticks will be erroneously classified as a sell. + +A variant of the tick test is the \emph{reverse tick test} as applied in \textcite[][241]{hasbrouckTradesQuotesInventories1988}. It is similar to the tick rule but classifies based on the next distinguishable trade price. 
As denoted in Equation \ref{eq:reverse-tick-test} the trade is classified as seller-initiated, if the next trade is on an uptick or a zero uptick, and classified as buyer-initiated for trades at a downtick or a zero downtick \autocite[][735--736]{leeInferringTradeDirection1991}. + +\begin{equation} + \text{Trade}_{i,t}= + \begin{cases} + 1, & \text{if}\ P_{i, t} > P_{i, t+1} \\ + 0, & \text{if}\ P_{i, t} < P_{i, t+1} \\ + \text{Trade}_{i,t+1}, & \text{else} + \end{cases} + \label{eq:reverse-tick-test} +\end{equation} + +Both the tick test and reverse tick test result in the same classification, if the current trade is bracketed by a price reversal and the price change after the trade is opposite from the change before the trade, but differ for price continuations when price changes are in the same direction \autocite[][736]{leeInferringTradeDirection1991}. In practice, \textcite[][29--32]{grauerOptionTradeClassification2022} observe higher accuracies for the reverse tick test on a sample of option trades recorded at the ISE and CBOE. This result contradicts results in the stock market \textcite[][737]{leeInferringTradeDirection1991}. \subsubsection{Depth Rule (0.75~p)}\label{sec:depth-rule} -As the chapter tick test unveils, the tick rule yields significantly lower success rates than the quote rule. For midspread trades, that can otherwise not be classified by the advantageous quote rule,\textcite[][14]{grauerOptionTradeClassification2022} propose the \emph{depth rule}. +As the chapter \ref{sec:quote-rule} unveils, the tick rule yields significantly lower success rates than the quote rule. For midspread trades, that can otherwise not be classified by the advantageous quote rule, \textcite[][14]{grauerOptionTradeClassification2022} propose the \emph{depth rule}. The depth rule infers the trade initiator from the quoted size at the best bid and ask. 
Based on the observation that an exceeding bid or ask size relates to higher liquidity on one trade side, trades are classified as a buy for a larger ask size and sell for a higher bid size \textcite[][14]{grauerOptionTradeClassification2022}. @@ -100,45 +131,7 @@ \subsubsection{Trade Size Rule (0.75~p)}\label{sec:trade-size-rule} \subsection{Hybrid Rules (2.5~p)}\label{sec:hybrid-rules} \subsubsection{Lee and Ready Algorithm (1 p)}\label{sec:lee-and-ready-algorithm} -% ' -% \begin{algorithm} - -% % input/ouput names -% \SetKwInOut{Input}{Input} -% \SetKwInOut{Output}{Output} - -% % caption -% % TODO: set input and output: e. g., $\hat{e} \leftarrow$ layer_norm $(e \mid \gamma, \beta)$ -% \caption{$\operatorname{\mathtt{lee-ready}}{(t_i, a_i, b_i)}$ \label{sec:alg:lee-ready-algorithm}} - -% \Input{% -% $t_i$ trade price at $i$, $a_i$ ask price at $i$, and $b_i$ bid price at $i$. -% } -% \Output{% -% $o_i \in\{-1,1\}$ trade initiator at $i$. \\ -% } - -% \BlankLine % blank line for spacing - -% % start of the pseudocode -% $m_i \leftarrow \frac{1}{2}(a_i + b_i)$ \tcc*{mid spread at $i$} - -% \For{$1, \cdots, I$}{ -% \uIf{$t_i > m_i$}{ -% \Return{$o_i = 1$} -% } -% \uElseIf{$t_i < m_i$}{ -% \Return{$o_i = -1$} -% } -% \Else{ -% \Return{$o_i = \operatorname{\mathtt{tick}}{(t_i, a_i, b_i)}$} \tcc*{see Section \ref{sec:tick-test}.} -% } -% } % end for i -% % TODO: set input and output params -% \end{algorithm} - -% \subsubsection{Reverse Lee and Ready -% Algorithm (0.5~p)}\label{sec:reverse-lee-and-ready-algorithm} + \subsubsection{Ellis-Michaely-O'Hara Rule (0.75~p)}\label{sec:ellis-michaely-ohara-rule} @@ -168,7 +161,7 @@ \subsubsection{Chakrabarty-Li-Nguyen-Van-Ness \begin{cases} 1, & \text{if}\ P_{i, t} \in \left(\frac{3}{10} B_{i,t} + \frac{7}{10} A_{i,t}, A_{i, t}\right] \\ 0, & \text{if}\ P_{i, t} \in \left[ B_{i,t}, \frac{7}{10} B_{i,t} + \frac{3}{10} A_{i,t}\right) \\ - \operatorname{tick}(), & \text{else}. + \operatorname{tick}(), & \text{else}. 
\end{cases} \label{eq:clvn-rule} \end{equation} @@ -280,36 +273,6 @@ \subsubsection{Positional Encoding}\label{sec:positional-encoding} \subsubsection{Attention}\label{sec:attention} -% \begin{equation} -% \t{softmax}( \m{A})[t_\mathrm{z}, t_\mathrm{x}] ~:=~ \frac{\exp A[t_\mathrm{z},t_\mathrm{x}]}{ \sum_{t} \exp A[t,t_\mathrm{x}]}, -% \end{equation} -% \begin{equation}\label{eq:mask} -% \text{Mask}[t_\t{z},t_\t{x}] = \left\{{1~~~ \text{for bidirectional attention} -% \atop [\![t_\t{z}\!≤\!t_\t{x}]\!]~~ \text{for unidirectional att.}}\right. -% \end{equation} - -% %-------------------------------% -% \begin{algorithm}[h] % Attention -% %-------------------------------% -% \caption{$\m{\tilde V}\gets$~\texttt{Attention}$(\m{X},\m{Z}|\bmcWqkv,\text{Mask})$} -% \label{algo:attention} -% \KwIn{$\m{X}\in\mathbb{R}^{d_\t{x}\times\ell_\t{x}}, \m{Z}\in\mathbb{R}^{d_\t{z}\times\ell_\t{z}}$, vector representations of primary and context sequence.} -% \KwOut{$\m{\tilde V}\in\mathbb{R}^{d_\t{out}\times\ell_\t{x}} $, updated representations of tokens in $\m{X}$, folding in information from tokens in $\m{Z}$.} -% \KwParam{$\bmcWqkv$ consisting of: -% $\m{W_q}\in\mathbb{R}^{d_\t{attn}\times d_\t{x}}$, $\v{b_q}\in\mathbb{R}^{d_\t{attn}}$ -% $\m{W_k}\in\mathbb{R}^{d_\t{attn}\times d_\t{z}}$, $\v{b_k}\in\mathbb{R}^{d_\t{attn}}$ -% $\m{W_v}\in\mathbb{R}^{d_\t{out}\times d_\t{z}}$, ~$\v{b_v}\in\mathbb{R}^{d_\t{out}}$.} -% \KwHyper{Mask$\in\!\!\{0,\!1\}^{\ell_\t{z}\times\ell_\t{x}}$} %$\uparrow$\eqref{eq:mask}} -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% $\m{Q} \gets \m{W_q} \m{X} + \v{b_q} \v{1}^\intercal$ ~ [\![$\m{Q}\t{uery}\in\mathbb{R}^{d_\t{attn}\times\ell_\t{x}}$]\!] \; -% $\m{K} \gets \m{W_k} \m{Z} + \v{b_k} \v{1}^\intercal$ ~ [\![$\m{K}\t{ey}\in\mathbb{R}^{d_\t{attn}\times\ell_\t{z}}$]\!] \; -% $\m{V} \gets \m{W_v} \m{Z} + \v{b_v} \v{1}^\intercal$ ~ [\![$\m{V}\t{alue}\in\mathbb{R}^{d_\t{out}\times\ell_\t{z}}$]\!] 
\; -% $\m{S} \gets \m{K}^\intercal \m{Q}$ ~ [\![$\m{S}\t{core}\in\mathbb{R}^{\ell_\t{z}\times\ell_\t{x}}$]\!] \; -% $\forall t_\t{z}, t_\t{x},$ if $\neg$Mask$[t_\t{z},t_\t{x}]$ then $S[t_\t{z},t_\t{x}] \gets -\infty $ \; -% \Return $\m{\tilde V} = \m{V}\cdot \t{softmax}\left( \m{S} / \sqrt{d_\t{attn}} \right)$ -% \end{algorithm} - - \subsubsection{Position-wise Feed-Forward Networks}\label{sec:position-wise-ffn} \subsubsection{Residual Connections}\label{sec:residual-connections} diff --git a/reports/thesis.tex b/reports/thesis.tex index bc1c0f8d..9ac70602 100644 --- a/reports/thesis.tex +++ b/reports/thesis.tex @@ -11,41 +11,6 @@ \usepackage[style=bwl-FU,backend=biber,natbib=true,maxcitenames=2]{biblatex} \addbibresource{Content/bibliography.bib} -% --------------------------------- Algorithm --------------------------------- -\usepackage[linesnumbered, ruled]{algorithm2e} - -% change algorithm font size -\SetAlFnt{\normalsize} - -% change algorithm caption style -\newcommand{\xAlCapSty}[1]{\small\sffamily\bfseries#1} -\SetAlCapSty{xAlCapSty} - -\SetAlCapSkip{1em} -\SetKwInput{KwParam}{Parameters} -\SetKwInput{KwHyper}{Hyperparameters} - -% comment style (algorithms) -\newcommand{\xCommentSty}[1]{\ttfamily\textcolor{blue}{#1}} -\SetCommentSty{xCommentSty} - -% change line number style -\newcommand\mynlfont[1]{\scriptsize\sffamily\textcolor{gray}{#1}} -\SetNlSty{mynlfont}{}{} - - % add the line numbers -\LinesNumbered - -% comments right justified -\SetSideCommentRight - -% don't print semicolon -\DontPrintSemicolon - -% ruled algorithm -\RestyleAlgo{algoruled} - - % Format and layout \usepackage[left=3cm,right=3cm,bottom=3cm]{geometry} % Specifies left and right side margins. \usepackage{setspace} % Package that enables modifing the line spacing. 
@@ -165,12 +130,6 @@ \newpage \tableofcontents -% List of Algorithms (comment out if there are no figures in the thesis) -\newpage -\listofalgorithms % Inserts the list of algorithms from algorithm package -\addcontentsline{toc}{section}{List of Algorithms} % Adds the list of figures to the table of contents. - - % List of Figures (comment out if there are no figures in the thesis) \newpage \listoffigures % Inserts the list of figures.