From 487fcb84153bc0162386a75882662415588191c8 Mon Sep 17 00:00:00 2001 From: nkaz001 Date: Sun, 19 May 2024 09:46:15 -0400 Subject: [PATCH] feat: Add the data preparation tutorial for Rust version. --- docs/tutorials/Data Preparation.ipynb | 381 ++++++++++++++++++++++--- examples/Data Preparation.ipynb | 391 +++++++++++++++++++++++--- hftbacktest/__init__.py | 2 +- hftbacktest/data/__init__.py | 10 +- hftbacktest/data/validation.py | 39 +++ 5 files changed, 753 insertions(+), 70 deletions(-) diff --git a/docs/tutorials/Data Preparation.ipynb b/docs/tutorials/Data Preparation.ipynb index 8a15003..d5574b0 100644 --- a/docs/tutorials/Data Preparation.ipynb +++ b/docs/tutorials/Data Preparation.ipynb @@ -73,7 +73,13 @@ "id": "2f27f86a", "metadata": {}, "source": [ - "The first token of the line is timestamp received by local." + "The first token of the line is timestamp received by local.\n", + "\n", + "
\n", + " \n", + "**Note:** There are currently two different implementations of the feed data collector: one in Python and another in Rust. The Python implementation records timestamps in microseconds, while the Rust implementation records timestamps in nanoseconds. Therefore, Python HftBacktest examples primarily use microseconds, whereas Rust HftBacktest examples use nanoseconds. Be mindful of the timestamp units.\n", + " \n", + "
" ] }, { @@ -85,6 +91,14 @@ "`convert` method also attempts to correct timestamps by reordering the rows." ] }, + { + "cell_type": "markdown", + "id": "68af262e-b88b-4dc5-a945-3d9cf380d176", + "metadata": {}, + "source": [ + "### For HftBacktest in Python, use the Python version of the Data Collector" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -95,9 +109,9 @@ "name": "stdout", "output_type": "stream", "text": [ + "Correcting the latency\n", "local_timestamp is ahead of exch_timestamp by 18836.0\n", - "found 542 rows that exch_timestamp is ahead of the previous exch_timestamp\n", - "Correction is done.\n" + "Correcting the event order\n" ] } ], @@ -107,7 +121,7 @@ "from hftbacktest.data.utils import binancefutures\n", "\n", "data = binancefutures.convert('usdm/btcusdt_20230404.dat.gz')\n", - "np.savez('btcusdt_20230404', data=data)" + "np.savez_compressed('btcusdt_20230404', data=data)" ] }, { @@ -128,9 +142,9 @@ "name": "stdout", "output_type": "stream", "text": [ + "Correcting the latency\n", "local_timestamp is ahead of exch_timestamp by 26932.0\n", - "found 6555 rows that exch_timestamp is ahead of the previous exch_timestamp\n", - "Correction is done.\n", + "Correcting the event order\n", "Saving to btcusdt_20230405\n" ] }, @@ -158,7 +172,73 @@ } ], "source": [ - "binancefutures.convert('usdm/btcusdt_20230405.dat.gz', output_filename='btcusdt_20230405')" + "binancefutures.convert('usdm/btcusdt_20230405.dat.gz', output_filename='btcusdt_20230405', compress=True)" + ] + }, + { + "cell_type": "markdown", + "id": "b70ab9db-70d4-4ec1-936d-b19547fa3bc6", + "metadata": {}, + "source": [ + "### For HftBacktest in Python, use the Rust version of the Data Collector\n", + "\n", + "
\n", + " \n", + "**Note:** The timestamp is in nanoseconds.\n", + " \n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e72631fd-93a2-4b1c-a753-9534511d6563", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correcting the latency\n", + "Correcting the event order\n", + "Saving to SOLUSDT_20240420.npz\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 1.0000000e+00, 1.7135712e+18, 1.7135712e+18, 1.0000000e+00,\n", + " 1.3380900e+02, 1.0000000e+00],\n", + " [ 1.0000000e+00, 1.7135712e+18, 1.7135712e+18, 1.0000000e+00,\n", + " 1.3702000e+02, 2.0000000e+00],\n", + " [ 1.0000000e+00, 1.7135712e+18, 1.7135712e+18, 1.0000000e+00,\n", + " 1.3739200e+02, 0.0000000e+00],\n", + " ...,\n", + " [ 1.0000000e+00, 1.7136576e+18, 1.7136576e+18, -1.0000000e+00,\n", + " 1.5133300e+02, 1.5000000e+01],\n", + " [ 1.0000000e+00, 1.7136576e+18, 1.7136576e+18, -1.0000000e+00,\n", + " 1.5133800e+02, 2.0000000e+00],\n", + " [ 1.0000000e+00, 1.7136576e+18, 1.7136576e+18, -1.0000000e+00,\n", + " 1.5134900e+02, 2.3000000e+01]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "from hftbacktest.data.utils import binancefutures\n", + "\n", + "binancefutures.convert(\n", + " \"SOLUSDT_20240420.gz\",\n", + " output_filename=\"SOLUSDT_20240420.npz\",\n", + " compress=True,\n", + " timestamp_unit=\"ns\",\n", + " combined_stream=False\n", + ")" ] }, { @@ -171,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "eb3d445e", "metadata": {}, "outputs": [ @@ -326,7 +406,7 @@ "[71019 rows x 6 columns]" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -342,6 +422,60 @@ "df" ] }, + { + "cell_type": "markdown", + "id": "ec2a1aa8-8e05-47a9-b0f7-94cea292b1bd", + "metadata": {}, + "source": [ + "### For HftBacktest in Rust, use the Rust version of the Data Collector" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "68e845f1-07cd-4cd4-a4dc-4aae009286b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correcting the latency\n", + "Correcting the event order\n", + "Saving to SOLUSDT_20240420.npz\n" + ] + }, + { + "data": { + "text/plain": [ + "array([(3758096385, 1713571200043000064, 1713571200045828864, 133.809, 1.),\n", + " (3758096385, 1713571200043000064, 1713571200045828864, 137.02 , 2.),\n", + " (3758096385, 1713571200043000064, 1713571200045828864, 137.392, 0.),\n", + " ...,\n", + " (3489660929, 1713657599968000000, 1713657599976203008, 151.333, 15.),\n", + " (3489660929, 1713657599968000000, 1713657599976203008, 151.338, 2.),\n", + " (3489660929, 1713657599968000000, 1713657599976203008, 151.349, 23.)],\n", + " dtype=[('ev', '] 2.95M 6.02MB/s in 0.5s \n", + "\n", + "2024-05-19 09:39:06 (6.02 MB/s) - ‘BTCUSDT_trades.csv.gz’ saved [3090479/3090479]\n", + "\n", + "--2024-05-19 09:39:07-- https://datasets.tardis.dev/v1/binance-futures/incremental_book_L2/2020/02/01/BTCUSDT.csv.gz\n", + "Resolving datasets.tardis.dev (datasets.tardis.dev)... 172.64.147.51, 104.18.40.205, 2606:4700:4400::6812:28cd, ...\n", + "Connecting to datasets.tardis.dev (datasets.tardis.dev)|172.64.147.51|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 250016849 (238M) [text/csv]\n", + "Saving to: ‘BTCUSDT_book.csv.gz’\n", + "\n", + "BTCUSDT_book.csv.gz 100%[===================>] 238.43M 17.6MB/s in 12s \n", + "\n", + "2024-05-19 09:39:20 (19.3 MB/s) - ‘BTCUSDT_book.csv.gz’ saved [250016849/250016849]\n", + "\n" + ] + } + ], "source": [ "# https://docs.tardis.dev/historical-data-details/binance-futures\n", "\n", - "# Download sample Binance futures BTCUSDT trades\n", + "# Downloads sample Binance futures BTCUSDT trades\n", "!wget https://datasets.tardis.dev/v1/binance-futures/trades/2020/02/01/BTCUSDT.csv.gz -O BTCUSDT_trades.csv.gz\n", " \n", - "# Download sample Binance futures BTCUSDT book\n", + "# Downloads sample Binance futures BTCUSDT book\n", "!wget https://datasets.tardis.dev/v1/binance-futures/incremental_book_L2/2020/02/01/BTCUSDT.csv.gz -O BTCUSDT_book.csv.gz" ] }, + { + "cell_type": "markdown", + "id": "2d16bdd3-2843-457c-ac20-680b27b76692", + "metadata": {}, + "source": [ + "### For HftBacktest in Python" + ] + }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "id": "2a94dc09", "metadata": {}, "outputs": [ @@ -626,8 +885,8 @@ "Reading BTCUSDT_trades.csv.gz\n", "Reading BTCUSDT_book.csv.gz\n", "Merging\n", - "found 20948 rows that exch_timestamp is ahead of the previous exch_timestamp\n", - "Correction is done.\n" + "Correcting the latency\n", + "Correcting the event order\n" ] } ], @@ -635,7 +894,7 @@ "from hftbacktest.data.utils import tardis\n", "\n", "data = tardis.convert(['BTCUSDT_trades.csv.gz', 'BTCUSDT_book.csv.gz'])\n", - "np.savez('btcusdt_20200201.npz', data=data)" + "np.savez_compressed('btcusdt_20200201.npz', data=data)" ] }, { @@ -648,7 +907,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "id": "f2026ce5", "metadata": {}, "outputs": [ @@ -659,8 +918,8 @@ "Reading BTCUSDT_trades.csv.gz\n", "Reading BTCUSDT_book.csv.gz\n", "Merging\n", - "found 20948 rows that exch_timestamp is ahead of the previous exch_timestamp\n", - "Correction is done.\n", + "Correcting the latency\n", + "Correcting the event order\n", "Saving to btcusdt_20200201.npz\n" ] }, @@ -682,7 +941,7 @@ " 9.3481400e+03, 3.9800000e+00]])" ] }, - "execution_count": 9, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -691,7 +950,8 @@ "tardis.convert(\n", " ['BTCUSDT_trades.csv.gz', 'BTCUSDT_book.csv.gz'],\n", " output_filename='btcusdt_20200201.npz',\n", - " buffer_size=200_000_000\n", + " buffer_size=200_000_000,\n", + " compress=True\n", ")" ] }, @@ -702,6 +962,61 @@ "source": [ "You can also build the snapshot in the same way as described above." ] + }, + { + "cell_type": "markdown", + "id": "6e66c839-5b0f-41e1-86c1-bba4279d95b4", + "metadata": {}, + "source": [ + "### For HftBacktest in Rust" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1d0d8eb3-2e9a-45e1-9712-278a05953e01", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading BTCUSDT_trades.csv.gz\n", + "Reading BTCUSDT_book.csv.gz\n", + "Merging\n", + "Correcting the latency\n", + "Correcting the event order\n", + "Saving to btcusdt_20200201.npz\n" + ] + }, + { + "data": { + "text/plain": [ + "array([(3758096386, 1580515202342000128, 1580515202497051904, 9364.51, 1.197),\n", + " (3758096386, 1580515202342000128, 1580515202497346048, 9365.67, 0.02 ),\n", + " (3758096386, 1580515202342000128, 1580515202497351936, 9365.86, 0.01 ),\n", + " ...,\n", + " (3758096385, 1580601599836000000, 1580601599962960896, 9351.47, 3.914),\n", + " (3489660929, 1580601599836000000, 1580601599963461120, 9397.78, 0.1 ),\n", + " (3758096385, 1580601599848000000, 1580601599973647104, 9348.14, 3.98 )],\n", + " dtype=[('ev', '\n", + " \n", + "**Note:** There are currently two different implementations of the feed data collector: one in Python and another in Rust. The Python implementation records timestamps in microseconds, while the Rust implementation records timestamps in nanoseconds. Therefore, Python HftBacktest examples primarily use microseconds, whereas Rust HftBacktest examples use nanoseconds. Be mindful of the timestamp units.\n", + " \n", + "" ] }, { @@ -85,6 +91,14 @@ "`convert` method also attempts to correct timestamps by reordering the rows." ] }, + { + "cell_type": "markdown", + "id": "68af262e-b88b-4dc5-a945-3d9cf380d176", + "metadata": {}, + "source": [ + "### For HftBacktest in Python, use the Python version of the Data Collector" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -95,9 +109,9 @@ "name": "stdout", "output_type": "stream", "text": [ + "Correcting the latency\n", "local_timestamp is ahead of exch_timestamp by 18836.0\n", - "found 542 rows that exch_timestamp is ahead of the previous exch_timestamp\n", - "Correction is done.\n" + "Correcting the event order\n" ] } ], @@ -107,7 +121,7 @@ "from hftbacktest.data.utils import binancefutures\n", "\n", "data = binancefutures.convert('usdm/btcusdt_20230404.dat.gz')\n", - "np.savez('btcusdt_20230404', data=data)" + "np.savez_compressed('btcusdt_20230404', data=data)" ] }, { @@ -128,9 +142,9 @@ "name": "stdout", "output_type": "stream", "text": [ + "Correcting the latency\n", "local_timestamp is ahead of exch_timestamp by 26932.0\n", - "found 6555 rows that exch_timestamp is ahead of the previous exch_timestamp\n", - "Correction is done.\n", + "Correcting the event order\n", "Saving to btcusdt_20230405\n" ] }, @@ -158,7 +172,73 @@ } ], "source": [ - "binancefutures.convert('usdm/btcusdt_20230405.dat.gz', output_filename='btcusdt_20230405')" + "binancefutures.convert('usdm/btcusdt_20230405.dat.gz', output_filename='btcusdt_20230405', compress=True)" + ] + }, + { + "cell_type": "markdown", + "id": "b70ab9db-70d4-4ec1-936d-b19547fa3bc6", + "metadata": {}, + "source": [ + "### For HftBacktest in Python, use the Rust version of the Data Collector\n", + "\n", + "
\n", + " \n", + "**Note:** The timestamp is in nanoseconds.\n", + " \n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e72631fd-93a2-4b1c-a753-9534511d6563", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correcting the latency\n", + "Correcting the event order\n", + "Saving to SOLUSDT_20240420.npz\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 1.0000000e+00, 1.7135712e+18, 1.7135712e+18, 1.0000000e+00,\n", + " 1.3380900e+02, 1.0000000e+00],\n", + " [ 1.0000000e+00, 1.7135712e+18, 1.7135712e+18, 1.0000000e+00,\n", + " 1.3702000e+02, 2.0000000e+00],\n", + " [ 1.0000000e+00, 1.7135712e+18, 1.7135712e+18, 1.0000000e+00,\n", + " 1.3739200e+02, 0.0000000e+00],\n", + " ...,\n", + " [ 1.0000000e+00, 1.7136576e+18, 1.7136576e+18, -1.0000000e+00,\n", + " 1.5133300e+02, 1.5000000e+01],\n", + " [ 1.0000000e+00, 1.7136576e+18, 1.7136576e+18, -1.0000000e+00,\n", + " 1.5133800e+02, 2.0000000e+00],\n", + " [ 1.0000000e+00, 1.7136576e+18, 1.7136576e+18, -1.0000000e+00,\n", + " 1.5134900e+02, 2.3000000e+01]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "from hftbacktest.data.utils import binancefutures\n", + "\n", + "binancefutures.convert(\n", + " \"SOLUSDT_20240420.gz\",\n", + " output_filename=\"SOLUSDT_20240420.npz\",\n", + " compress=True,\n", + " timestamp_unit=\"ns\",\n", + " combined_stream=False\n", + ")" ] }, { @@ -171,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "eb3d445e", "metadata": {}, "outputs": [ @@ -326,7 +406,7 @@ "[71019 rows x 6 columns]" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -342,6 +422,68 @@ "df" ] }, + { + "cell_type": "markdown", + "id": "ec2a1aa8-8e05-47a9-b0f7-94cea292b1bd", + "metadata": {}, + "source": [ + "### For HftBacktest in Rust, use the Rust version of the Data Collector" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "68e845f1-07cd-4cd4-a4dc-4aae009286b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correcting the latency\n", + "Correcting the event order\n", + "Saving to SOLUSDT_20240420.npz\n" + ] + }, + { + "data": { + "text/plain": [ + "array([(3758096385, 1713571200043000064, 1713571200045828864, 133.809, 1.),\n", + " (3758096385, 1713571200043000064, 1713571200045828864, 137.02 , 2.),\n", + " (3758096385, 1713571200043000064, 1713571200045828864, 137.392, 0.),\n", + " ...,\n", + " (3489660929, 1713657599968000000, 1713657599976203008, 151.333, 15.),\n", + " (3489660929, 1713657599968000000, 1713657599976203008, 151.338, 2.),\n", + " (3489660929, 1713657599968000000, 1713657599976203008, 151.349, 23.)],\n", + " dtype=[('ev', '] 2.95M 6.02MB/s in 0.5s \n", + "\n", + "2024-05-19 09:39:06 (6.02 MB/s) - ‘BTCUSDT_trades.csv.gz’ saved [3090479/3090479]\n", + "\n", + "--2024-05-19 09:39:07-- https://datasets.tardis.dev/v1/binance-futures/incremental_book_L2/2020/02/01/BTCUSDT.csv.gz\n", + "Resolving datasets.tardis.dev (datasets.tardis.dev)... 172.64.147.51, 104.18.40.205, 2606:4700:4400::6812:28cd, ...\n", + "Connecting to datasets.tardis.dev (datasets.tardis.dev)|172.64.147.51|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 250016849 (238M) [text/csv]\n", + "Saving to: ‘BTCUSDT_book.csv.gz’\n", + "\n", + "BTCUSDT_book.csv.gz 100%[===================>] 238.43M 17.6MB/s in 12s \n", + "\n", + "2024-05-19 09:39:20 (19.3 MB/s) - ‘BTCUSDT_book.csv.gz’ saved [250016849/250016849]\n", + "\n" + ] + } + ], "source": [ "# https://docs.tardis.dev/historical-data-details/binance-futures\n", "\n", - "# Download sample Binance futures BTCUSDT trades\n", + "# Downloads sample Binance futures BTCUSDT trades\n", "!wget https://datasets.tardis.dev/v1/binance-futures/trades/2020/02/01/BTCUSDT.csv.gz -O BTCUSDT_trades.csv.gz\n", " \n", - "# Download sample Binance futures BTCUSDT book\n", + "# Downloads sample Binance futures BTCUSDT book\n", "!wget https://datasets.tardis.dev/v1/binance-futures/incremental_book_L2/2020/02/01/BTCUSDT.csv.gz -O BTCUSDT_book.csv.gz" ] }, + { + "cell_type": "markdown", + "id": "2d16bdd3-2843-457c-ac20-680b27b76692", + "metadata": {}, + "source": [ + "### For HftBacktest in Python" + ] + }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "id": "2a94dc09", "metadata": {}, "outputs": [ @@ -618,8 +885,8 @@ "Reading BTCUSDT_trades.csv.gz\n", "Reading BTCUSDT_book.csv.gz\n", "Merging\n", - "found 20948 rows that exch_timestamp is ahead of the previous exch_timestamp\n", - "Correction is done.\n" + "Correcting the latency\n", + "Correcting the event order\n" ] } ], @@ -627,7 +894,7 @@ "from hftbacktest.data.utils import tardis\n", "\n", "data = tardis.convert(['BTCUSDT_trades.csv.gz', 'BTCUSDT_book.csv.gz'])\n", - "np.savez('btcusdt_20200201.npz', data=data)" + "np.savez_compressed('btcusdt_20200201.npz', data=data)" ] }, { @@ -640,7 +907,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "f2026ce5", "metadata": {}, "outputs": [ @@ -651,8 +918,8 @@ "Reading BTCUSDT_trades.csv.gz\n", "Reading BTCUSDT_book.csv.gz\n", "Merging\n", - "found 20948 rows that exch_timestamp is ahead of the previous exch_timestamp\n", - "Correction is done.\n", + "Correcting the latency\n", + "Correcting the event order\n", "Saving to btcusdt_20200201.npz\n" ] }, @@ -674,7 +941,7 @@ " 9.3481400e+03, 3.9800000e+00]])" ] }, - "execution_count": 8, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -683,7 +950,8 @@ "tardis.convert(\n", " ['BTCUSDT_trades.csv.gz', 'BTCUSDT_book.csv.gz'],\n", " output_filename='btcusdt_20200201.npz',\n", - " buffer_size=200_000_000\n", + " buffer_size=200_000_000,\n", + " compress=True\n", ")" ] }, @@ -694,6 +962,61 @@ "source": [ "You can also build the snapshot in the same way as described above." ] + }, + { + "cell_type": "markdown", + "id": "6e66c839-5b0f-41e1-86c1-bba4279d95b4", + "metadata": {}, + "source": [ + "### For HftBacktest in Rust" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1d0d8eb3-2e9a-45e1-9712-278a05953e01", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading BTCUSDT_trades.csv.gz\n", + "Reading BTCUSDT_book.csv.gz\n", + "Merging\n", + "Correcting the latency\n", + "Correcting the event order\n", + "Saving to btcusdt_20200201.npz\n" + ] + }, + { + "data": { + "text/plain": [ + "array([(3758096386, 1580515202342000128, 1580515202497051904, 9364.51, 1.197),\n", + " (3758096386, 1580515202342000128, 1580515202497346048, 9365.67, 0.02 ),\n", + " (3758096386, 1580515202342000128, 1580515202497351936, 9365.86, 0.01 ),\n", + " ...,\n", + " (3758096385, 1580601599836000000, 1580601599962960896, 9351.47, 3.914),\n", + " (3489660929, 1580601599836000000, 1580601599963461120, 9397.78, 0.1 ),\n", + " (3758096385, 1580601599848000000, 1580601599973647104, 9348.14, 3.98 )],\n", + " dtype=[('ev', ' n tup_list, dtype=[('ev', 'i8'), ('exch_ts', 'i8'), ('local_ts', 'i8'), ('px', 'f4'), ('qty', 'f4')] ) + + +def convert_from_struct_arr(data: np.ndarray) -> np.ndarray: + r""" + Converts the structured array that can be used in Rust hftbacktest into the 2D ndarray currently used in Python + hftbacktest. + + Args: + data: the structured array to be converted. + + Returns: + Converted 2D ndarray. + """ + + out = np.empty((len(data), 6), np.float64) + for row in range(len(data)): + ev = data[row][0] + + if ev & EXCH_EVENT == EXCH_EVENT: + out[row, COL_EXCH_TIMESTAMP] = data[row][1] + else: + out[row, COL_EXCH_TIMESTAMP] = -1 + + if ev & LOCAL_EVENT == LOCAL_EVENT: + out[row, COL_LOCAL_TIMESTAMP] = data[row][2] + else: + out[row, COL_LOCAL_TIMESTAMP] = -1 + + if ev & BUY == BUY: + out[row, COL_SIDE] = 1 + elif ev & SELL == SELL: + out[row, COL_SIDE] = -1 + else: + out[row, COL_SIDE] = 0 + + out[row, COL_PRICE] = data[row][3] + out[row, COL_QTY] = data[row][4] + out[row, COL_EVENT] = ev & 0xFF + return out