From 2be24e89e64b5a976348d0b62fc29d452d72725e Mon Sep 17 00:00:00 2001 From: Erik Reppel Date: Tue, 22 Aug 2023 10:17:16 -0400 Subject: [PATCH 1/8] Decode logs based on event signature --- crates/freeze/examples/events.rs | 52 +++++++++ crates/freeze/src/datasets/logs.rs | 176 +++++++++++++++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 crates/freeze/examples/events.rs diff --git a/crates/freeze/examples/events.rs b/crates/freeze/examples/events.rs new file mode 100644 index 00000000..b73049b4 --- /dev/null +++ b/crates/freeze/examples/events.rs @@ -0,0 +1,52 @@ +use std::collections::{HashMap, HashSet}; +use ethers::abi::Event; +use ethers_core::abi::{ethabi, HumanReadableParser, LogParam, RawLog, Token}; +use ethabi::param_type::ParamType; +use ethers::prelude::{Address, Log}; + +#[tokio::main] +async fn main() { + // let event: Event = serde_json::from_str(EVENT_S).unwrap(); + // println!("{:?}", event.inputs); + // println!("{:#?}", event); + + + let e = HumanReadableParser::parse_event("event NewMint(address indexed msgSender, uint256 indexed mintQuantity)").unwrap(); + + let raw_log = r#"{ + "address": "0x0000000000000000000000000000000000000000", + "topics": [ + "0x52277f0b4a9b555c5aa96900a13546f972bda413737ec164aac947c87eec6024", + "0x00000000000000000000000062a73d9116eda78a78f4cf81602bdc926fb4c0dd", + "0x0000000000000000000000000000000000000000000000000000000000000003" + ], + "data": "0x" + }"#; + + let log = serde_json::from_str::(raw_log).unwrap(); + + let m = parse_log_from_event(e, vec![log]); + println!("{:#?}", m); +} + +// this function assumes all logs are of the same type and skips them if they aren't +fn parse_log_from_event(event: ethers::abi::Event, logs: Vec) -> HashMap> { + let mut map: HashMap> = HashMap::new(); + + let known_keys = event.inputs.clone().into_iter().map(|i| i.name).collect::>(); + + for log in logs { + let l = event.parse_log(RawLog::from(log)).unwrap(); + for param in l.params { + if known_keys.contains(param.name.as_str()) { + let tokens = map.entry(param.name).or_insert(Vec::new()); + tokens.push(param.value); + } + } + } + map +} + + +// const EVENT_S: &str = r#"{"name":"bar","inputs":[{"name":"a","type":"uint256"},{"name":"b","type":"bool"}],"anonymous":false,"type":"event"}"#; +const EVENT_S: &str = r#"{"anonymous": false,"inputs": [{"indexed": true,"name": "msgSender","type": "address"},{"indexed": true,"name": "mintQuantity","type": "uint256"}],"name": "NewMint","type": "event"}"#; \ No newline at end of file diff --git a/crates/freeze/src/datasets/logs.rs b/crates/freeze/src/datasets/logs.rs index 7c135e28..db0e0803 100644 --- a/crates/freeze/src/datasets/logs.rs +++ b/crates/freeze/src/datasets/logs.rs @@ -1,6 +1,8 @@ use std::{collections::HashMap, sync::Arc}; use ethers::prelude::*; +use ethers_core::abi::{AbiEncode, HumanReadableParser, RawLog, Token}; +use polars::export::ahash::HashSet; use polars::prelude::*; use tokio::{sync::mpsc, task}; @@ -206,6 +208,20 @@ async fn logs_to_df( let mut topic3: Vec>> = Vec::new(); let mut data: Vec> = Vec::new(); + + let event = match std::env::var("EVENT_ABI") { + Ok(abi) => match HumanReadableParser::parse_event(abi.as_str()) { + Ok(event) => Some(event), + Err(_) => { + eprintln!("incorrectly formatted event {} (expect something like event Transfer(address indexed from, address indexed to, uint256 amount)", abi); + None + } + }, + Err(_) => None + }; + + let mut event_cols: HashMap> = HashMap::new(); + let mut n_rows = 0; // while let Some(Ok(logs)) = logs.recv().await { 
while let Some(message) = logs.recv().await { @@ -263,6 +279,11 @@ async fn logs_to_df( log_index.push(li.as_u32()); } } + if let Some(event) = event.clone() { + parse_log_from_event(event.clone(), logs).into_iter().for_each(|(k, v)| { + event_cols.entry(k).or_insert(Vec::new()).extend(v); + }); + } } _ => return Err(CollectError::TooManyRequestsError), } @@ -284,5 +305,160 @@ async fn logs_to_df( cols.push(Series::new("chain_id", vec![chain_id; n_rows])); } + for (name, data) in event_cols { + match to_series(name, data) { + Ok(s) => cols.push(s), + Err(e) => eprintln!("error creating frame: {}", e), // TODO: see how best to bubble up error + } + } + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) } + + +// this function assumes all logs are of the same type and skips fields if they don't match the passed event definition +fn parse_log_from_event(event: ethers::abi::Event, logs: Vec) -> HashMap> { + let mut map: HashMap> = HashMap::new(); + let known_keys = event.inputs.clone().into_iter().map(|i| i.name).collect::>(); + + for log in logs { + if let Ok(log) = event.parse_log(RawLog::from(log)) { + for param in log.params { + if known_keys.contains(param.name.as_str()) { + let tokens = map.entry(param.name).or_insert(Vec::new()); + tokens.push(param.value); + } + } + } + } + map +} + +/// data should never be mixed type, otherwise this will return inconsistent results +fn to_series(name: String, data: Vec) -> Result { + + // This is a smooth brain way of doing this, but I can't think of a better way right now + let mut ints: Vec = vec![]; + let mut str_ints: Vec = vec![]; + let mut bytes: Vec = vec![]; + let mut bools: Vec = vec![]; + let mut strings: Vec = vec![]; + let mut addresses: Vec = vec![]; + // TODO: support array & tuple types + + for token in data.clone() { + match token { + Token::Address(a) => addresses.push(format!("{:?}", a)), + Token::FixedBytes(b) => bytes.push(b.encode_hex()), + Token::Bytes(b) => bytes.push(b.encode_hex()), + // LogParam and Token both don't specify the size of the int, so we have to guess. + // try to cast the all to u64, if that fails store as string and collect the ones that + // succeed at the end. + // this may get problematic if 1 batch of logs happens to contain all u64-able ints and + // the next batch contains u256s. 
Might be worth just casting all as strings + Token::Int(i) | Token::Uint(i) => match i.try_into() { + Ok(i) => ints.push(i), + Err(_) => str_ints.push(i.to_string()), + }, + Token::Bool(b) => bools.push(b), + Token::String(s) => strings.push(s), + Token::Array(_) | Token::FixedArray(_) => {} + Token::Tuple(_) => {} + } + } + let mixed_length_err = format!("could not parse column {}, mixed type", name); + + + let data_len = data.clone().len(); + + // check each vector, see if it contains any values, if it does, check if it's the same length + // as the input data and map to a series + if ints.len() > 0 || str_ints.len() > 0 { + if str_ints.len() > 0 { + str_ints.extend(ints.into_iter().map(|i| i.to_string())); + if str_ints.len() != data_len { + return Err(mixed_length_err); + } + return Ok(Series::new(name.as_str(), str_ints)); + } + Ok(Series::new(name.as_str(), ints)) + } else if bytes.len() > 0 { + if bytes.len() != data_len { + return Err(mixed_length_err); + } + Ok(Series::new(name.as_str(), bytes)) + } else if bools.len() > 0 { + if bools.len() != data_len { + return Err(mixed_length_err); + } + Ok(Series::new(name.as_str(), bools)) + } else if strings.len() > 0 { + if strings.len() != data_len { + return Err(mixed_length_err); + } + Ok(Series::new(name.as_str(), strings)) + } else if addresses.len() > 0 { + if addresses.len() != data_len { + return Err(mixed_length_err); + } + Ok(Series::new(name.as_str(), addresses)) + } else { + Err(format!("could not parse column {}", name)) + } +} + +#[cfg(test)] +mod test { + use polars::prelude::DataType::Boolean; + use super::*; + + #[test] + fn test_mapping_log_into_type_columns() { + let e = HumanReadableParser::parse_event("event NewMint(address indexed msgSender, uint256 indexed mintQuantity)").unwrap(); + + let raw_log = r#"{ + "address": "0x0000000000000000000000000000000000000000", + "topics": [ + "0x52277f0b4a9b555c5aa96900a13546f972bda413737ec164aac947c87eec6024", + "0x00000000000000000000000062a73d9116eda78a78f4cf81602bdc926fb4c0dd", + "0x0000000000000000000000000000000000000000000000000000000000000003" + ], + "data": "0x" + }"#; + + let log = serde_json::from_str::(raw_log).unwrap(); + let m = parse_log_from_event(e, vec![log]); + assert_eq!(m.len(), 2); + assert_eq!(m.get("msgSender").unwrap().len(), 1); + assert_eq!(m.get("mintQuantity").unwrap().len(), 1); + } + + #[test] + fn test_parsing_bools() { + let s = to_series("bools".to_string(), vec![Token::Bool(true), Token::Bool(false)]).unwrap(); + assert_eq!(s.dtype(), &Boolean); + assert_eq!(s.len(), 2) + } + + #[test] + fn test_parsing_ints() { + let s = to_series("ints".to_string(), vec![Token::Int(1.into()), Token::Int(2.into())]).unwrap(); + assert_eq!(s.dtype(), &DataType::UInt64); + assert_eq!(s.len(), 2) + } + + #[test] + fn test_parsing_big_ints() { + let s = to_series("ints".to_string(), vec![Token::Int(U256::max_value()), Token::Int(2.into())]).unwrap(); + assert_eq!(s.dtype(), &DataType::Utf8); + assert_eq!(s.len(), 2) + } + + #[test] + fn test_parsing_addresses() { + let s = to_series("ints".to_string(), vec![Token::Address(Address::zero()), Token::Address(Address::zero())]).unwrap(); + assert_eq!(s.dtype(), &DataType::Utf8); + assert_eq!(s.len(), 2) + } +} \ No newline at end of file From 89854101fbcb6913493aeca1c48377ee8b51a189 Mon Sep 17 00:00:00 2001 From: Erik Reppel Date: Tue, 22 Aug 2023 10:24:06 -0400 Subject: [PATCH 2/8] Remove examples --- crates/freeze/examples/events.rs | 52 -------------------------------- 1 file changed, 52 deletions(-) delete mode 
100644 crates/freeze/examples/events.rs diff --git a/crates/freeze/examples/events.rs b/crates/freeze/examples/events.rs deleted file mode 100644 index b73049b4..00000000 --- a/crates/freeze/examples/events.rs +++ /dev/null @@ -1,52 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use ethers::abi::Event; -use ethers_core::abi::{ethabi, HumanReadableParser, LogParam, RawLog, Token}; -use ethabi::param_type::ParamType; -use ethers::prelude::{Address, Log}; - -#[tokio::main] -async fn main() { - // let event: Event = serde_json::from_str(EVENT_S).unwrap(); - // println!("{:?}", event.inputs); - // println!("{:#?}", event); - - - let e = HumanReadableParser::parse_event("event NewMint(address indexed msgSender, uint256 indexed mintQuantity)").unwrap(); - - let raw_log = r#"{ - "address": "0x0000000000000000000000000000000000000000", - "topics": [ - "0x52277f0b4a9b555c5aa96900a13546f972bda413737ec164aac947c87eec6024", - "0x00000000000000000000000062a73d9116eda78a78f4cf81602bdc926fb4c0dd", - "0x0000000000000000000000000000000000000000000000000000000000000003" - ], - "data": "0x" - }"#; - - let log = serde_json::from_str::(raw_log).unwrap(); - - let m = parse_log_from_event(e, vec![log]); - println!("{:#?}", m); -} - -// this function assumes all logs are of the same type and skips them if they aren't -fn parse_log_from_event(event: ethers::abi::Event, logs: Vec) -> HashMap> { - let mut map: HashMap> = HashMap::new(); - - let known_keys = event.inputs.clone().into_iter().map(|i| i.name).collect::>(); - - for log in logs { - let l = event.parse_log(RawLog::from(log)).unwrap(); - for param in l.params { - if known_keys.contains(param.name.as_str()) { - let tokens = map.entry(param.name).or_insert(Vec::new()); - tokens.push(param.value); - } - } - } - map -} - - -// const EVENT_S: &str = r#"{"name":"bar","inputs":[{"name":"a","type":"uint256"},{"name":"b","type":"bool"}],"anonymous":false,"type":"event"}"#; -const EVENT_S: &str = r#"{"anonymous": false,"inputs": [{"indexed": true,"name": "msgSender","type": "address"},{"indexed": true,"name": "mintQuantity","type": "uint256"}],"name": "NewMint","type": "event"}"#; \ No newline at end of file From d265b5fcd64fe3f8583022594c3a328019a6656b Mon Sep 17 00:00:00 2001 From: Erik Reppel Date: Fri, 25 Aug 2023 12:53:37 -0400 Subject: [PATCH 3/8] event-signature flag, add to schema even in event of no logs in batch --- .gitignore | 1 + crates/cli/src/args.rs | 4 + crates/cli/src/parse/query.rs | 8 +- crates/freeze/src/datasets/logs.rs | 229 +++++++++++++++------------ crates/freeze/src/datasets/mod.rs | 2 + crates/freeze/src/lib.rs | 1 + crates/freeze/src/types/schemas.rs | 15 +- crates/python/src/collect_adapter.rs | 3 + crates/python/src/freeze_adapter.rs | 3 + 9 files changed, 166 insertions(+), 100 deletions(-) diff --git a/.gitignore b/.gitignore index 19086668..6c7b5f2d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ target *.json_tmp TODO.md graveyard +.idea diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index 82f46928..b9a14493 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -170,6 +170,10 @@ pub struct Args { help_heading = "Dataset-specific Options" )] pub inner_request_size: u64, + + /// [logs] event signature to parse + #[arg(long, help_heading = "Dataset-specific Options")] + pub event_signature: Option, } pub(crate) fn get_styles() -> clap_cryo::builder::Styles { diff --git a/crates/cli/src/parse/query.rs b/crates/cli/src/parse/query.rs index 4504822b..2c0557bc 100644 --- 
a/crates/cli/src/parse/query.rs +++ b/crates/cli/src/parse/query.rs @@ -3,7 +3,8 @@ use std::{collections::HashMap, sync::Arc}; use ethers::prelude::*; use hex::FromHex; -use cryo_freeze::{ColumnEncoding, Datatype, FileFormat, MultiQuery, ParseError, RowFilter, Table}; +use cryo_freeze::{ColumnEncoding, Datatype, FileFormat, LogDecoder, MultiQuery, ParseError, RowFilter, Table}; +use cryo_freeze::schemas::TableMeta; use super::{blocks, file_output, transactions}; use crate::args::Args; @@ -97,6 +98,11 @@ fn parse_schemas(args: &Args) -> Result, ParseError> { &args.exclude_columns, &args.columns, sort[datatype].clone(), + match &args.event_signature { + Some(sig) => Some(TableMeta{log_decoder: LogDecoder::new(sig.clone())}), + None => None, + + } ) .map(|schema| (*datatype, schema)) .map_err(|_e| { diff --git a/crates/freeze/src/datasets/logs.rs b/crates/freeze/src/datasets/logs.rs index db0e0803..c79ef64a 100644 --- a/crates/freeze/src/datasets/logs.rs +++ b/crates/freeze/src/datasets/logs.rs @@ -209,15 +209,9 @@ async fn logs_to_df( let mut data: Vec> = Vec::new(); - let event = match std::env::var("EVENT_ABI") { - Ok(abi) => match HumanReadableParser::parse_event(abi.as_str()) { - Ok(event) => Some(event), - Err(_) => { - eprintln!("incorrectly formatted event {} (expect something like event Transfer(address indexed from, address indexed to, uint256 amount)", abi); - None - } - }, - Err(_) => None + let decoder = match schema.clone().meta { + Some(tm) => tm.log_decoder, + None => None, }; let mut event_cols: HashMap> = HashMap::new(); @@ -279,8 +273,8 @@ async fn logs_to_df( log_index.push(li.as_u32()); } } - if let Some(event) = event.clone() { - parse_log_from_event(event.clone(), logs).into_iter().for_each(|(k, v)| { + if let Some(decoder) = decoder.clone() { + decoder.parse_log_from_event(logs).into_iter().for_each(|(k, v)| { event_cols.entry(k).or_insert(Vec::new()).extend(v); }); } @@ -290,7 +284,7 @@ async fn logs_to_df( } let mut cols = Vec::new(); - with_series!(cols, "block_number", block_number, schema); + with_series!(cols, "block_number", block_number.clone(), schema); with_series!(cols, "transaction_index", transaction_index, schema); with_series!(cols, "log_index", log_index, schema); with_series_binary!(cols, "transaction_hash", transaction_hash, schema); @@ -305,10 +299,23 @@ async fn logs_to_df( cols.push(Series::new("chain_id", vec![chain_id; n_rows])); } - for (name, data) in event_cols { - match to_series(name, data) { - Ok(s) => cols.push(s), - Err(e) => eprintln!("error creating frame: {}", e), // TODO: see how best to bubble up error + if let Some(decoder) = decoder { + // Write columns even if there are no values decoded - indicates empty dataframe + let chunk_len = block_number.len(); + if event_cols.is_empty() { + for name in decoder.field_names().iter() { + cols.push(Series::new(name.as_str(), vec![None::; chunk_len])); + } + } else { + for (name, data) in event_cols { + match LogDecoder::make_series(name.clone(), data, chunk_len.clone()) { + Ok(s) => { + println!("Pushing col {:?}", name.clone()); + cols.push(s); + } + Err(e) => eprintln!("error creating frame: {}", e), // TODO: see how best to bubble up error + } + } } } @@ -317,94 +324,119 @@ async fn logs_to_df( } -// this function assumes all logs are of the same type and skips fields if they don't match the passed event definition -fn parse_log_from_event(event: ethers::abi::Event, logs: Vec) -> HashMap> { - let mut map: HashMap> = HashMap::new(); - let known_keys = 
event.inputs.clone().into_iter().map(|i| i.name).collect::>(); +#[derive(Clone, Debug, PartialEq)] +pub struct LogDecoder { + pub raw: String, + pub event: abi::Event, +} - for log in logs { - if let Ok(log) = event.parse_log(RawLog::from(log)) { - for param in log.params { - if known_keys.contains(param.name.as_str()) { - let tokens = map.entry(param.name).or_insert(Vec::new()); - tokens.push(param.value); - } +impl LogDecoder { + /// create a new LogDecoder from an event signature + /// ex: LogDecoder::new("event Transfer(address indexed from, address indexed to, uint256 amount)".to_string()) + pub fn new(event_signature: String) -> Option { + match HumanReadableParser::parse_event(event_signature.as_str()) + { + Ok(event) => Some(Self { event, raw: event_signature.clone() }), + Err(_) => { + eprintln!("incorrectly formatted event {} (expect something like event Transfer(address indexed from, address indexed to, uint256 amount)", event_signature); + None } } } - map -} -/// data should never be mixed type, otherwise this will return inconsistent results -fn to_series(name: String, data: Vec) -> Result { - - // This is a smooth brain way of doing this, but I can't think of a better way right now - let mut ints: Vec = vec![]; - let mut str_ints: Vec = vec![]; - let mut bytes: Vec = vec![]; - let mut bools: Vec = vec![]; - let mut strings: Vec = vec![]; - let mut addresses: Vec = vec![]; - // TODO: support array & tuple types - - for token in data.clone() { - match token { - Token::Address(a) => addresses.push(format!("{:?}", a)), - Token::FixedBytes(b) => bytes.push(b.encode_hex()), - Token::Bytes(b) => bytes.push(b.encode_hex()), - // LogParam and Token both don't specify the size of the int, so we have to guess. - // try to cast the all to u64, if that fails store as string and collect the ones that - // succeed at the end. - // this may get problematic if 1 batch of logs happens to contain all u64-able ints and - // the next batch contains u256s. 
Might be worth just casting all as strings - Token::Int(i) | Token::Uint(i) => match i.try_into() { - Ok(i) => ints.push(i), - Err(_) => str_ints.push(i.to_string()), - }, - Token::Bool(b) => bools.push(b), - Token::String(s) => strings.push(s), - Token::Array(_) | Token::FixedArray(_) => {} - Token::Tuple(_) => {} + fn field_names(&self) -> Vec { + self.event.inputs.iter().map(|i| i.name.clone()).collect() + } + + /// converts from a log type to an abi token type + /// this function assumes all logs are of the same type and skips fields if they don't match the passed event definition + pub fn parse_log_from_event(&self, logs: Vec) -> HashMap> { + let mut map: HashMap> = HashMap::new(); + let known_keys = self.event.inputs.clone().into_iter().map(|i| i.name).collect::>(); + + for log in logs { + if let Ok(log) = self.event.parse_log(RawLog::from(log)) { + for param in log.params { + if known_keys.contains(param.name.as_str()) { + let tokens = map.entry(param.name).or_insert(Vec::new()); + tokens.push(param.value); + } + } + } } + map } - let mixed_length_err = format!("could not parse column {}, mixed type", name); + /// data should never be mixed type, otherwise this will return inconsistent results + pub fn make_series(name: String, data: Vec, chunk_len: usize) -> Result { + + // This is a smooth brain way of doing this, but I can't think of a better way right now + let mut ints: Vec = vec![]; + let mut str_ints: Vec = vec![]; + let mut bytes: Vec = vec![]; + let mut bools: Vec = vec![]; + let mut strings: Vec = vec![]; + let mut addresses: Vec = vec![]; + // TODO: support array & tuple types + + for token in data.clone() { + match token { + Token::Address(a) => addresses.push(format!("{:?}", a)), + Token::FixedBytes(b) => bytes.push(b.encode_hex()), + Token::Bytes(b) => bytes.push(b.encode_hex()), + // LogParam and Token both don't specify the size of the int, so we have to guess. + // try to cast the all to u64, if that fails store as string and collect the ones that + // succeed at the end. + // this may get problematic if 1 batch of logs happens to contain all u64-able ints and + // the next batch contains u256s. 
Might be worth just casting all as strings + Token::Int(i) | Token::Uint(i) => match i.try_into() { + Ok(i) => ints.push(i), + Err(_) => str_ints.push(i.to_string()), + }, + Token::Bool(b) => bools.push(b), + Token::String(s) => strings.push(s), + Token::Array(_) | Token::FixedArray(_) => {} + Token::Tuple(_) => {} + } + } + let mixed_length_err = format!("could not parse column {}, mixed type", name); - let data_len = data.clone().len(); - // check each vector, see if it contains any values, if it does, check if it's the same length - // as the input data and map to a series - if ints.len() > 0 || str_ints.len() > 0 { - if str_ints.len() > 0 { - str_ints.extend(ints.into_iter().map(|i| i.to_string())); - if str_ints.len() != data_len { + // check each vector, see if it contains any values, if it does, check if it's the same length + // as the input data and map to a series + if ints.len() > 0 || str_ints.len() > 0 { + if str_ints.len() > 0 { + str_ints.extend(ints.into_iter().map(|i| i.to_string())); + if str_ints.len() != chunk_len { + return Err(mixed_length_err); + } + return Ok(Series::new(name.as_str(), str_ints)); + } + Ok(Series::new(name.as_str(), ints)) + } else if bytes.len() > 0 { + if bytes.len() != chunk_len { return Err(mixed_length_err); } - return Ok(Series::new(name.as_str(), str_ints)); - } - Ok(Series::new(name.as_str(), ints)) - } else if bytes.len() > 0 { - if bytes.len() != data_len { - return Err(mixed_length_err); - } - Ok(Series::new(name.as_str(), bytes)) - } else if bools.len() > 0 { - if bools.len() != data_len { - return Err(mixed_length_err); - } - Ok(Series::new(name.as_str(), bools)) - } else if strings.len() > 0 { - if strings.len() != data_len { - return Err(mixed_length_err); - } - Ok(Series::new(name.as_str(), strings)) - } else if addresses.len() > 0 { - if addresses.len() != data_len { - return Err(mixed_length_err); + Ok(Series::new(name.as_str(), bytes)) + } else if bools.len() > 0 { + if bools.len() != chunk_len { + return Err(mixed_length_err); + } + Ok(Series::new(name.as_str(), bools)) + } else if strings.len() > 0 { + if strings.len() != chunk_len { + return Err(mixed_length_err); + } + Ok(Series::new(name.as_str(), strings)) + } else if addresses.len() > 0 { + if addresses.len() != chunk_len { + return Err(mixed_length_err); + } + Ok(Series::new(name.as_str(), addresses)) + } else { + // case where no data was passed + Ok(Series::new(name.as_str(), vec![None::; chunk_len])) } - Ok(Series::new(name.as_str(), addresses)) - } else { - Err(format!("could not parse column {}", name)) } } @@ -415,7 +447,8 @@ mod test { #[test] fn test_mapping_log_into_type_columns() { - let e = HumanReadableParser::parse_event("event NewMint(address indexed msgSender, uint256 indexed mintQuantity)").unwrap(); + let raw = "event NewMint(address indexed msgSender, uint256 indexed mintQuantity)"; + let e = HumanReadableParser::parse_event(raw).unwrap(); let raw_log = r#"{ "address": "0x0000000000000000000000000000000000000000", @@ -427,8 +460,10 @@ mod test { "data": "0x" }"#; + let decoder = LogDecoder { raw: raw.to_string(), event: e.clone() }; + let log = serde_json::from_str::(raw_log).unwrap(); - let m = parse_log_from_event(e, vec![log]); + let m = decoder.parse_log_from_event(vec![log]); assert_eq!(m.len(), 2); assert_eq!(m.get("msgSender").unwrap().len(), 1); assert_eq!(m.get("mintQuantity").unwrap().len(), 1); @@ -436,28 +471,28 @@ mod test { #[test] fn test_parsing_bools() { - let s = to_series("bools".to_string(), vec![Token::Bool(true), 
Token::Bool(false)]).unwrap(); + let s = LogDecoder::make_series("bools".to_string(), vec![Token::Bool(true), Token::Bool(false)]).unwrap(); assert_eq!(s.dtype(), &Boolean); assert_eq!(s.len(), 2) } #[test] fn test_parsing_ints() { - let s = to_series("ints".to_string(), vec![Token::Int(1.into()), Token::Int(2.into())]).unwrap(); + let s = LogDecoder::make_series("ints".to_string(), vec![Token::Int(1.into()), Token::Int(2.into())]).unwrap(); assert_eq!(s.dtype(), &DataType::UInt64); assert_eq!(s.len(), 2) } #[test] fn test_parsing_big_ints() { - let s = to_series("ints".to_string(), vec![Token::Int(U256::max_value()), Token::Int(2.into())]).unwrap(); + let s = LogDecoder::make_series("ints".to_string(), vec![Token::Int(U256::max_value()), Token::Int(2.into())]).unwrap(); assert_eq!(s.dtype(), &DataType::Utf8); assert_eq!(s.len(), 2) } #[test] fn test_parsing_addresses() { - let s = to_series("ints".to_string(), vec![Token::Address(Address::zero()), Token::Address(Address::zero())]).unwrap(); + let s = LogDecoder::make_series("ints".to_string(), vec![Token::Address(Address::zero()), Token::Address(Address::zero())]).unwrap(); assert_eq!(s.dtype(), &DataType::Utf8); assert_eq!(s.len(), 2) } diff --git a/crates/freeze/src/datasets/mod.rs b/crates/freeze/src/datasets/mod.rs index 506715ea..f62deae2 100644 --- a/crates/freeze/src/datasets/mod.rs +++ b/crates/freeze/src/datasets/mod.rs @@ -11,3 +11,5 @@ mod storage_diffs; mod traces; mod transactions; mod vm_traces; + +pub use logs::LogDecoder; \ No newline at end of file diff --git a/crates/freeze/src/lib.rs b/crates/freeze/src/lib.rs index a9be83c9..437a2750 100644 --- a/crates/freeze/src/lib.rs +++ b/crates/freeze/src/lib.rs @@ -15,3 +15,4 @@ mod types; pub use collect::{collect, collect_multiple}; pub use freeze::freeze; pub use types::*; +pub use datasets::LogDecoder; diff --git a/crates/freeze/src/types/schemas.rs b/crates/freeze/src/types/schemas.rs index 83cbb5ff..edf300d8 100644 --- a/crates/freeze/src/types/schemas.rs +++ b/crates/freeze/src/types/schemas.rs @@ -2,11 +2,12 @@ use std::collections::HashSet; use indexmap::IndexMap; use thiserror::Error; +use crate::datasets::LogDecoder; use crate::types::{ColumnEncoding, Datatype}; /// Schema for a particular table -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub struct Table { columns: IndexMap, @@ -15,6 +16,15 @@ pub struct Table { /// sort order for rows pub sort_columns: Option>, + + // metadata + pub meta: Option +} + +/// metadata to associated with a table +#[derive(Clone, Debug, PartialEq)] +pub struct TableMeta { + pub log_decoder: Option, } impl Table { @@ -91,6 +101,7 @@ impl Datatype { exclude_columns: &Option>, columns: &Option>, sort: Option>, + table_meta: Option, ) -> Result { let column_types = self.dataset().column_types(); let default_columns = self.dataset().default_columns(); @@ -104,7 +115,7 @@ impl Datatype { } columns.insert((*column.clone()).to_string(), *ctype); } - let schema = Table { datatype: *self, sort_columns: sort, columns }; + let schema = Table { datatype: *self, sort_columns: sort, columns, meta: table_meta }; Ok(schema) } } diff --git a/crates/python/src/collect_adapter.rs b/crates/python/src/collect_adapter.rs index a09176dd..0449cb4f 100644 --- a/crates/python/src/collect_adapter.rs +++ b/crates/python/src/collect_adapter.rs @@ -44,6 +44,7 @@ use cryo_freeze::collect; topic3 = None, inner_request_size = 1, no_verbose = false, + event_signature = None, ) )] #[allow(clippy::too_many_arguments)] @@ -85,6 +86,7 @@ pub fn 
_collect( topic3: Option, inner_request_size: u64, no_verbose: bool, + event_signature: Option, ) -> PyResult<&PyAny> { let args = Args { datatype: vec![datatype], @@ -123,6 +125,7 @@ pub fn _collect( topic3, inner_request_size, no_verbose, + event_signature, }; pyo3_asyncio::tokio::future_into_py(py, async move { diff --git a/crates/python/src/freeze_adapter.rs b/crates/python/src/freeze_adapter.rs index 0d6d88d3..91c6803b 100644 --- a/crates/python/src/freeze_adapter.rs +++ b/crates/python/src/freeze_adapter.rs @@ -45,6 +45,7 @@ use cryo_cli::{run, Args}; topic3 = None, inner_request_size = 1, no_verbose = false, + event_signature = None, ) )] #[allow(clippy::too_many_arguments)] @@ -86,6 +87,7 @@ pub fn _freeze( topic3: Option, inner_request_size: u64, no_verbose: bool, + event_signature: Option, ) -> PyResult<&PyAny> { let args = Args { datatype, @@ -124,6 +126,7 @@ pub fn _freeze( topic3, inner_request_size, no_verbose, + event_signature, }; pyo3_asyncio::tokio::future_into_py(py, async move { From b63881b584e1ba7d14e4b8554d4399e206cdae2a Mon Sep 17 00:00:00 2001 From: Erik Reppel Date: Fri, 25 Aug 2023 12:57:21 -0400 Subject: [PATCH 4/8] fmt --- crates/cli/src/parse/query.rs | 11 +++-- crates/freeze/src/datasets/logs.rs | 76 ++++++++++++++++++------------ crates/freeze/src/datasets/mod.rs | 2 +- crates/freeze/src/lib.rs | 2 +- crates/freeze/src/types/schemas.rs | 4 +- 5 files changed, 55 insertions(+), 40 deletions(-) diff --git a/crates/cli/src/parse/query.rs b/crates/cli/src/parse/query.rs index 2c0557bc..53cbe5e2 100644 --- a/crates/cli/src/parse/query.rs +++ b/crates/cli/src/parse/query.rs @@ -3,8 +3,10 @@ use std::{collections::HashMap, sync::Arc}; use ethers::prelude::*; use hex::FromHex; -use cryo_freeze::{ColumnEncoding, Datatype, FileFormat, LogDecoder, MultiQuery, ParseError, RowFilter, Table}; -use cryo_freeze::schemas::TableMeta; +use cryo_freeze::{ + schemas::TableMeta, ColumnEncoding, Datatype, FileFormat, LogDecoder, MultiQuery, ParseError, + RowFilter, Table, +}; use super::{blocks, file_output, transactions}; use crate::args::Args; @@ -99,10 +101,9 @@ fn parse_schemas(args: &Args) -> Result, ParseError> { &args.columns, sort[datatype].clone(), match &args.event_signature { - Some(sig) => Some(TableMeta{log_decoder: LogDecoder::new(sig.clone())}), + Some(sig) => Some(TableMeta { log_decoder: LogDecoder::new(sig.clone()) }), None => None, - - } + }, ) .map(|schema| (*datatype, schema)) .map_err(|_e| { diff --git a/crates/freeze/src/datasets/logs.rs b/crates/freeze/src/datasets/logs.rs index c79ef64a..137f5c3d 100644 --- a/crates/freeze/src/datasets/logs.rs +++ b/crates/freeze/src/datasets/logs.rs @@ -2,8 +2,7 @@ use std::{collections::HashMap, sync::Arc}; use ethers::prelude::*; use ethers_core::abi::{AbiEncode, HumanReadableParser, RawLog, Token}; -use polars::export::ahash::HashSet; -use polars::prelude::*; +use polars::{export::ahash::HashSet, prelude::*}; use tokio::{sync::mpsc, task}; use crate::{ @@ -208,7 +207,6 @@ async fn logs_to_df( let mut topic3: Vec>> = Vec::new(); let mut data: Vec> = Vec::new(); - let decoder = match schema.clone().meta { Some(tm) => tm.log_decoder, None => None, @@ -313,17 +311,16 @@ async fn logs_to_df( println!("Pushing col {:?}", name.clone()); cols.push(s); } - Err(e) => eprintln!("error creating frame: {}", e), // TODO: see how best to bubble up error + Err(e) => eprintln!("error creating frame: {}", e), /* TODO: see how best to + * bubble up error */ } } } } - 
DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) } - #[derive(Clone, Debug, PartialEq)] pub struct LogDecoder { pub raw: String, @@ -332,10 +329,10 @@ pub struct LogDecoder { impl LogDecoder { /// create a new LogDecoder from an event signature - /// ex: LogDecoder::new("event Transfer(address indexed from, address indexed to, uint256 amount)".to_string()) + /// ex: LogDecoder::new("event Transfer(address indexed from, address indexed to, uint256 + /// amount)".to_string()) pub fn new(event_signature: String) -> Option { - match HumanReadableParser::parse_event(event_signature.as_str()) - { + match HumanReadableParser::parse_event(event_signature.as_str()) { Ok(event) => Some(Self { event, raw: event_signature.clone() }), Err(_) => { eprintln!("incorrectly formatted event {} (expect something like event Transfer(address indexed from, address indexed to, uint256 amount)", event_signature); @@ -349,10 +346,12 @@ impl LogDecoder { } /// converts from a log type to an abi token type - /// this function assumes all logs are of the same type and skips fields if they don't match the passed event definition + /// this function assumes all logs are of the same type and skips fields if they don't match the + /// passed event definition pub fn parse_log_from_event(&self, logs: Vec) -> HashMap> { let mut map: HashMap> = HashMap::new(); - let known_keys = self.event.inputs.clone().into_iter().map(|i| i.name).collect::>(); + let known_keys = + self.event.inputs.clone().into_iter().map(|i| i.name).collect::>(); for log in logs { if let Ok(log) = self.event.parse_log(RawLog::from(log)) { @@ -369,7 +368,6 @@ impl LogDecoder { /// data should never be mixed type, otherwise this will return inconsistent results pub fn make_series(name: String, data: Vec, chunk_len: usize) -> Result { - // This is a smooth brain way of doing this, but I can't think of a better way right now let mut ints: Vec = vec![]; let mut str_ints: Vec = vec![]; @@ -385,10 +383,11 @@ impl LogDecoder { Token::FixedBytes(b) => bytes.push(b.encode_hex()), Token::Bytes(b) => bytes.push(b.encode_hex()), // LogParam and Token both don't specify the size of the int, so we have to guess. - // try to cast the all to u64, if that fails store as string and collect the ones that - // succeed at the end. - // this may get problematic if 1 batch of logs happens to contain all u64-able ints and - // the next batch contains u256s. Might be worth just casting all as strings + // try to cast the all to u64, if that fails store as string and collect the ones + // that succeed at the end. + // this may get problematic if 1 batch of logs happens to contain all u64-able ints + // and the next batch contains u256s. 
Might be worth just casting + // all as strings Token::Int(i) | Token::Uint(i) => match i.try_into() { Ok(i) => ints.push(i), Err(_) => str_ints.push(i.to_string()), @@ -401,36 +400,35 @@ impl LogDecoder { } let mixed_length_err = format!("could not parse column {}, mixed type", name); - - // check each vector, see if it contains any values, if it does, check if it's the same length - // as the input data and map to a series + // check each vector, see if it contains any values, if it does, check if it's the same + // length as the input data and map to a series if ints.len() > 0 || str_ints.len() > 0 { if str_ints.len() > 0 { str_ints.extend(ints.into_iter().map(|i| i.to_string())); if str_ints.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } - return Ok(Series::new(name.as_str(), str_ints)); + return Ok(Series::new(name.as_str(), str_ints)) } Ok(Series::new(name.as_str(), ints)) } else if bytes.len() > 0 { if bytes.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } Ok(Series::new(name.as_str(), bytes)) } else if bools.len() > 0 { if bools.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } Ok(Series::new(name.as_str(), bools)) } else if strings.len() > 0 { if strings.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } Ok(Series::new(name.as_str(), strings)) } else if addresses.len() > 0 { if addresses.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } Ok(Series::new(name.as_str(), addresses)) } else { @@ -442,8 +440,8 @@ impl LogDecoder { #[cfg(test)] mod test { - use polars::prelude::DataType::Boolean; use super::*; + use polars::prelude::DataType::Boolean; #[test] fn test_mapping_log_into_type_columns() { @@ -471,29 +469,45 @@ mod test { #[test] fn test_parsing_bools() { - let s = LogDecoder::make_series("bools".to_string(), vec![Token::Bool(true), Token::Bool(false)]).unwrap(); + let s = LogDecoder::make_series( + "bools".to_string(), + vec![Token::Bool(true), Token::Bool(false)], + ) + .unwrap(); assert_eq!(s.dtype(), &Boolean); assert_eq!(s.len(), 2) } #[test] fn test_parsing_ints() { - let s = LogDecoder::make_series("ints".to_string(), vec![Token::Int(1.into()), Token::Int(2.into())]).unwrap(); + let s = LogDecoder::make_series( + "ints".to_string(), + vec![Token::Int(1.into()), Token::Int(2.into())], + ) + .unwrap(); assert_eq!(s.dtype(), &DataType::UInt64); assert_eq!(s.len(), 2) } #[test] fn test_parsing_big_ints() { - let s = LogDecoder::make_series("ints".to_string(), vec![Token::Int(U256::max_value()), Token::Int(2.into())]).unwrap(); + let s = LogDecoder::make_series( + "ints".to_string(), + vec![Token::Int(U256::max_value()), Token::Int(2.into())], + ) + .unwrap(); assert_eq!(s.dtype(), &DataType::Utf8); assert_eq!(s.len(), 2) } #[test] fn test_parsing_addresses() { - let s = LogDecoder::make_series("ints".to_string(), vec![Token::Address(Address::zero()), Token::Address(Address::zero())]).unwrap(); + let s = LogDecoder::make_series( + "ints".to_string(), + vec![Token::Address(Address::zero()), Token::Address(Address::zero())], + ) + .unwrap(); assert_eq!(s.dtype(), &DataType::Utf8); assert_eq!(s.len(), 2) } -} \ No newline at end of file +} diff --git a/crates/freeze/src/datasets/mod.rs b/crates/freeze/src/datasets/mod.rs index f62deae2..a06981eb 100644 --- a/crates/freeze/src/datasets/mod.rs +++ b/crates/freeze/src/datasets/mod.rs @@ -12,4 +12,4 @@ mod traces; mod transactions; mod vm_traces; -pub 
use logs::LogDecoder; \ No newline at end of file +pub use logs::LogDecoder; diff --git a/crates/freeze/src/lib.rs b/crates/freeze/src/lib.rs index 437a2750..e1f83298 100644 --- a/crates/freeze/src/lib.rs +++ b/crates/freeze/src/lib.rs @@ -13,6 +13,6 @@ mod freeze; mod types; pub use collect::{collect, collect_multiple}; +pub use datasets::LogDecoder; pub use freeze::freeze; pub use types::*; -pub use datasets::LogDecoder; diff --git a/crates/freeze/src/types/schemas.rs b/crates/freeze/src/types/schemas.rs index edf300d8..b5cb3702 100644 --- a/crates/freeze/src/types/schemas.rs +++ b/crates/freeze/src/types/schemas.rs @@ -1,8 +1,8 @@ use std::collections::HashSet; +use crate::datasets::LogDecoder; use indexmap::IndexMap; use thiserror::Error; -use crate::datasets::LogDecoder; use crate::types::{ColumnEncoding, Datatype}; @@ -18,7 +18,7 @@ pub struct Table { pub sort_columns: Option>, // metadata - pub meta: Option + pub meta: Option, } /// metadata to associated with a table From 3e281182de63a81ee56bfdd1fa90adac23c4615d Mon Sep 17 00:00:00 2001 From: Erik Reppel Date: Wed, 30 Aug 2023 15:40:47 -0700 Subject: [PATCH 5/8] fix tests, doc comments --- crates/freeze/src/datasets/logs.rs | 43 +++++++++++++++++----------- crates/freeze/src/types/schemas.rs | 3 +- crates/python/src/collect_adapter.rs | 4 +-- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/crates/freeze/src/datasets/logs.rs b/crates/freeze/src/datasets/logs.rs index 137f5c3d..d6187cfc 100644 --- a/crates/freeze/src/datasets/logs.rs +++ b/crates/freeze/src/datasets/logs.rs @@ -221,7 +221,7 @@ async fn logs_to_df( Ok(logs) => { for log in logs.iter() { if let Some(true) = log.removed { - continue + continue; } if let (Some(bn), Some(tx), Some(ti), Some(li)) = ( log.block_number, @@ -308,7 +308,6 @@ async fn logs_to_df( for (name, data) in event_cols { match LogDecoder::make_series(name.clone(), data, chunk_len.clone()) { Ok(s) => { - println!("Pushing col {:?}", name.clone()); cols.push(s); } Err(e) => eprintln!("error creating frame: {}", e), /* TODO: see how best to @@ -321,9 +320,12 @@ async fn logs_to_df( DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) } +/// container for log decoding context #[derive(Clone, Debug, PartialEq)] pub struct LogDecoder { + /// the raw event signature string ex: event Transfer(address indexed from, address indexed to, uint256 amount) pub raw: String, + /// decoded abi type of event signature string pub event: abi::Event, } @@ -354,13 +356,16 @@ impl LogDecoder { self.event.inputs.clone().into_iter().map(|i| i.name).collect::>(); for log in logs { - if let Ok(log) = self.event.parse_log(RawLog::from(log)) { - for param in log.params { - if known_keys.contains(param.name.as_str()) { - let tokens = map.entry(param.name).or_insert(Vec::new()); - tokens.push(param.value); + match self.event.parse_log(RawLog::from(log)) { + Ok(log) => { + for param in log.params { + if known_keys.contains(param.name.as_str()) { + let tokens = map.entry(param.name).or_insert(Vec::new()); + tokens.push(param.value); + } } } + Err(e) => eprintln!("error parsing log: {:?}", e), } } map @@ -406,29 +411,29 @@ impl LogDecoder { if str_ints.len() > 0 { str_ints.extend(ints.into_iter().map(|i| i.to_string())); if str_ints.len() != chunk_len { - return Err(mixed_length_err) + return Err(mixed_length_err); } - return Ok(Series::new(name.as_str(), str_ints)) + return Ok(Series::new(name.as_str(), str_ints)); } Ok(Series::new(name.as_str(), ints)) } else if bytes.len() > 0 { if bytes.len() 
!= chunk_len { - return Err(mixed_length_err) + return Err(mixed_length_err); } Ok(Series::new(name.as_str(), bytes)) } else if bools.len() > 0 { if bools.len() != chunk_len { - return Err(mixed_length_err) + return Err(mixed_length_err); } Ok(Series::new(name.as_str(), bools)) } else if strings.len() > 0 { if strings.len() != chunk_len { - return Err(mixed_length_err) + return Err(mixed_length_err); } Ok(Series::new(name.as_str(), strings)) } else if addresses.len() > 0 { if addresses.len() != chunk_len { - return Err(mixed_length_err) + return Err(mixed_length_err); } Ok(Series::new(name.as_str(), addresses)) } else { @@ -472,8 +477,9 @@ mod test { let s = LogDecoder::make_series( "bools".to_string(), vec![Token::Bool(true), Token::Bool(false)], + 2, ) - .unwrap(); + .unwrap(); assert_eq!(s.dtype(), &Boolean); assert_eq!(s.len(), 2) } @@ -483,8 +489,9 @@ mod test { let s = LogDecoder::make_series( "ints".to_string(), vec![Token::Int(1.into()), Token::Int(2.into())], + 2, ) - .unwrap(); + .unwrap(); assert_eq!(s.dtype(), &DataType::UInt64); assert_eq!(s.len(), 2) } @@ -494,8 +501,9 @@ mod test { let s = LogDecoder::make_series( "ints".to_string(), vec![Token::Int(U256::max_value()), Token::Int(2.into())], + 2, ) - .unwrap(); + .unwrap(); assert_eq!(s.dtype(), &DataType::Utf8); assert_eq!(s.len(), 2) } @@ -505,8 +513,9 @@ mod test { let s = LogDecoder::make_series( "ints".to_string(), vec![Token::Address(Address::zero()), Token::Address(Address::zero())], + 2, ) - .unwrap(); + .unwrap(); assert_eq!(s.dtype(), &DataType::Utf8); assert_eq!(s.len(), 2) } diff --git a/crates/freeze/src/types/schemas.rs b/crates/freeze/src/types/schemas.rs index b5cb3702..45fee48b 100644 --- a/crates/freeze/src/types/schemas.rs +++ b/crates/freeze/src/types/schemas.rs @@ -17,13 +17,14 @@ pub struct Table { /// sort order for rows pub sort_columns: Option>, - // metadata + /// metadata about a table pub meta: Option, } /// metadata to associated with a table #[derive(Clone, Debug, PartialEq)] pub struct TableMeta { + /// log decoder for table pub log_decoder: Option, } diff --git a/crates/python/src/collect_adapter.rs b/crates/python/src/collect_adapter.rs index 0449cb4f..ef93ac55 100644 --- a/crates/python/src/collect_adapter.rs +++ b/crates/python/src/collect_adapter.rs @@ -140,10 +140,10 @@ pub fn _collect( async fn run_collect(args: Args) -> PolarsResult { let (query, source, _sink) = match parse_opts(&args).await { Ok(opts) => opts, - Err(_e) => panic!(), + Err(e) => panic!("error parsing opts {:?}", e), }; match collect(query.into(), source).await { Ok(df) => Ok(df), - Err(_e) => panic!(), + Err(e) => panic!("error collecting {:?}", e), } } From 15d14109dd7514f3d21716ce978a93ad6b5fa023 Mon Sep 17 00:00:00 2001 From: sslivkoff Date: Thu, 31 Aug 2023 23:48:05 -0700 Subject: [PATCH 6/8] apply clippy lints + formatter --- crates/cli/src/parse/query.rs | 7 ++-- crates/freeze/src/datasets/logs.rs | 44 +++++++++++---------- crates/freeze/src/types/schemas.rs | 61 +++++++++++++++++++++++++++--- 3 files changed, 81 insertions(+), 31 deletions(-) diff --git a/crates/cli/src/parse/query.rs b/crates/cli/src/parse/query.rs index 02617ce9..b32404b2 100644 --- a/crates/cli/src/parse/query.rs +++ b/crates/cli/src/parse/query.rs @@ -132,10 +132,9 @@ fn parse_schemas(args: &Args) -> Result, ParseError> { &args.exclude_columns, &args.columns, sort[datatype].clone(), - match &args.event_signature { - Some(sig) => Some(TableMeta { log_decoder: LogDecoder::new(sig.clone()) }), - None => None, - }, + args.event_signature + 
.as_ref() + .map(|sig| TableMeta { log_decoder: LogDecoder::new(sig.clone()) }), ) .map(|schema| (*datatype, schema)) .map_err(|_e| { diff --git a/crates/freeze/src/datasets/logs.rs b/crates/freeze/src/datasets/logs.rs index 1cb01013..c724d927 100644 --- a/crates/freeze/src/datasets/logs.rs +++ b/crates/freeze/src/datasets/logs.rs @@ -270,7 +270,7 @@ impl LogColumns { Some(tm) => tm.log_decoder, None => None, }; - if let Some(decoder) = decoder.clone() { + if let Some(decoder) = decoder { decoder.parse_log_from_event(logs).into_iter().for_each(|(k, v)| { self.event_cols.entry(k).or_insert(Vec::new()).extend(v); }); @@ -310,11 +310,12 @@ impl LogColumns { } } else { for (name, data) in self.event_cols { - match LogDecoder::make_series(name.clone(), data, chunk_len.clone()) { + match LogDecoder::make_series(name.clone(), data, chunk_len) { Ok(s) => { cols.push(s); } - Err(e) => eprintln!("error creating frame: {}", e), /* TODO: see how best to + Err(e) => eprintln!("error creating frame: {}", e), /* TODO: see how best + * to * bubble up error */ } } @@ -344,7 +345,8 @@ async fn logs_to_df( /// container for log decoding context #[derive(Clone, Debug, PartialEq)] pub struct LogDecoder { - /// the raw event signature string ex: event Transfer(address indexed from, address indexed to, uint256 amount) + /// the raw event signature string ex: event Transfer(address indexed from, address indexed to, + /// uint256 amount) pub raw: String, /// decoded abi type of event signature string pub event: abi::Event, @@ -403,7 +405,7 @@ impl LogDecoder { let mut addresses: Vec = vec![]; // TODO: support array & tuple types - for token in data.clone() { + for token in data { match token { Token::Address(a) => addresses.push(format!("{:?}", a)), Token::FixedBytes(b) => bytes.push(b.encode_hex()), @@ -428,33 +430,33 @@ impl LogDecoder { // check each vector, see if it contains any values, if it does, check if it's the same // length as the input data and map to a series - if ints.len() > 0 || str_ints.len() > 0 { - if str_ints.len() > 0 { + if !ints.is_empty() || !str_ints.is_empty() { + if !str_ints.is_empty() { str_ints.extend(ints.into_iter().map(|i| i.to_string())); if str_ints.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } - return Ok(Series::new(name.as_str(), str_ints)); + return Ok(Series::new(name.as_str(), str_ints)) } Ok(Series::new(name.as_str(), ints)) - } else if bytes.len() > 0 { + } else if !bytes.is_empty() { if bytes.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } Ok(Series::new(name.as_str(), bytes)) - } else if bools.len() > 0 { + } else if !bools.is_empty() { if bools.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } Ok(Series::new(name.as_str(), bools)) - } else if strings.len() > 0 { + } else if !strings.is_empty() { if strings.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } Ok(Series::new(name.as_str(), strings)) - } else if addresses.len() > 0 { + } else if !addresses.is_empty() { if addresses.len() != chunk_len { - return Err(mixed_length_err); + return Err(mixed_length_err) } Ok(Series::new(name.as_str(), addresses)) } else { @@ -500,7 +502,7 @@ mod test { vec![Token::Bool(true), Token::Bool(false)], 2, ) - .unwrap(); + .unwrap(); assert_eq!(s.dtype(), &Boolean); assert_eq!(s.len(), 2) } @@ -512,7 +514,7 @@ mod test { vec![Token::Int(1.into()), Token::Int(2.into())], 2, ) - .unwrap(); + .unwrap(); assert_eq!(s.dtype(), 
&DataType::UInt64); assert_eq!(s.len(), 2) } @@ -524,7 +526,7 @@ mod test { vec![Token::Int(U256::max_value()), Token::Int(2.into())], 2, ) - .unwrap(); + .unwrap(); assert_eq!(s.dtype(), &DataType::Utf8); assert_eq!(s.len(), 2) } @@ -536,7 +538,7 @@ mod test { vec![Token::Address(Address::zero()), Token::Address(Address::zero())], 2, ) - .unwrap(); + .unwrap(); assert_eq!(s.dtype(), &DataType::Utf8); assert_eq!(s.len(), 2) } diff --git a/crates/freeze/src/types/schemas.rs b/crates/freeze/src/types/schemas.rs index c95aea4c..c975ca7f 100644 --- a/crates/freeze/src/types/schemas.rs +++ b/crates/freeze/src/types/schemas.rs @@ -155,6 +155,7 @@ pub enum SchemaError { impl Datatype { /// get schema for a particular datatype + #[allow(clippy::too_many_arguments)] pub fn table_schema( &self, u256_types: &HashSet, @@ -256,7 +257,15 @@ mod tests { fn test_table_schema_include_cols() { let inc_cols = Some(vec!["chain_id".to_string(), "receipts_root".to_string()]); let table = Datatype::Blocks - .table_schema(&get_u256_types(), &ColumnEncoding::Hex, &inc_cols, &None, &None, None, None) + .table_schema( + &get_u256_types(), + &ColumnEncoding::Hex, + &inc_cols, + &None, + &None, + None, + None, + ) .unwrap(); assert_eq!(9, table.columns().len()); assert_eq!(["chain_id", "receipts_root"], table.columns()[7..9]); @@ -264,7 +273,15 @@ mod tests { // Non-existing include is skipped let inc_cols = Some(vec!["chain_id".to_string(), "foo_bar".to_string()]); let table = Datatype::Blocks - .table_schema(&get_u256_types(), &ColumnEncoding::Hex, &inc_cols, &None, &None, None, None) + .table_schema( + &get_u256_types(), + &ColumnEncoding::Hex, + &inc_cols, + &None, + &None, + None, + None, + ) .unwrap(); assert_eq!(Some(&"chain_id"), table.columns().last()); assert!(!table.columns().contains(&"foo_bar")); @@ -272,7 +289,15 @@ mod tests { // "all" marker support let inc_cols = Some(vec!["all".to_string()]); let table = Datatype::Blocks - .table_schema(&get_u256_types(), &ColumnEncoding::Hex, &inc_cols, &None, &None, None, None) + .table_schema( + &get_u256_types(), + &ColumnEncoding::Hex, + &inc_cols, + &None, + &None, + None, + None, + ) .unwrap(); assert_eq!(15, table.columns().len()); assert!(table.columns().contains(&"hash")); @@ -291,7 +316,15 @@ mod tests { let ex_cols = Some(vec!["author".to_string(), "extra_data".to_string()]); let table = Datatype::Blocks - .table_schema(&get_u256_types(), &ColumnEncoding::Hex, &None, &ex_cols, &None, None, None) + .table_schema( + &get_u256_types(), + &ColumnEncoding::Hex, + &None, + &ex_cols, + &None, + None, + None, + ) .unwrap(); assert_eq!(5, table.columns().len()); assert!(!table.columns().contains(&"author")); @@ -300,7 +333,15 @@ mod tests { // Non-existing exclude is ignored let ex_cols = Some(vec!["timestamp".to_string(), "foo_bar".to_string()]); let table = Datatype::Blocks - .table_schema(&get_u256_types(), &ColumnEncoding::Hex, &None, &ex_cols, &None, None, None) + .table_schema( + &get_u256_types(), + &ColumnEncoding::Hex, + &None, + &ex_cols, + &None, + None, + None, + ) .unwrap(); assert_eq!(6, table.columns().len()); assert!(!table.columns().contains(&"timestamp")); @@ -312,7 +353,15 @@ mod tests { let inc_cols = Some(vec!["chain_id".to_string(), "receipts_root".to_string()]); let ex_cols = Some(vec!["author".to_string(), "extra_data".to_string()]); let table = Datatype::Blocks - .table_schema(&get_u256_types(), &ColumnEncoding::Hex, &inc_cols, &ex_cols, &None, None, None) + .table_schema( + &get_u256_types(), + &ColumnEncoding::Hex, + &inc_cols, + 
&ex_cols, + &None, + None, + None, + ) .unwrap(); assert!(!table.columns().contains(&"author")); assert!(!table.columns().contains(&"extra_data")); From 7b658d438cdc37ac19ac9431d193847c406df035 Mon Sep 17 00:00:00 2001 From: sslivkoff Date: Fri, 15 Sep 2023 02:22:35 -0700 Subject: [PATCH 7/8] initial commit --- Cargo.lock | 1 + Cargo.toml | 1 + README.md | 3 +- collect.py | 29 ++ collect.rs | 10 + crates/cli/src/args.rs | 34 +- crates/cli/src/parse/blocks.rs | 12 +- crates/cli/src/parse/mod.rs | 7 +- crates/cli/src/parse/parse_utils.rs | 83 +++ crates/cli/src/parse/query.rs | 275 ++++------ crates/cli/src/parse/schemas.rs | 204 ++++++++ crates/cli/src/parse/transactions.rs | 31 +- crates/freeze/Cargo.toml | 1 + crates/freeze/src/datasets/balances.rs | 172 ++++++ crates/freeze/src/datasets/codes.rs | 171 ++++++ crates/freeze/src/datasets/erc20_balances.rs | 160 ++++++ crates/freeze/src/datasets/erc20_metadata.rs | 224 ++++++++ crates/freeze/src/datasets/erc20_supplies.rs | 124 +++++ crates/freeze/src/datasets/erc20_transfers.rs | 204 ++++++++ crates/freeze/src/datasets/erc721_metadata.rs | 30 ++ .../freeze/src/datasets/erc721_transfers.rs | 194 +++++++ crates/freeze/src/datasets/eth_calls.rs | 234 +++++++++ crates/freeze/src/datasets/logs.rs | 4 +- crates/freeze/src/datasets/mod.rs | 13 + crates/freeze/src/datasets/nonces.rs | 172 ++++++ crates/freeze/src/datasets/storages.rs | 188 +++++++ crates/freeze/src/datasets/trace_calls.rs | 424 +++++++++++++++ crates/freeze/src/datasets/traces.rs | 6 +- .../src/datasets/transaction_addresses.rs | 493 ++++++++++++++++++ .../freeze/src/types/chunks/binary_chunk.rs | 10 + crates/freeze/src/types/chunks/chunk.rs | 6 + crates/freeze/src/types/chunks/mod.rs | 2 +- crates/freeze/src/types/conversions.rs | 8 +- crates/freeze/src/types/dataframes/mod.rs | 2 + crates/freeze/src/types/dataframes/read.rs | 31 ++ crates/freeze/src/types/datatypes/scalar.rs | 97 +++- crates/freeze/src/types/mod.rs | 6 +- crates/freeze/src/types/queries.rs | 116 ++++- crates/python/src/collect_adapter.rs | 23 +- crates/python/src/freeze_adapter.rs | 23 +- 40 files changed, 3580 insertions(+), 248 deletions(-) create mode 100644 collect.py create mode 100644 collect.rs create mode 100644 crates/cli/src/parse/parse_utils.rs create mode 100644 crates/cli/src/parse/schemas.rs create mode 100644 crates/freeze/src/datasets/balances.rs create mode 100644 crates/freeze/src/datasets/codes.rs create mode 100644 crates/freeze/src/datasets/erc20_balances.rs create mode 100644 crates/freeze/src/datasets/erc20_metadata.rs create mode 100644 crates/freeze/src/datasets/erc20_supplies.rs create mode 100644 crates/freeze/src/datasets/erc20_transfers.rs create mode 100644 crates/freeze/src/datasets/erc721_metadata.rs create mode 100644 crates/freeze/src/datasets/erc721_transfers.rs create mode 100644 crates/freeze/src/datasets/eth_calls.rs create mode 100644 crates/freeze/src/datasets/nonces.rs create mode 100644 crates/freeze/src/datasets/storages.rs create mode 100644 crates/freeze/src/datasets/trace_calls.rs create mode 100644 crates/freeze/src/datasets/transaction_addresses.rs create mode 100644 crates/freeze/src/types/dataframes/read.rs diff --git a/Cargo.lock b/Cargo.lock index 7a28ce13..2824dd19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -883,6 +883,7 @@ dependencies = [ "governor", "indexmap 2.0.0", "indicatif", + "lazy_static", "polars", "prefix-hex", "serde", diff --git a/Cargo.toml b/Cargo.toml index 080ea8b4..29742dde 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ governor 
= "0.5.1" hex = "0.4.3" indexmap = "2.0.0" indicatif = "0.17.5" +lazy_static = "1.4.0" polars = { version = "0.32.1", features = [ "parquet", "string_encoding", diff --git a/README.md b/README.md index 5614a3c6..c8e91264 100644 --- a/README.md +++ b/README.md @@ -90,12 +90,13 @@ Many `cryo` cli options will affect output schemas by adding/removing columns or #### Schema Design Guide An attempt is made to ensure that the dataset schemas conform to a common set of design guidelines: -- By default, rows should contain enough information be order-able +- By default, rows should contain enough information in their columns to be order-able (unless the rows do not have an intrinsic order) - Columns should be named by their JSON-RPC or ethers.rs defaults, except in cases where a much more explicit name is available - To make joins across tables easier, a given piece of information should use the same datatype and column name across tables when possible - Large ints such as `u256` should allow multiple conversions. A `value` column of type `u256` should allow: `value_binary`, `value_string`, `value_f32`, `value_f64`, `value_u32`, `value_u64`, and `value_d128` - By default, columns related to non-identifying cryptographic signatures are omitted by default. For example, `state_root` of a block or `v`/`r`/`s` of a transaction - Integer values that can never be negative should be stored as unsigned integers +- Every table should allow an optional `chain_id` column so that data from multiple chains can be easily stored in the same table. Standard types across tables: - `block_number`: `u32` diff --git a/collect.py b/collect.py new file mode 100644 index 00000000..dc24b507 --- /dev/null +++ b/collect.py @@ -0,0 +1,29 @@ +# The Ordering Dimension +# - blocks vs transactions are the same dimension, just different levels of granularity +# - selections in the ordering dimension are +# - sometimes over a single point in time +# - sometimes over a range of time + + +def freeze(query, datasets): + # a query starts off as a seires of lists of chunks + subqueries = partition_query(query, partition_by) + for dataset in datasets: + for subquery in subqueries: + collect_partition(subquery, dataset) + + +# break a query into subqueries +def partition_query(query, partition_by): + return [ + create_parition_query(partition, query) + for partition in create_partitions(query, partition_by): + ] + + +def collect_partition(query, dataset): + for request in get_query_requests(query): + dataset.perform_request(request) + df = results_to_df() + + diff --git a/collect.rs b/collect.rs new file mode 100644 index 00000000..8630e138 --- /dev/null +++ b/collect.rs @@ -0,0 +1,10 @@ + + + +fn collect(filter: RowFitler) { + +} + + + + diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index 400ca8a0..d87440a0 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -147,9 +147,37 @@ pub struct Args { #[arg(long, help_heading = "Output Options")] pub no_report: bool, + /// Address + #[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + pub address: Option>, + + /// To Address + #[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + pub to_address: Option>, + + /// From Address + #[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + pub from_address: Option>, + + /// [eth_calls] Call data to use for eth_calls + #[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + pub call_data: Option>, + + /// [eth_calls] Function to use for eth_calls + 
#[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + pub function: Option>, + + /// [eth_calls] Inputs to use for eth_calls + #[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + pub inputs: Option>, + + /// [slots] Slots + #[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + pub slots: Option>, + /// [logs] filter logs by contract address #[arg(long, help_heading = "Dataset-specific Options")] - pub contract: Option, + pub contract: Option>, /// [logs] filter logs by topic0 #[arg(long, visible_alias = "event", help_heading = "Dataset-specific Options")] @@ -170,14 +198,14 @@ pub struct Args { /// [logs] Blocks per request #[arg( long, - value_name = "BLOCKS", + value_name = "SIZE", default_value_t = 1, help_heading = "Dataset-specific Options" )] pub inner_request_size: u64, /// [logs] event signature to parse - #[arg(long, help_heading = "Dataset-specific Options")] + #[arg(long, value_name = "SIGNATURE", help_heading = "Dataset-specific Options")] pub event_signature: Option, } diff --git a/crates/cli/src/parse/blocks.rs b/crates/cli/src/parse/blocks.rs index bbe77b89..a28ed222 100644 --- a/crates/cli/src/parse/blocks.rs +++ b/crates/cli/src/parse/blocks.rs @@ -1,7 +1,8 @@ use ethers::prelude::*; use polars::prelude::*; +use std::collections::HashMap; -use cryo_freeze::{BlockChunk, Chunk, ChunkData, ParseError, Subchunk}; +use cryo_freeze::{BlockChunk, Chunk, ChunkData, Datatype, ParseError, Subchunk, Table}; use crate::args::Args; @@ -123,8 +124,15 @@ async fn postprocess_block_chunks( pub(crate) async fn get_default_block_chunks( args: &Args, provider: Arc>, + schemas: &HashMap, ) -> Result)>, ParseError> { - let block_chunks = parse_block_inputs(&String::from(r"0:latest"), &provider).await?; + let default_blocks = schemas + .keys() + .map(|datatype| datatype.dataset().default_blocks()) + .find(|blocks| !blocks.is_none()) + .unwrap_or(Some("0:latest".to_string())) + .unwrap(); + let block_chunks = parse_block_inputs(&default_blocks, &provider).await?; postprocess_block_chunks(block_chunks, args, provider).await } diff --git a/crates/cli/src/parse/mod.rs b/crates/cli/src/parse/mod.rs index 18e2fed0..38ffcc25 100644 --- a/crates/cli/src/parse/mod.rs +++ b/crates/cli/src/parse/mod.rs @@ -1,12 +1,11 @@ mod args; mod blocks; mod file_output; +mod parse_utils; mod query; +mod schemas; mod source; mod transactions; pub use args::*; -// use blocks::*; -// use file_output::*; -// use query::*; -// use source::*; +use schemas::*; diff --git a/crates/cli/src/parse/parse_utils.rs b/crates/cli/src/parse/parse_utils.rs new file mode 100644 index 00000000..d7a2be9c --- /dev/null +++ b/crates/cli/src/parse/parse_utils.rs @@ -0,0 +1,83 @@ +use cryo_freeze::ParseError; +use std::collections::HashMap; + +pub(crate) fn hex_string_to_binary(hex_string: &String) -> Result, ParseError> { + let hex_string = hex_string.strip_prefix("0x").unwrap_or(hex_string); + hex::decode(hex_string) + .map_err(|_| ParseError::ParseError("could not parse data as hex".to_string())) +} + +pub(crate) fn hex_strings_to_binary(hex_strings: &[String]) -> Result>, ParseError> { + hex_strings + .iter() + .map(|x| { + hex::decode(x.strip_prefix("0x").unwrap_or(x)) + .map_err(|_| ParseError::ParseError("could not parse data as hex".to_string())) + }) + .collect::, _>>() +} + +#[derive(Eq, PartialEq, Hash)] +pub(crate) enum BinaryInputList { + Explicit, + ParquetColumn(String, String), +} + +type ParsedBinaryArg = HashMap>>; + +/// parse binary argument list +/// each 
argument can be a hex string or a parquet column reference +/// each parquet column is loaded into its own list, hex strings loaded into another +pub(crate) fn parse_binary_arg( + inputs: &[String], + default_column: &str, +) -> Result { + let mut parsed = HashMap::new(); + + // separate into files vs explicit + let (files, hex_strings): (Vec<&String>, Vec<&String>) = + inputs.iter().partition(|tx| std::path::Path::new(tx).exists()); + + // files columns + for path in files { + let reference = parse_file_column_reference(path, default_column)?; + let values = cryo_freeze::read_binary_column(&reference.path, &reference.column) + .map_err(|_e| ParseError::ParseError("could not read input".to_string()))?; + let key = BinaryInputList::ParquetColumn(reference.path, reference.column); + parsed.insert(key, values); + } + + // explicit binary strings + if !hex_strings.is_empty() { + let hex_strings: Vec = hex_strings.into_iter().cloned().collect(); + let binary_vec = hex_strings_to_binary(&hex_strings)?; + parsed.insert(BinaryInputList::Explicit, binary_vec); + }; + + Ok(parsed) +} + +struct FileColumnReference { + path: String, + column: String, +} + +fn parse_file_column_reference( + path: &str, + default_column: &str, +) -> Result { + let (path, column) = if path.contains(':') { + let pieces: Vec<&str> = path.split(':').collect(); + if pieces.len() == 2 { + (pieces[0], pieces[1]) + } else { + return Err(ParseError::ParseError("could not parse path column".to_string())) + } + } else { + (path, default_column) + }; + + let parsed = FileColumnReference { path: path.to_string(), column: column.to_string() }; + + Ok(parsed) +} diff --git a/crates/cli/src/parse/query.rs b/crates/cli/src/parse/query.rs index b32404b2..5dfc2d99 100644 --- a/crates/cli/src/parse/query.rs +++ b/crates/cli/src/parse/query.rs @@ -1,230 +1,141 @@ -use std::{ - collections::{HashMap, HashSet}, - sync::Arc, -}; +use std::{collections::HashMap, sync::Arc}; use ethers::prelude::*; use hex::FromHex; use cryo_freeze::{ - schemas::TableMeta, ColumnEncoding, Datatype, FileFormat, LogDecoder, MultiQuery, ParseError, - RowFilter, Table, + AddressChunk, CallDataChunk, Datatype, MultiQuery, ParseError, RowFilter, SlotChunk, }; -use super::{blocks, file_output, transactions}; +use super::{ + blocks, parse_schemas, + parse_utils::{hex_string_to_binary, hex_strings_to_binary, parse_binary_arg}, + transactions, +}; use crate::args::Args; -use cryo_freeze::U256Type; pub(crate) async fn parse_query( args: &Args, provider: Arc>, ) -> Result { + // process schemas + let schemas = parse_schemas(args)?; + + // process chunks let chunks = match (&args.blocks, &args.txs) { (Some(_), None) => blocks::parse_blocks(args, provider).await?, (None, Some(txs)) => transactions::parse_transactions(txs)?, - (None, None) => blocks::get_default_block_chunks(args, provider).await?, + (None, None) => blocks::get_default_block_chunks(args, provider, &schemas).await?, (Some(_), Some(_)) => { return Err(ParseError::ParseError("specify only one of --blocks or --txs".to_string())) } }; - // process schemas - let schemas = parse_schemas(args)?; + // deprecated + let address = if let Some(contract) = &args.contract { + parse_address(&Some(contract[0].clone())) + } else { + None + }; // build row filters - let contract = parse_address(&args.contract); + let call_data_chunks = parse_call_datas(&args.call_data, &args.function, &args.inputs)?; + let address_chunks = parse_address_chunks(&args.address, "address")?; + let contract_chunks = 
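// --contract values are parsed the same way as --address, defaulting to the
// `contract_address` column when a parquet file reference is given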
parse_address_chunks(&args.contract, "contract_address")?; + let to_address_chunks = parse_address_chunks(&args.to_address, "to_address")?; + let slot_chunks = parse_slot_chunks(&args.slots, "slot")?; let topics = [ parse_topic(&args.topic0), parse_topic(&args.topic1), parse_topic(&args.topic2), parse_topic(&args.topic3), ]; - let row_filter = RowFilter { address: contract, topics }; + let row_filter = RowFilter { + address, + topics, + address_chunks, + contract_chunks, + to_address_chunks, + slot_chunks, + call_data_chunks, + }; let mut row_filters: HashMap = HashMap::new(); - row_filters.insert(Datatype::Logs, row_filter); + for datatype in schemas.keys() { + let row_filter = row_filter.apply_arg_aliases(datatype.dataset().arg_aliases()); + row_filters.insert(*datatype, row_filter); + } let query = MultiQuery { schemas, chunks, row_filters }; Ok(query) } -fn parse_datatypes(raw_inputs: &Vec) -> Result, ParseError> { - let mut datatypes = Vec::new(); - - for raw_input in raw_inputs { - match raw_input.as_str() { - "state_diffs" => { - datatypes.push(Datatype::BalanceDiffs); - datatypes.push(Datatype::CodeDiffs); - datatypes.push(Datatype::NonceDiffs); - datatypes.push(Datatype::StorageDiffs); - } - datatype => { - let datatype = match datatype { - "balance_diffs" => Datatype::BalanceDiffs, - "blocks" => Datatype::Blocks, - "code_diffs" => Datatype::CodeDiffs, - "logs" => Datatype::Logs, - "events" => Datatype::Logs, - "nonce_diffs" => Datatype::NonceDiffs, - "storage_diffs" => Datatype::StorageDiffs, - "transactions" => Datatype::Transactions, - "txs" => Datatype::Transactions, - "traces" => Datatype::Traces, - "vm_traces" => Datatype::VmTraces, - "opcode_traces" => Datatype::VmTraces, - "native_transfers" => Datatype::NativeTransfers, - "contracts" => Datatype::Contracts, - _ => { - return Err(ParseError::ParseError(format!("invalid datatype {}", datatype))) - } - }; - datatypes.push(datatype) - } - } - } - Ok(datatypes) -} - -fn parse_schemas(args: &Args) -> Result, ParseError> { - let datatypes = parse_datatypes(&args.datatype)?; - let output_format = file_output::parse_output_format(args)?; - - let u256_types = if let Some(raw_u256_types) = &args.u256_types { - let mut u256_types: HashSet = HashSet::new(); - for raw in raw_u256_types.iter() { - let u256_type = match raw.to_lowercase() { - raw if raw == "binary" => U256Type::Binary, - raw if raw == "string" => U256Type::String, - raw if raw == "str" => U256Type::String, - raw if raw == "f32" => U256Type::F32, - raw if raw == "float32" => U256Type::F32, - raw if raw == "f64" => U256Type::F64, - raw if raw == "float64" => U256Type::F64, - raw if raw == "float" => U256Type::F64, - raw if raw == "u32" => U256Type::U32, - raw if raw == "uint32" => U256Type::U32, - raw if raw == "u64" => U256Type::U64, - raw if raw == "uint64" => U256Type::U64, - raw if raw == "decimal128" => U256Type::Decimal128, - raw if raw == "d128" => U256Type::Decimal128, - _ => return Err(ParseError::ParseError("bad u256 type".to_string())), - }; - u256_types.insert(u256_type); - } - u256_types - } else { - HashSet::from_iter(vec![U256Type::Binary, U256Type::String, U256Type::F64]) - }; - let binary_column_format = match args.hex | (output_format != FileFormat::Parquet) { - true => ColumnEncoding::Hex, - false => ColumnEncoding::Binary, - }; - - let sort = parse_sort(&args.sort, &datatypes)?; - let schemas: Result, ParseError> = datatypes - .iter() - .map(|datatype| { - datatype - .table_schema( - &u256_types, - &binary_column_format, - &args.include_columns, - 
&args.exclude_columns, - &args.columns, - sort[datatype].clone(), - args.event_signature - .as_ref() - .map(|sig| TableMeta { log_decoder: LogDecoder::new(sig.clone()) }), - ) - .map(|schema| (*datatype, schema)) - .map_err(|_e| { - ParseError::ParseError(format!( - "Failed to get schema for datatype: {:?}", - datatype - )) - }) - }) - .collect(); - - // make sure all included columns ended up in at least one schema - if let (Ok(schemas), Some(include_columns)) = (&schemas, &args.include_columns) { - let mut unknown_columns = Vec::new(); - for column in include_columns.iter() { - let mut in_a_schema = false; - - for schema in schemas.values() { - if schema.has_column(column) { - in_a_schema = true; - break +fn parse_call_datas( + call_datas: &Option>, + function: &Option>, + inputs: &Option>, +) -> Result>, ParseError> { + let call_datas = match (call_datas, function, inputs) { + (None, None, None) => return Ok(None), + (Some(call_data), None, None) => hex_strings_to_binary(call_data)?, + (None, Some(function), None) => hex_strings_to_binary(function)?, + (None, Some(function), Some(inputs)) => { + let mut call_datas = Vec::new(); + for f in function.iter() { + for i in inputs.iter() { + let mut call_data = hex_string_to_binary(f)?.clone(); + call_data.extend(hex_string_to_binary(i)?); + call_datas.push(call_data); } } - - if !in_a_schema { - unknown_columns.push(column); - } + call_datas } - if !unknown_columns.is_empty() { - return Err(ParseError::ParseError(format!( - "datatypes do not support these columns: {:?}", - unknown_columns - ))) + (None, None, Some(_)) => { + let message = "must specify function if specifying inputs"; + return Err(ParseError::ParseError(message.to_string())) } - }; - - // make sure all excluded columns are excluded from at least one schema - if let (Ok(schemas), Some(exclude_columns)) = (&schemas, &args.exclude_columns) { - let mut unknown_columns = Vec::new(); - for column in exclude_columns.iter() { - let mut in_a_schema = false; - - for datatype in schemas.keys() { - if datatype.dataset().column_types().contains_key(&column.as_str()) { - in_a_schema = true; - break - } - } - - if !in_a_schema { - unknown_columns.push(column); - } + (Some(_), Some(_), None) => { + let message = "cannot specify both call_data and function"; + return Err(ParseError::ParseError(message.to_string())) + } + (Some(_), None, Some(_)) => { + let message = "cannot specify both call_data and inputs"; + return Err(ParseError::ParseError(message.to_string())) } - if !unknown_columns.is_empty() { - return Err(ParseError::ParseError(format!( - "datatypes do not support these columns: {:?}", - unknown_columns - ))) + (Some(_), Some(_), Some(_)) => { + let message = "cannot specify both call_data and function"; + return Err(ParseError::ParseError(message.to_string())) } }; + Ok(Some(vec![CallDataChunk::Values(call_datas)])) +} - schemas +pub(crate) fn parse_address_chunks( + address: &Option>, + default_column: &str, +) -> Result>, ParseError> { + if let Some(address) = address { + let chunks = parse_binary_arg(address, default_column)? 
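// parse_binary_arg groups inputs by source (explicit hex strings vs parquet
// column references), so one AddressChunk is produced per input source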
+ .values() + .map(|a| AddressChunk::Values(a.clone())) + .collect(); + Ok(Some(chunks)) + } else { + Ok(None) + } } -fn parse_sort( - raw_sort: &Option>, - datatypes: &Vec, -) -> Result>>, ParseError> { - match raw_sort { - None => Ok(HashMap::from_iter( - datatypes.iter().map(|datatype| (*datatype, Some(datatype.dataset().default_sort()))), - )), - Some(raw_sort) => { - if (raw_sort.len() == 1) && (raw_sort[0] == "none") { - Ok(HashMap::from_iter(datatypes.iter().map(|datatype| (*datatype, None)))) - } else if raw_sort.is_empty() { - Err(ParseError::ParseError( - "must specify columns to sort by, use `none` to disable sorting".to_string(), - )) - } else if datatypes.len() > 1 { - Err(ParseError::ParseError( - "custom sort not supported for multiple datasets".to_string(), - )) - } else { - match datatypes.iter().next() { - Some(datatype) => Ok(HashMap::from_iter([(*datatype, Some(raw_sort.clone()))])), - None => Err(ParseError::ParseError("schemas map is empty".to_string())), - } - } - } +pub(crate) fn parse_slot_chunks( + slots: &Option>, + default_column: &str, +) -> Result>, ParseError> { + if let Some(values) = slots { + let chunks = parse_binary_arg(values, default_column)? + .values() + .map(|a| SlotChunk::Values(a.clone())) + .collect(); + Ok(Some(chunks)) + } else { + Ok(None) } } diff --git a/crates/cli/src/parse/schemas.rs b/crates/cli/src/parse/schemas.rs new file mode 100644 index 00000000..796336f5 --- /dev/null +++ b/crates/cli/src/parse/schemas.rs @@ -0,0 +1,204 @@ +use std::collections::{HashMap, HashSet}; + +use cryo_freeze::{ + schemas::TableMeta, ColumnEncoding, Datatype, FileFormat, LogDecoder, ParseError, Table, +}; + +use super::file_output; +use crate::args::Args; +use cryo_freeze::U256Type; + +fn parse_datatypes(raw_inputs: &Vec) -> Result, ParseError> { + let mut datatypes = Vec::new(); + + for raw_input in raw_inputs { + match raw_input.as_str() { + "state_diffs" => { + datatypes.push(Datatype::BalanceDiffs); + datatypes.push(Datatype::CodeDiffs); + datatypes.push(Datatype::NonceDiffs); + datatypes.push(Datatype::StorageDiffs); + } + datatype => { + let datatype = match datatype { + "balance_diffs" => Datatype::BalanceDiffs, + "balances" => Datatype::Balances, + "blocks" => Datatype::Blocks, + "codes" => Datatype::Codes, + "code_diffs" => Datatype::CodeDiffs, + "contracts" => Datatype::Contracts, + "erc20_balances" => Datatype::Erc20Balances, + "erc20_metadata" => Datatype::Erc20Metadata, + "erc20_supplies" => Datatype::Erc20Supplies, + "erc20_transfers" => Datatype::Erc20Transfers, + "erc721_metadata" => Datatype::Erc721Metadata, + "erc721_transfers" => Datatype::Erc721Transfers, + "eth_calls" => Datatype::EthCalls, + "events" => Datatype::Logs, + "logs" => Datatype::Logs, + "native_transfers" => Datatype::NativeTransfers, + "nonce_diffs" => Datatype::NonceDiffs, + "nonces" => Datatype::Nonces, + "opcode_traces" => Datatype::VmTraces, + "storage_diffs" => Datatype::StorageDiffs, + "storages" => Datatype::Storages, + "trace_calls" => Datatype::TraceCalls, + "traces" => Datatype::Traces, + "transactions" => Datatype::Transactions, + "transaction_addresses" => Datatype::TransactionAddresses, + "txs" => Datatype::Transactions, + "vm_traces" => Datatype::VmTraces, + _ => { + let name = format!("invalid datatype {}", datatype); + return Err(ParseError::ParseError(name)) + } + }; + datatypes.push(datatype) + } + } + } + Ok(datatypes) +} + +pub(crate) fn parse_schemas(args: &Args) -> Result, ParseError> { + let datatypes = parse_datatypes(&args.datatype)?; + let 
output_format = file_output::parse_output_format(args)?; + + let u256_types = if let Some(raw_u256_types) = &args.u256_types { + let mut u256_types: HashSet = HashSet::new(); + for raw in raw_u256_types.iter() { + let u256_type = match raw.to_lowercase() { + raw if raw == "binary" => U256Type::Binary, + raw if raw == "string" => U256Type::String, + raw if raw == "str" => U256Type::String, + raw if raw == "f32" => U256Type::F32, + raw if raw == "float32" => U256Type::F32, + raw if raw == "f64" => U256Type::F64, + raw if raw == "float64" => U256Type::F64, + raw if raw == "float" => U256Type::F64, + raw if raw == "u32" => U256Type::U32, + raw if raw == "uint32" => U256Type::U32, + raw if raw == "u64" => U256Type::U64, + raw if raw == "uint64" => U256Type::U64, + raw if raw == "decimal128" => U256Type::Decimal128, + raw if raw == "d128" => U256Type::Decimal128, + _ => return Err(ParseError::ParseError("bad u256 type".to_string())), + }; + u256_types.insert(u256_type); + } + u256_types + } else { + HashSet::from_iter(vec![U256Type::Binary, U256Type::String, U256Type::F64]) + }; + let binary_column_format = match args.hex | (output_format != FileFormat::Parquet) { + true => ColumnEncoding::Hex, + false => ColumnEncoding::Binary, + }; + + let sort = parse_sort(&args.sort, &datatypes)?; + let schemas: Result, ParseError> = datatypes + .iter() + .map(|datatype| { + datatype + .table_schema( + &u256_types, + &binary_column_format, + &args.include_columns, + &args.exclude_columns, + &args.columns, + sort[datatype].clone(), + args.event_signature + .as_ref() + .map(|sig| TableMeta { log_decoder: LogDecoder::new(sig.clone()) }), + ) + .map(|schema| (*datatype, schema)) + .map_err(|e| { + ParseError::ParseError(format!( + "Failed to get schema for datatype: {:?}, {:?}", + datatype, e + )) + }) + }) + .collect(); + + // make sure all included columns ended up in at least one schema + if let (Ok(schemas), Some(include_columns)) = (&schemas, &args.include_columns) { + let mut unknown_columns = Vec::new(); + for column in include_columns.iter() { + let mut in_a_schema = false; + + for schema in schemas.values() { + if schema.has_column(column) { + in_a_schema = true; + break + } + } + + if !in_a_schema && column != "all" { + unknown_columns.push(column); + } + } + if !unknown_columns.is_empty() { + return Err(ParseError::ParseError(format!( + "datatypes do not support these columns: {:?}", + unknown_columns + ))) + } + }; + + // make sure all excluded columns are excluded from at least one schema + if let (Ok(schemas), Some(exclude_columns)) = (&schemas, &args.exclude_columns) { + let mut unknown_columns = Vec::new(); + for column in exclude_columns.iter() { + let mut in_a_schema = false; + + for datatype in schemas.keys() { + if datatype.dataset().column_types().contains_key(&column.as_str()) { + in_a_schema = true; + break + } + } + + if !in_a_schema { + unknown_columns.push(column); + } + } + if !unknown_columns.is_empty() { + return Err(ParseError::ParseError(format!( + "datatypes do not support these columns: {:?}", + unknown_columns + ))) + } + }; + + schemas +} + +fn parse_sort( + raw_sort: &Option>, + datatypes: &Vec, +) -> Result>>, ParseError> { + match raw_sort { + None => Ok(HashMap::from_iter( + datatypes.iter().map(|datatype| (*datatype, Some(datatype.dataset().default_sort()))), + )), + Some(raw_sort) => { + if (raw_sort.len() == 1) && (raw_sort[0] == "none") { + Ok(HashMap::from_iter(datatypes.iter().map(|datatype| (*datatype, None)))) + } else if raw_sort.is_empty() { + 
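// an empty --sort list is ambiguous: require explicit columns or the literal "none"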
Err(ParseError::ParseError( + "must specify columns to sort by, use `none` to disable sorting".to_string(), + )) + } else if datatypes.len() > 1 { + Err(ParseError::ParseError( + "custom sort not supported for multiple datasets".to_string(), + )) + } else { + match datatypes.iter().next() { + Some(datatype) => Ok(HashMap::from_iter([(*datatype, Some(raw_sort.clone()))])), + None => Err(ParseError::ParseError("schemas map is empty".to_string())), + } + } + } + } +} diff --git a/crates/cli/src/parse/transactions.rs b/crates/cli/src/parse/transactions.rs index 432bc2a1..17f6a6fa 100644 --- a/crates/cli/src/parse/transactions.rs +++ b/crates/cli/src/parse/transactions.rs @@ -1,5 +1,4 @@ use cryo_freeze::{Chunk, ParseError, TransactionChunk}; -use polars::prelude::*; pub(crate) fn parse_transactions( txs: &[String], @@ -17,7 +16,7 @@ pub(crate) fn parse_transactions( } else { "transaction_hash" }; - let tx_hashes = read_binary_column(path, column) + let tx_hashes = cryo_freeze::read_binary_column(path, column) .map_err(|_e| ParseError::ParseError("could not read input".to_string()))?; let chunk = TransactionChunk::Values(tx_hashes); let chunk_label = path @@ -44,31 +43,3 @@ pub(crate) fn parse_transactions( file_chunks.extend(hash_chunks); Ok(file_chunks) } - -fn read_binary_column(path: &str, column: &str) -> Result>, ParseError> { - let file = std::fs::File::open(path) - .map_err(|_e| ParseError::ParseError("could not open file path".to_string()))?; - - let df = ParquetReader::new(file) - .with_columns(Some(vec![column.to_string()])) - .finish() - .map_err(|_e| ParseError::ParseError("could not read data from column".to_string()))?; - - let series = df - .column(column) - .map_err(|_e| ParseError::ParseError("could not get column".to_string()))? - .unique() - .map_err(|_e| ParseError::ParseError("could not get column".to_string()))?; - - let ca = series - .binary() - .map_err(|_e| ParseError::ParseError("could not convert to binary column".to_string()))?; - - ca.into_iter() - .map(|value| { - value - .ok_or_else(|| ParseError::ParseError("transaction hash missing".to_string())) - .map(|data| data.into()) - }) - .collect() -} diff --git a/crates/freeze/Cargo.toml b/crates/freeze/Cargo.toml index 4a6c51fa..166e2cd9 100644 --- a/crates/freeze/Cargo.toml +++ b/crates/freeze/Cargo.toml @@ -17,6 +17,7 @@ futures = { workspace = true } governor = { workspace = true } indexmap = { workspace = true } indicatif = { workspace = true } +lazy_static = { workspace = true } polars = { workspace = true } prefix-hex = { workspace = true } serde = { workspace = true } diff --git a/crates/freeze/src/datasets/balances.rs b/crates/freeze/src/datasets/balances.rs new file mode 100644 index 00000000..abf0d37d --- /dev/null +++ b/crates/freeze/src/datasets/balances.rs @@ -0,0 +1,172 @@ +// required args:: address + +use crate::{types::Balances, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; + +use std::sync::Arc; + +use ethers::prelude::*; +use polars::prelude::*; +use tokio::{sync::mpsc, task}; + +use crate::{ + dataframes::SortableDataFrame, + types::{ + conversions::{ToVecHex, ToVecU8}, + AddressChunk, BlockChunk, CollectError, RowFilter, Source, Table, + }, + with_series, with_series_binary, with_series_u256, U256Type, +}; + +#[async_trait::async_trait] +impl Dataset for Balances { + fn datatype(&self) -> Datatype { + Datatype::Balances + } + + fn name(&self) -> &'static str { + "balances" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + 
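// one row per (block, address) pair; balance is the native balance at that block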
("block_number", ColumnType::UInt32), + ("address", ColumnType::Binary), + ("balance", ColumnType::UInt256), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "address", "balance", "chain_id"] + } + + fn default_sort(&self) -> Vec { + vec!["block_number".to_string(), "address".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let address_chunks = match filter { + Some(filter) => match &filter.address_chunks { + Some(address_chunks) => address_chunks.clone(), + _ => return Err(CollectError::CollectError("must specify addresses".to_string())), + }, + _ => return Err(CollectError::CollectError("must specify addresses".to_string())), + }; + let rx = fetch_balances(vec![chunk], address_chunks, source).await; + balances_to_df(rx, schema, source.chain_id).await + } +} + +pub(crate) type BlockAddressBalance = (u64, Vec, U256); + +async fn fetch_balances( + block_chunks: Vec<&BlockChunk>, + address_chunks: Vec, + source: &Source, +) -> mpsc::Receiver> { + let (tx, rx) = mpsc::channel(100); + + for block_chunk in block_chunks { + for number in block_chunk.numbers() { + for address_chunk in &address_chunks { + for address in address_chunk.values().iter() { + let address = address.clone(); + let address_h160 = H160::from_slice(&address); + let tx = tx.clone(); + let provider = Arc::clone(&source.provider); + let semaphore = source.semaphore.clone(); + let rate_limiter = source.rate_limiter.as_ref().map(Arc::clone); + task::spawn(async move { + let _permit = match semaphore { + Some(semaphore) => Some(Arc::clone(&semaphore).acquire_owned().await), + _ => None, + }; + if let Some(limiter) = rate_limiter { + Arc::clone(&limiter).until_ready().await; + } + let balance = provider.get_balance(address_h160, Some(number.into())).await; + let result = match balance { + Ok(value) => Ok((number, address, value)), + Err(e) => Err(CollectError::ProviderError(e)), + }; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } + } + }); + } + } + } + } + + rx +} + +async fn balances_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = BalanceColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(block_address_balance) => { + columns.process_balance(block_address_balance, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError) + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct BalanceColumns { + n_rows: usize, + block_number: Vec, + address: Vec>, + balance: Vec, +} + +impl BalanceColumns { + fn process_balance(&mut self, block_address_balance: BlockAddressBalance, schema: &Table) { + let (block, address, balance) = block_address_balance; + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(block as u32); + } + if schema.has_column("address") { + self.address.push(address); + } + if schema.has_column("balance") { + self.balance.push(balance); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = 
Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "address", self.address, schema); + with_series_u256!(cols, "balance", self.balance, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/codes.rs b/crates/freeze/src/datasets/codes.rs new file mode 100644 index 00000000..2b81c9c2 --- /dev/null +++ b/crates/freeze/src/datasets/codes.rs @@ -0,0 +1,171 @@ +// required args:: address + +use crate::{types::Codes, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; + +use std::sync::Arc; + +use ethers::prelude::*; +use polars::prelude::*; +use tokio::{sync::mpsc, task}; + +use crate::{ + dataframes::SortableDataFrame, + types::{ + conversions::ToVecHex, AddressChunk, BlockChunk, CollectError, RowFilter, Source, Table, + }, + with_series, with_series_binary, +}; + +#[async_trait::async_trait] +impl Dataset for Codes { + fn datatype(&self) -> Datatype { + Datatype::Codes + } + + fn name(&self) -> &'static str { + "codes" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("address", ColumnType::Binary), + ("code", ColumnType::Binary), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "address", "code"] + } + + fn default_sort(&self) -> Vec { + vec!["block_number".to_string(), "address".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let address_chunks = match filter { + Some(filter) => match &filter.address_chunks { + Some(address_chunks) => address_chunks.clone(), + _ => return Err(CollectError::CollectError("must specify addresses".to_string())), + }, + _ => return Err(CollectError::CollectError("must specify addresses".to_string())), + }; + let rx = fetch_codes(vec![chunk], address_chunks, source).await; + codes_to_df(rx, schema, source.chain_id).await + } +} + +pub(crate) type BlockAddressCode = (u64, Vec, Vec); + +async fn fetch_codes( + block_chunks: Vec<&BlockChunk>, + address_chunks: Vec, + source: &Source, +) -> mpsc::Receiver> { + let (tx, rx) = mpsc::channel(100); + + for block_chunk in block_chunks { + for number in block_chunk.numbers() { + for address_chunk in &address_chunks { + for address in address_chunk.values().iter() { + let address = address.clone(); + let address_h160 = H160::from_slice(&address); + let tx = tx.clone(); + let provider = Arc::clone(&source.provider); + let semaphore = source.semaphore.clone(); + let rate_limiter = source.rate_limiter.as_ref().map(Arc::clone); + task::spawn(async move { + let _permit = match semaphore { + Some(semaphore) => Some(Arc::clone(&semaphore).acquire_owned().await), + _ => None, + }; + if let Some(limiter) = rate_limiter { + Arc::clone(&limiter).until_ready().await; + } + let result = provider.get_code(address_h160, Some(number.into())).await; + let result = match result { + Ok(value) => Ok((number, address, value.to_vec())), + Err(e) => Err(CollectError::ProviderError(e)), + }; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + 
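// a send error means the receiver has shut down, so abort rather than
// silently dropping results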
std::process::exit(1) + } + } + }); + } + } + } + } + + rx +} + +async fn codes_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = CodeColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(block_address_code) => { + columns.process_code(block_address_code, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError) + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct CodeColumns { + n_rows: usize, + block_number: Vec, + address: Vec>, + code: Vec>, +} + +impl CodeColumns { + fn process_code(&mut self, block_address_code: BlockAddressCode, schema: &Table) { + let (block, address, code) = block_address_code; + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(block as u32); + } + if schema.has_column("address") { + self.address.push(address); + } + if schema.has_column("code") { + self.code.push(code); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "address", self.address, schema); + with_series_binary!(cols, "code", self.code, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/erc20_balances.rs b/crates/freeze/src/datasets/erc20_balances.rs new file mode 100644 index 00000000..3e637c70 --- /dev/null +++ b/crates/freeze/src/datasets/erc20_balances.rs @@ -0,0 +1,160 @@ +// reqired args:: address, contract + +// single erc20 / many erc20s +// single block / many blocks +// single address / many addresses + +use crate::{types::Erc20Balances, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; + +use ethers::prelude::*; +use polars::prelude::*; +use tokio::sync::mpsc; + +use crate::{ + dataframes::SortableDataFrame, + types::{ + conversions::{ToVecHex, ToVecU8}, + AddressChunk, BlockChunk, CallDataChunk, CollectError, RowFilter, Source, Table, + }, + with_series, with_series_binary, with_series_u256, +}; + +use super::eth_calls; +use crate::U256Type; + +#[async_trait::async_trait] +impl Dataset for Erc20Balances { + fn datatype(&self) -> Datatype { + Datatype::Erc20Balances + } + + fn name(&self) -> &'static str { + "erc20_balances" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("erc20", ColumnType::Binary), + ("address", ColumnType::Binary), + ("balance", ColumnType::UInt256), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "erc20", "address", "balance", "chain_id"] + } + + fn default_sort(&self) -> Vec { + vec!["block_number".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let (contract_chunks, call_data_chunks) = match filter { + Some(filter) => { + (filter.contract_chunks()?, create_balance_of_call_datas(filter.address_chunks()?)?) 
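// the pair above is (erc20 contracts to call, balanceOf call data built from
// --address): for each 20-byte address the helper below emits the selector
// 0x70a08231 followed directly by the raw address bytes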
+ } + None => return Err(CollectError::CollectError("must specify RowFilter".to_string())), + }; + + let rx = eth_calls::fetch_eth_calls(vec![chunk], contract_chunks, call_data_chunks, source) + .await; + balance_calls_to_df(rx, schema, source.chain_id).await + } +} + +fn create_balance_of_call_datas( + address_chunks: Vec, +) -> Result, CollectError> { + let signature: Vec = prefix_hex::decode("0x70a08231").expect("Decoding failed"); + let mut call_data_chunks: Vec = Vec::new(); + for address_chunk in address_chunks.iter() { + match address_chunk { + AddressChunk::Values(addresses) => { + let call_datas: Vec> = addresses + .iter() + .map(|a| { + let mut call_data = signature.clone(); + call_data.extend(a); + call_data + }) + .collect(); + call_data_chunks.push(CallDataChunk::Values(call_datas)) + } + _ => return Err(CollectError::CollectError("bad AddressChunk".to_string())), + } + } + Ok(call_data_chunks) +} + +async fn balance_calls_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = Erc20BalanceColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(call_data_output) => { + columns.process_calls(call_data_output, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError); + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct Erc20BalanceColumns { + n_rows: usize, + block_number: Vec, + erc20: Vec>, + address: Vec>, + balance: Vec, +} + +impl Erc20BalanceColumns { + fn process_calls(&mut self, call_data_output: eth_calls::CallDataOutput, schema: &Table) { + let (block_number, contract_address, call_data, output_data) = call_data_output; + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(block_number as u32); + } + if schema.has_column("erc20") { + self.erc20.push(contract_address); + } + if schema.has_column("address") { + self.address.push(call_data); + } + if schema.has_column("balance") { + self.balance.push(output_data.to_vec().as_slice().into()); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "erc20", self.erc20, schema); + with_series_binary!(cols, "address", self.address, schema); + with_series_u256!(cols, "balance", self.balance, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/erc20_metadata.rs b/crates/freeze/src/datasets/erc20_metadata.rs new file mode 100644 index 00000000..f4150802 --- /dev/null +++ b/crates/freeze/src/datasets/erc20_metadata.rs @@ -0,0 +1,224 @@ +// fix fetcher +// implement --dedup +use crate::types::Erc20Supplies; +use crate::{conversions::ToVecHex, conversions::ToVecU8, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; +use tokio::{sync::mpsc, task}; + +use ethers::prelude::*; +use polars::prelude::*; + +use super::eth_calls; +use crate::types::Erc20Metadata; +use crate::{ + dataframes::SortableDataFrame, + types::{AddressChunk, BlockChunk, CollectError, RowFilter, Source, Table}, + with_series, with_series_binary, with_series_u256, CallDataChunk, U256Type, +}; + +#[async_trait::async_trait] +impl Dataset for Erc20Metadata { + fn 
datatype(&self) -> Datatype { + Datatype::Erc20Metadata + } + + fn name(&self) -> &'static str { + "erc20_metadata" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("erc20", ColumnType::Binary), + ("name", ColumnType::String), + ("symbol", ColumnType::String), + ("decimals", ColumnType::UInt32), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "erc20", "name", "symbol", "decimals", "chain_id"] + } + + fn default_sort(&self) -> Vec { + vec!["symbol".to_string(), "block_number".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let contract_chunks = match filter { + Some(filter) => filter.contract_chunks()?, + _ => return Err(CollectError::CollectError("must specify RowFilter".to_string())), + }; + + // build call data + let call_data = prefix_hex::decode("0x18160ddd").expect("Decoding failed"); + let call_data_chunks = vec![CallDataChunk::Values(vec![call_data])]; + + let rx = fetch_metadata_calls(vec![chunk], contract_chunks, call_data_chunks, source).await; + metadata_calls_to_df(rx, schema, source.chain_id).await + } +} + +type MetadataOutput = (u32, Vec, (Option, Option, Option)); + +pub(crate) async fn fetch_metadata_calls( + block_chunks: Vec<&BlockChunk>, + address_chunks: Vec, + source: &Source, +) -> mpsc::Receiver> { + let (tx, rx) = mpsc::channel(100); + + for block_chunk in block_chunks { + for number in block_chunk.numbers() { + for address_chunk in &address_chunks { + for address in address_chunk.values().iter() { + let address = address.clone(); + let address_h160 = H160::from_slice(&address); + + let tx = tx.clone(); + let provider = Arc::clone(&source.provider); + let semaphore = source.semaphore.clone(); + let rate_limiter = source.rate_limiter.as_ref().map(Arc::clone); + task::spawn(async move { + // name + let _permit = match semaphore { + Some(semaphore) => { + Some(Arc::clone(&semaphore).acquire_owned().await) + } + _ => None, + }; + if let Some(limiter) = rate_limiter { + Arc::clone(&limiter).until_ready().await; + } + let transaction = TransactionRequest { + to: Some(address_h160.into()), + data: Some(call_data.clone().into()), + ..Default::default() + }; + let name_result = + provider.call(&transaction.into(), Some(number.into())).await; + + let result = match result { + Ok(value) => Ok((number, address, call_data, value)), + Err(e) => Err(CollectError::ProviderError(e)), + }; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } + } + }); + } + } + } + } + + rx +} + +async fn contract_call( + number: u32, + semaphore: Arc>, + rate_limiter: Arc, +) -> Result { + let _permit = match semaphore { + Some(semaphore) => Some(Arc::clone(&semaphore).acquire_owned().await), + _ => None, + }; + if let Some(limiter) = rate_limiter { + Arc::clone(&limiter).until_ready().await; + } + + let transaction = TransactionRequest { + to: Some(address_h160.into()), + data: Some(call_data.clone().into()), + ..Default::default() + }; + + provider.call(&transaction.into(), Some(number.into())).await +} + +async fn metadata_calls_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> 
Result { + // initialize + let mut columns = Erc20MetadataColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(call_data_output) => { + columns.process_calls(call_data_output, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError); + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct Erc20MetadataColumns { + n_rows: usize, + block_number: Vec, + erc20: Vec>, + name: Vec>, + symbol: Vec>, + decimals: Vec>, +} + +impl Erc20MetadataColumns { + fn process_calls(&mut self, call_data_output: MetadataOutput, schema: &Table) { + let (block_number, contract_address, output_data) = call_data_output; + let (name, symbol, decimals) = output_data; + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(block_number as u32); + } + if schema.has_column("erc20") { + self.erc20.push(contract_address); + } + if schema.has_column("name") { + self.name.push(String::from_utf8(name.to_vec()).ok()); + } + if schema.has_column("symbol") { + self.symbol.push(String::from_utf8(symbol.to_vec()).ok()); + } + if schema.has_column("decimals") { + let v = decimals.to_vec(); + let decimals = if v.len() == 32 && v[0..28].iter().all(|b| *b == 0) { + Some(u32::from_be_bytes([v[28], v[29], v[30], v[31]])) + } else { + None + }; + self.decimals.push(decimals); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "erc20", self.erc20, schema); + with_series!(cols, "name", self.name, schema); + with_series!(cols, "symbol", self.symbol, schema); + with_series!(cols, "decimals", self.decimals, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/erc20_supplies.rs b/crates/freeze/src/datasets/erc20_supplies.rs new file mode 100644 index 00000000..047ce9e8 --- /dev/null +++ b/crates/freeze/src/datasets/erc20_supplies.rs @@ -0,0 +1,124 @@ +use crate::types::Erc20Supplies; +use crate::{conversions::ToVecHex, conversions::ToVecU8, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; +use tokio::sync::mpsc; + +use ethers::prelude::*; +use polars::prelude::*; + +use crate::{ + CallDataChunk, + dataframes::SortableDataFrame, + types::{BlockChunk, CollectError, RowFilter, Source, Table}, + with_series, with_series_binary, with_series_u256, U256Type, +}; + +use super::eth_calls; + +#[async_trait::async_trait] +impl Dataset for Erc20Supplies { + fn datatype(&self) -> Datatype { + Datatype::Erc20Supplies + } + + fn name(&self) -> &'static str { + "erc20_supplies" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("erc20", ColumnType::Binary), + ("total_supply", ColumnType::UInt256), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "erc20", "total_supply"] + } + + fn default_sort(&self) -> Vec { + vec!["erc20".to_string(), "block_number".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let contract_chunks = match filter { + 
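// erc20 contracts come from the row filter (--contract); the totalSupply()
// selector 0x18160ddd is used as the call data for every contract below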
Some(filter) => filter.contract_chunks()?, + _ => return Err(CollectError::CollectError("must specify RowFilter".to_string())), + }; + + // build call data + let call_data = prefix_hex::decode("0x18160ddd").expect("Decoding failed"); + let call_data_chunks = vec![CallDataChunk::Values(vec![call_data])]; + + let rx = + eth_calls::fetch_eth_calls(vec![chunk], contract_chunks, call_data_chunks, source).await; + supply_calls_to_df(rx, schema, source.chain_id).await + } +} + +async fn supply_calls_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = SupplyColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(call_data_output) => { + columns.process_calls(call_data_output, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError); + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct SupplyColumns { + n_rows: usize, + block_number: Vec, + erc20: Vec>, + total_supply: Vec, +} + +impl SupplyColumns { + fn process_calls(&mut self, call_data_output: eth_calls::CallDataOutput, schema: &Table) { + let (block_number, contract_address, _call_data, output_data) = call_data_output; + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(block_number as u32); + } + if schema.has_column("erc20") { + self.erc20.push(contract_address); + } + if schema.has_column("total_supply") { + self.total_supply.push(output_data.to_vec().as_slice().into()); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "erc20", self.erc20, schema); + with_series_u256!(cols, "total_supply", self.total_supply, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/erc20_transfers.rs b/crates/freeze/src/datasets/erc20_transfers.rs new file mode 100644 index 00000000..3b00e352 --- /dev/null +++ b/crates/freeze/src/datasets/erc20_transfers.rs @@ -0,0 +1,204 @@ +// 0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef +// data field is a single 32 byte word + +// single erc20 / many erc20s +// single address / many addresses +// single block / many blocks + +// --contract(s) +// --address(es) + +use std::collections::HashMap; + +use ethers::prelude::*; +use polars::prelude::*; +use tokio::sync::mpsc; + +use crate::{ + dataframes::SortableDataFrame, + types::{ + conversions::{ToVecHex, ToVecU8}, + BlockChunk, CollectError, ColumnType, Dataset, Datatype, RowFilter, Source, Table, + TransactionChunk, + }, + with_series, with_series_binary, with_series_u256, +}; + +use super::logs; +use crate::{types::Erc20Transfers, U256Type}; + +#[async_trait::async_trait] +impl Dataset for Erc20Transfers { + fn datatype(&self) -> Datatype { + Datatype::Erc20Transfers + } + + fn name(&self) -> &'static str { + "erc20_transfers" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("transaction_index", ColumnType::UInt32), + ("log_index", ColumnType::UInt32), + ("transaction_hash", ColumnType::Binary), + ("erc20", ColumnType::Binary), + ("from_address", 
ColumnType::Binary), + ("to_address", ColumnType::Binary), + ("value", ColumnType::UInt256), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec![ + "block_number", + "transaction_index", + "log_index", + "transaction_hash", + "erc20", + "from_address", + "to_address", + "value", + "chain_id", + ] + } + + fn default_sort(&self) -> Vec { + vec!["block_number".to_string(), "log_index".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let filter = get_row_filter(filter); + let rx = logs::fetch_block_logs(chunk, source, Some(&filter)).await; + logs_to_erc20_transfers(rx, schema, source.chain_id).await + } + + async fn collect_transaction_chunk( + &self, + chunk: &TransactionChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let filter = get_row_filter(filter); + let rx = logs::fetch_transaction_logs(chunk, source, Some(&filter)).await; + logs_to_erc20_transfers(rx, schema, source.chain_id).await + } +} + +pub(crate) fn get_row_filter(filter: Option<&RowFilter>) -> RowFilter { + let event_hash: H256 = H256( + prefix_hex::decode("0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef") + .expect("Decoding failed"), + ); + let transfer_topics: [Option>>; 4] = + [Some(ValueOrArray::Value(Some(event_hash))), None, None, None]; + match filter { + None => RowFilter { topics: transfer_topics, ..Default::default() }, + Some(filter) => RowFilter { topics: transfer_topics, ..filter.clone() }, + } +} + +#[derive(Default)] +pub(crate) struct Erc20TransferLogColumns { + n_rows: usize, + block_number: Vec, + transaction_index: Vec, + log_index: Vec, + transaction_hash: Vec>, + erc20: Vec>, + from_address: Vec>, + to_address: Vec>, + value: Vec, +} + +impl Erc20TransferLogColumns { + pub(crate) fn process_erc20_transfer_logs( + &mut self, + logs: Vec, + schema: &Table, + ) -> Result<(), CollectError> { + for log in &logs { + if let Some(true) = log.removed { + continue + } + if log.data.is_empty() { + continue + } + if let (Some(bn), Some(tx), Some(ti), Some(li)) = + (log.block_number, log.transaction_hash, log.transaction_index, log.log_index) + { + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(bn.as_u32()); + }; + if schema.has_column("transaction_index") { + self.transaction_index.push(ti.as_u32()); + }; + if schema.has_column("log_index") { + self.log_index.push(li.as_u32()); + }; + if schema.has_column("transaction_hash") { + self.transaction_hash.push(tx.as_bytes().to_vec()); + }; + if schema.has_column("erc20") { + self.erc20.push(log.address.as_bytes().to_vec()); + }; + if schema.has_column("from_address") { + self.from_address.push(log.topics[1].as_bytes().to_vec()); + }; + if schema.has_column("to_address") { + self.to_address.push(log.topics[2].as_bytes().to_vec()); + }; + if schema.has_column("value") { + self.value.push(log.data.to_vec().as_slice().into()); + }; + } + } + + Ok(()) + } + + pub(crate) fn create_df( + self, + schema: &Table, + chain_id: u64, + ) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series!(cols, "transaction_index", self.transaction_index, schema); + with_series!(cols, "log_index", self.log_index, schema); + with_series_binary!(cols, "transaction_hash", self.transaction_hash, schema); + with_series_binary!(cols, "erc20", 
self.erc20, schema); + with_series_binary!(cols, "from_address", self.from_address, schema); + with_series_binary!(cols, "to_address", self.to_address, schema); + with_series_u256!(cols, "value", self.value, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} + +async fn logs_to_erc20_transfers( + mut logs: mpsc::Receiver, CollectError>>, + schema: &Table, + chain_id: u64, +) -> Result { + let mut columns = Erc20TransferLogColumns::default(); + while let Some(message) = logs.recv().await { + if let Ok(logs) = message { + columns.process_erc20_transfer_logs(logs, schema)? + } else { + return Err(CollectError::TooManyRequestsError) + } + } + columns.create_df(schema, chain_id) +} diff --git a/crates/freeze/src/datasets/erc721_metadata.rs b/crates/freeze/src/datasets/erc721_metadata.rs new file mode 100644 index 00000000..59623c8a --- /dev/null +++ b/crates/freeze/src/datasets/erc721_metadata.rs @@ -0,0 +1,30 @@ +use crate::{types::Erc721Metadata, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; + +impl Dataset for Erc721Metadata { + fn datatype(&self) -> Datatype { + Datatype::Erc721Metadata + } + + fn name(&self) -> &'static str { + "erc721_metadata" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("erc20", ColumnType::Binary), + ("name", ColumnType::String), + ("symbol", ColumnType::String), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "erc20", "name", "symbol", "chain_id"] + } + + fn default_sort(&self) -> Vec { + vec!["symbol".to_string(), "block_number".to_string()] + } +} diff --git a/crates/freeze/src/datasets/erc721_transfers.rs b/crates/freeze/src/datasets/erc721_transfers.rs new file mode 100644 index 00000000..d67381cb --- /dev/null +++ b/crates/freeze/src/datasets/erc721_transfers.rs @@ -0,0 +1,194 @@ +// optional args: contract, from_address, to_address + +// 0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef +// blank data field + +// single erc20 / many erc20s +// single address / many addresses +// single block / many blocks + +// --contract(s) +// --address(es) + +use crate::{types::Erc721Transfers, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; + +use ethers::prelude::*; +use polars::prelude::*; +use tokio::sync::mpsc; + +use super::erc20_transfers; +use crate::{ + dataframes::SortableDataFrame, + types::{ + conversions::{ToVecHex, ToVecU8}, + BlockChunk, CollectError, RowFilter, Source, Table, TransactionChunk, + }, + with_series, with_series_binary, with_series_u256, +}; + +use super::logs; +use crate::U256Type; + +#[async_trait::async_trait] +impl Dataset for Erc721Transfers { + fn datatype(&self) -> Datatype { + Datatype::Erc721Transfers + } + + fn name(&self) -> &'static str { + "erc721_transfers" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("transaction_index", ColumnType::UInt32), + ("log_index", ColumnType::UInt32), + ("transaction_hash", ColumnType::Binary), + ("erc721", ColumnType::Binary), + ("from_address", ColumnType::Binary), + ("to_address", ColumnType::Binary), + ("token_id", ColumnType::UInt256), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec![ + "block_number", + 
"transaction_index", + "log_index", + "transaction_hash", + "erc20", + "from_address", + "to_address", + "token_id", + "chain_id", + ] + } + + fn default_sort(&self) -> Vec { + vec!["block_number".to_string(), "log_index".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let filter = erc20_transfers::get_row_filter(filter); + let rx = logs::fetch_block_logs(chunk, source, Some(&filter)).await; + logs_to_erc721_transfers(rx, schema, source.chain_id).await + } + + async fn collect_transaction_chunk( + &self, + chunk: &TransactionChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let filter = erc20_transfers::get_row_filter(filter); + let rx = logs::fetch_transaction_logs(chunk, source, Some(&filter)).await; + logs_to_erc721_transfers(rx, schema, source.chain_id).await + } +} + +#[derive(Default)] +pub(crate) struct Erc721TransferColumns { + n_rows: usize, + block_number: Vec, + transaction_index: Vec, + log_index: Vec, + transaction_hash: Vec>, + erc20: Vec>, + from_address: Vec>, + to_address: Vec>, + token_id: Vec, +} + +impl Erc721TransferColumns { + pub(crate) fn process_erc721_transfer_logs( + &mut self, + logs: Vec, + schema: &Table, + ) -> Result<(), CollectError> { + for log in &logs { + if let Some(true) = log.removed { + continue; + } + if !log.data.is_empty() { + continue; + } + if let (Some(bn), Some(tx), Some(ti), Some(li)) = + (log.block_number, log.transaction_hash, log.transaction_index, log.log_index) + { + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(bn.as_u32()); + }; + if schema.has_column("transaction_index") { + self.transaction_index.push(ti.as_u32()); + }; + if schema.has_column("log_index") { + self.log_index.push(li.as_u32()); + }; + if schema.has_column("transaction_hash") { + self.transaction_hash.push(tx.as_bytes().to_vec()); + }; + if schema.has_column("erc20") { + self.erc20.push(log.address.as_bytes().to_vec()); + }; + if schema.has_column("from_address") { + self.from_address.push(log.topics[1].as_bytes().to_vec()); + }; + if schema.has_column("to_address") { + self.to_address.push(log.topics[2].as_bytes().to_vec()); + }; + if schema.has_column("token_id") { + self.token_id.push(log.topics[3].0.to_vec().as_slice().into()); + }; + } + } + + Ok(()) + } + + pub(crate) fn create_df( + self, + schema: &Table, + chain_id: u64, + ) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series!(cols, "transaction_index", self.transaction_index, schema); + with_series!(cols, "log_index", self.log_index, schema); + with_series_binary!(cols, "transaction_hash", self.transaction_hash, schema); + with_series_binary!(cols, "erc20", self.erc20, schema); + with_series_binary!(cols, "from_address", self.from_address, schema); + with_series_binary!(cols, "to_address", self.to_address, schema); + with_series_u256!(cols, "token_id", self.token_id, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} + +async fn logs_to_erc721_transfers( + mut logs: mpsc::Receiver, CollectError>>, + schema: &Table, + chain_id: u64, +) -> Result { + let mut columns = Erc721TransferColumns::default(); + while let Some(message) = logs.recv().await { + if let Ok(logs) = message { + 
columns.process_erc721_transfer_logs(logs, schema)? + } else { + return Err(CollectError::TooManyRequestsError); + } + } + columns.create_df(schema, chain_id) +} diff --git a/crates/freeze/src/datasets/eth_calls.rs b/crates/freeze/src/datasets/eth_calls.rs new file mode 100644 index 00000000..d84bb65a --- /dev/null +++ b/crates/freeze/src/datasets/eth_calls.rs @@ -0,0 +1,234 @@ +// want to be able to include either the raw call data or the decoded arguments +// want to be able to include raw_output or decoded columns +// - could do output_decoded as a json object +// +// ways to specify call data +// - give complete raw call data `--call-data` +// - 0x28797abc +// - specify function and arguments separately +// - give function `--function` +// - by name: "totalSupply()" "balanceOf(address)" +// - by json: '{"name": "balanceOf", ...}" +// - give input arguments `--inputs +// - abi encoded: 0x28797abc +// - raw: 5000 +// - give semantic call +// - totalSupply() +// - balanceOf(0x28797abc...278) +// +// ways to specify output type +// - ignore, store only raw output data +// - provide function abi json +// - provide --call-output, e.g. `--call-output u256` + +use crate::{conversions::ToVecHex, types::EthCalls, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; +use tokio::{sync::mpsc, task}; + +use ethers::prelude::*; +use polars::prelude::*; + +use crate::{ + dataframes::SortableDataFrame, + types::{AddressChunk, BlockChunk, CallDataChunk, CollectError, RowFilter, Source, Table}, + with_series, with_series_binary, +}; + +#[async_trait::async_trait] +impl Dataset for EthCalls { + fn datatype(&self) -> Datatype { + Datatype::EthCalls + } + + fn name(&self) -> &'static str { + "eth_calls" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("contract_address", ColumnType::Binary), + ("call_data", ColumnType::Binary), + ("call_data_hash", ColumnType::Binary), + ("output_data", ColumnType::Binary), + ("output_data_hash", ColumnType::Binary), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "contract_address", "call_data", "output_data"] + } + + fn default_sort(&self) -> Vec { + vec!["block_number".to_string(), "contract_address".to_string()] + } + + fn default_blocks(&self) -> Option { + Some("latest".to_string()) + } + + fn arg_aliases(&self) -> HashMap { + [("address", "to_address"), ("contract", "to_address")] + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect() + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let (address_chunks, call_data_chunks) = match filter { + Some(filter) => (filter.address_chunks()?, filter.call_data_chunks()?), + _ => return Err(CollectError::CollectError("must specify RowFilter".to_string())), + }; + let rx = fetch_eth_calls(vec![chunk], address_chunks, call_data_chunks, source).await; + eth_calls_to_df(rx, schema, source.chain_id).await + } +} + +// block, address, call_data, output +pub(crate) type CallDataOutput = (u64, Vec, Vec, Bytes); + +pub(crate) async fn fetch_eth_calls( + block_chunks: Vec<&BlockChunk>, + address_chunks: Vec, + call_data_chunks: Vec, + source: &Source, +) -> mpsc::Receiver> { + let (tx, rx) = mpsc::channel(100); + + for block_chunk in block_chunks { + for number in block_chunk.numbers() { + for address_chunk in &address_chunks { + for address in 
address_chunk.values().iter() { + for call_data_chunk in &call_data_chunks { + for call_data in call_data_chunk.values().iter() { + let address = address.clone(); + let address_h160 = H160::from_slice(&address); + let call_data = call_data.clone(); + + let tx = tx.clone(); + let provider = Arc::clone(&source.provider); + let semaphore = source.semaphore.clone(); + let rate_limiter = source.rate_limiter.as_ref().map(Arc::clone); + task::spawn(async move { + let _permit = match semaphore { + Some(semaphore) => { + Some(Arc::clone(&semaphore).acquire_owned().await) + } + _ => None, + }; + if let Some(limiter) = rate_limiter { + Arc::clone(&limiter).until_ready().await; + } + + let transaction = TransactionRequest { + to: Some(address_h160.into()), + data: Some(call_data.clone().into()), + ..Default::default() + }; + + let result = + provider.call(&transaction.into(), Some(number.into())).await; + let result = match result { + Ok(value) => Ok((number, address, call_data, value)), + Err(e) => Err(CollectError::ProviderError(e)), + }; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } + } + }); + } + } + } + } + } + } + + rx +} + +async fn eth_calls_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = CallDataColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(call_data_output) => { + columns.process_calls(call_data_output, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError) + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct CallDataColumns { + n_rows: usize, + block_number: Vec, + contract_address: Vec>, + call_data: Vec>, + call_data_hash: Vec>, + output_data: Vec>, + output_data_hash: Vec>, +} + +impl CallDataColumns { + fn process_calls(&mut self, call_data_output: CallDataOutput, schema: &Table) { + let (block_number, contract_address, call_data, output_data) = call_data_output; + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(block_number as u32); + } + if schema.has_column("contract_address") { + self.contract_address.push(contract_address); + } + if schema.has_column("call_data_hash") { + let call_data_hash = ethers_core::utils::keccak256(call_data.clone()).into(); + self.call_data_hash.push(call_data_hash); + } + if schema.has_column("call_data") { + self.call_data.push(call_data); + } + if schema.has_column("output_data") { + self.output_data.push(output_data.to_vec()); + } + if schema.has_column("output_data_hash") { + let output_data_hash: Vec = ethers_core::utils::keccak256(output_data).into(); + self.output_data.push(output_data_hash.to_vec()); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "contract_address", self.contract_address, schema); + with_series_binary!(cols, "call_data", self.call_data, schema); + with_series_binary!(cols, "call_data_hash", self.call_data_hash, schema); + with_series_binary!(cols, "output_data", self.output_data, schema); + with_series_binary!(cols, "output_data_hash", 
self.output_data_hash, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/logs.rs b/crates/freeze/src/datasets/logs.rs index c724d927..f0b4922e 100644 --- a/crates/freeze/src/datasets/logs.rs +++ b/crates/freeze/src/datasets/logs.rs @@ -87,7 +87,7 @@ impl Dataset for Logs { } } -async fn fetch_block_logs( +pub(crate) async fn fetch_block_logs( block_chunk: &BlockChunk, source: &Source, filter: Option<&RowFilter>, @@ -133,7 +133,7 @@ async fn fetch_block_logs( rx } -async fn fetch_transaction_logs( +pub(crate) async fn fetch_transaction_logs( transaction_chunk: &TransactionChunk, source: &Source, _filter: Option<&RowFilter>, diff --git a/crates/freeze/src/datasets/mod.rs b/crates/freeze/src/datasets/mod.rs index a06981eb..2ddb057a 100644 --- a/crates/freeze/src/datasets/mod.rs +++ b/crates/freeze/src/datasets/mod.rs @@ -1,14 +1,27 @@ mod balance_diffs; +mod balances; mod blocks; mod blocks_and_transactions; mod code_diffs; +mod codes; mod contracts; +mod erc20_balances; +mod erc20_metadata; +mod erc20_supplies; +mod erc20_transfers; +mod erc721_metadata; +mod erc721_transfers; +mod eth_calls; mod logs; mod native_transfers; mod nonce_diffs; +mod nonces; mod state_diffs; mod storage_diffs; +mod storages; +mod trace_calls; mod traces; +mod transaction_addresses; mod transactions; mod vm_traces; diff --git a/crates/freeze/src/datasets/nonces.rs b/crates/freeze/src/datasets/nonces.rs new file mode 100644 index 00000000..ecda08ae --- /dev/null +++ b/crates/freeze/src/datasets/nonces.rs @@ -0,0 +1,172 @@ +// required args:: address + +use crate::{types::Nonces, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; + +use std::sync::Arc; + +use ethers::prelude::*; +use polars::prelude::*; +use tokio::{sync::mpsc, task}; + +use crate::{ + dataframes::SortableDataFrame, + types::{ + conversions::ToVecHex, AddressChunk, BlockChunk, CollectError, RowFilter, Source, Table, + }, + with_series, with_series_binary, +}; + +#[async_trait::async_trait] +impl Dataset for Nonces { + fn datatype(&self) -> Datatype { + Datatype::Nonces + } + + fn name(&self) -> &'static str { + "nonces" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("address", ColumnType::Binary), + ("nonce", ColumnType::UInt64), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "address", "nonce"] + } + + fn default_sort(&self) -> Vec { + vec!["block_number".to_string(), "address".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let address_chunks = match filter { + Some(filter) => match &filter.address_chunks { + Some(address_chunks) => address_chunks.clone(), + _ => return Err(CollectError::CollectError("must specify addresses".to_string())), + }, + _ => return Err(CollectError::CollectError("must specify addresses".to_string())), + }; + let rx = fetch_nonces(vec![chunk], address_chunks, source).await; + nonces_to_df(rx, schema, source.chain_id).await + } +} + +pub(crate) type BlockAddressNonce = (u64, Vec, u32); + +async fn fetch_nonces( + block_chunks: Vec<&BlockChunk>, + address_chunks: Vec, + source: &Source, +) -> mpsc::Receiver> { + let (tx, rx) = mpsc::channel(100); + + for block_chunk in 
block_chunks { + for number in block_chunk.numbers() { + for address_chunk in &address_chunks { + for address in address_chunk.values().iter() { + let address = address.clone(); + let address_h160 = H160::from_slice(&address); + let tx = tx.clone(); + let provider = Arc::clone(&source.provider); + let semaphore = source.semaphore.clone(); + let rate_limiter = source.rate_limiter.as_ref().map(Arc::clone); + task::spawn(async move { + let _permit = match semaphore { + Some(semaphore) => Some(Arc::clone(&semaphore).acquire_owned().await), + _ => None, + }; + if let Some(limiter) = rate_limiter { + Arc::clone(&limiter).until_ready().await; + } + let result = + provider.get_transaction_count(address_h160, Some(number.into())).await; + let result = match result { + Ok(value) => Ok((number, address, value.as_u32())), + Err(e) => Err(CollectError::ProviderError(e)), + }; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } + } + }); + } + } + } + } + + rx +} + +async fn nonces_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = NonceColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(block_address_nonce) => { + columns.process_nonce(block_address_nonce, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError) + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct NonceColumns { + n_rows: usize, + block_number: Vec, + address: Vec>, + nonce: Vec, +} + +impl NonceColumns { + fn process_nonce(&mut self, block_address_nonce: BlockAddressNonce, schema: &Table) { + let (block, address, nonce) = block_address_nonce; + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(block as u32); + } + if schema.has_column("address") { + self.address.push(address); + } + if schema.has_column("nonce") { + self.nonce.push(nonce); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "address", self.address, schema); + with_series!(cols, "nonce", self.nonce, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/storages.rs b/crates/freeze/src/datasets/storages.rs new file mode 100644 index 00000000..f3d41c1e --- /dev/null +++ b/crates/freeze/src/datasets/storages.rs @@ -0,0 +1,188 @@ +// required args:: address + +use crate::{types::Storages, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; + +use std::sync::Arc; + +use ethers::prelude::*; +use polars::prelude::*; +use tokio::{sync::mpsc, task}; + +use crate::{ + dataframes::SortableDataFrame, + types::{ + conversions::ToVecHex, AddressChunk, BlockChunk, CollectError, RowFilter, SlotChunk, + Source, Table, + }, + with_series, with_series_binary, +}; + +#[async_trait::async_trait] +impl Dataset for Storages { + fn datatype(&self) -> Datatype { + Datatype::Storages + } + + fn name(&self) -> &'static str { + "storages" + } + + fn column_types(&self) -> 
HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("address", ColumnType::Binary), + ("slot", ColumnType::Binary), + ("value", ColumnType::Binary), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "address", "slot", "value", "chain_id"] + } + + fn default_sort(&self) -> Vec { + vec!["block_number".to_string(), "address".to_string(), "slot".to_string()] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let (address_chunks, slot_chunks) = match filter { + Some(filter) => (filter.address_chunks()?, filter.slot_chunks()?), + _ => return Err(CollectError::CollectError("must specify RowFilter".to_string())), + }; + let rx = fetch_slots(vec![chunk], address_chunks, slot_chunks, source).await; + slots_to_df(rx, schema, source.chain_id).await + } +} + +pub(crate) type BlockAddressSlot = (u64, Vec, Vec, Vec); + +async fn fetch_slots( + block_chunks: Vec<&BlockChunk>, + address_chunks: Vec, + slot_chunks: Vec, + source: &Source, +) -> mpsc::Receiver> { + let (tx, rx) = mpsc::channel(100); + + for block_chunk in block_chunks { + for number in block_chunk.numbers() { + for address_chunk in &address_chunks { + for address in address_chunk.values().iter() { + for slot_chunk in &slot_chunks { + for slot in slot_chunk.values().iter() { + let address = address.clone(); + let address_h160 = H160::from_slice(&address); + let slot = slot.clone(); + let slot_h256 = H256::from_slice(&slot); + let tx = tx.clone(); + let provider = Arc::clone(&source.provider); + let semaphore = source.semaphore.clone(); + let rate_limiter = source.rate_limiter.as_ref().map(Arc::clone); + task::spawn(async move { + let _permit = match semaphore { + Some(semaphore) => { + Some(Arc::clone(&semaphore).acquire_owned().await) + } + _ => None, + }; + if let Some(limiter) = rate_limiter { + Arc::clone(&limiter).until_ready().await; + } + let result = provider + .get_storage_at(address_h160, slot_h256, Some(number.into())) + .await; + let result = match result { + Ok(value) => { + Ok((number, address, slot, value.as_bytes().to_vec())) + } + Err(e) => Err(CollectError::ProviderError(e)), + }; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } + } + }); + } + } + } + } + } + } + + rx +} + +async fn slots_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = SlotColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(block_address_slot) => { + columns.process_slots(block_address_slot, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError) + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct SlotColumns { + n_rows: usize, + block_number: Vec, + address: Vec>, + slot: Vec>, + value: Vec>, +} + +impl SlotColumns { + fn process_slots(&mut self, block_address_slot: BlockAddressSlot, schema: &Table) { + let (block, address, slot, value) = block_address_slot; + self.n_rows += 1; + if schema.has_column("block_number") { + self.block_number.push(block as u32); + } + if 
schema.has_column("address") { + self.address.push(address); + } + if schema.has_column("slot") { + self.slot.push(slot); + } + if schema.has_column("value") { + self.value.push(value); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "address", self.address, schema); + with_series_binary!(cols, "slot", self.slot, schema); + with_series_binary!(cols, "value", self.value, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/trace_calls.rs b/crates/freeze/src/datasets/trace_calls.rs new file mode 100644 index 00000000..e86af558 --- /dev/null +++ b/crates/freeze/src/datasets/trace_calls.rs @@ -0,0 +1,424 @@ +// want to be able to include either the raw call data or the decoded arguments +// want to be able to include raw_output or decoded columns +// - could do output_decoded as a json object + +use crate::{types::TraceCalls, ColumnType, Dataset, Datatype, Traces, U256Type}; +use std::collections::HashMap; + +use crate::{conversions::ToVecHex, types::conversions::ToVecU8}; +use tokio::{sync::mpsc, task}; + +use ethers::prelude::*; +use polars::prelude::*; + +use super::traces; +use crate::{ + dataframes::SortableDataFrame, + types::{AddressChunk, BlockChunk, CallDataChunk, CollectError, RowFilter, Source, Table}, + with_series, with_series_binary, with_series_u256, +}; + +#[async_trait::async_trait] +impl Dataset for TraceCalls { + fn datatype(&self) -> Datatype { + Datatype::TraceCalls + } + + fn name(&self) -> &'static str { + "trace_calls" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + let mut types = Traces.column_types(); + types.insert("tx_to_address", ColumnType::Binary); + types.insert("tx_call_data", ColumnType::Binary); + types + } + + fn default_columns(&self) -> Vec<&'static str> { + Traces.default_columns() + } + + fn default_sort(&self) -> Vec { + Traces.default_sort() + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + filter: Option<&RowFilter>, + ) -> Result { + let (address_chunks, call_data_chunks) = match filter { + Some(filter) => (filter.address_chunks()?, filter.call_data_chunks()?), + _ => return Err(CollectError::CollectError("must specify RowFilter".to_string())), + }; + let rx = fetch_trace_calls(vec![chunk], address_chunks, call_data_chunks, source).await; + trace_calls_to_df(rx, schema, source.chain_id).await + } +} + +// block, address, call_data, BlockTrace +type TraceCallOutput = (u64, Vec, Vec, BlockTrace); + +async fn fetch_trace_calls( + block_chunks: Vec<&BlockChunk>, + address_chunks: Vec, + call_data_chunks: Vec, + source: &Source, +) -> mpsc::Receiver> { + let (tx, rx) = mpsc::channel(100); + + for block_chunk in block_chunks { + for number in block_chunk.numbers() { + for address_chunk in &address_chunks { + for address in address_chunk.values().iter() { + for call_data_chunk in &call_data_chunks { + for call_data in call_data_chunk.values().iter() { + let address = address.clone(); + let address_h160 = H160::from_slice(&address); + let call_data = call_data.clone(); + + let tx = tx.clone(); + let provider = Arc::clone(&source.provider); + let semaphore = source.semaphore.clone(); + let rate_limiter = 
source.rate_limiter.as_ref().map(Arc::clone); + task::spawn(async move { + let _permit = match semaphore { + Some(semaphore) => { + Some(Arc::clone(&semaphore).acquire_owned().await) + } + _ => None, + }; + if let Some(limiter) = rate_limiter { + Arc::clone(&limiter).until_ready().await; + } + + let transaction = TransactionRequest { + to: Some(address_h160.into()), + data: Some(call_data.clone().into()), + ..Default::default() + }; + // let transaction = + // ethers::types::transaction::eip2718::TypedTransaction::Legacy(transaction); + // let transaction = + // TypedTransaction::Legacy(transaction); + let trace_type = vec![TraceType::Trace]; + + let result = provider + .trace_call(transaction, trace_type, Some(number.into())) + .await; + let result = match result { + Ok(value) => Ok((number, address, call_data, value)), + Err(e) => Err(CollectError::ProviderError(e)), + }; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } + } + }); + } + } + } + } + } + } + + rx +} + +async fn trace_calls_to_df( + mut stream: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = TraceCallColumns::default(); + + // parse stream of blocks + while let Some(message) = stream.recv().await { + match message { + Ok(trace_call_output) => { + columns.process_calls(trace_call_output, schema); + } + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError) + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct TraceCallColumns { + n_rows: usize, + tx_to_address: Vec>, + tx_call_data: Vec>, + action_from: Vec>>, + action_to: Vec>>, + action_value: Vec, + action_gas: Vec>, + action_input: Vec>>, + action_call_type: Vec>, + action_init: Vec>>, + action_reward_type: Vec>, + action_type: Vec, + result_gas_used: Vec>, + result_output: Vec>>, + result_code: Vec>>, + result_address: Vec>>, + trace_address: Vec, + subtraces: Vec, + transaction_position: Vec, + transaction_hash: Vec>>, + block_number: Vec, + block_hash: Vec>>, + error: Vec>, +} + +impl TraceCallColumns { + fn process_calls(&mut self, trace_call_output: TraceCallOutput, schema: &Table) { + let (block_number, contract_address, call_data, output_data) = trace_call_output; + if let Some(tx_traces) = output_data.trace { + for tx_trace in tx_traces.iter() { + self.n_rows += 1; + + if schema.has_column("tx_to_address") { + self.tx_to_address.push(contract_address.clone()); + } + if schema.has_column("tx_call_data") { + self.tx_call_data.push(call_data.clone()); + } + + match &tx_trace.action { + Action::Call(a) => { + if schema.has_column("action_from") { + self.action_from.push(Some(a.from.as_bytes().to_vec())); + } + if schema.has_column("action_to") { + self.action_to.push(Some(a.to.as_bytes().to_vec())); + } + if schema.has_column("action_value") { + self.action_value.push(a.value); + } + if schema.has_column("action_gas") { + self.action_gas.push(Some(a.gas.as_u32())); + } + if schema.has_column("action_input") { + self.action_input.push(Some(a.input.to_vec())); + } + if schema.has_column("action_call_type") { + self.action_call_type + .push(Some(traces::action_call_type_to_string(&a.call_type))); + } + + if schema.has_column("action_init") { + self.action_init.push(None) + } + if schema.has_column("action_reward_type") { 
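+                        // Columns that do not apply to the current Action variant are
+                        // padded with None so every schema-enabled action_* column keeps
+                        // the same length across rows; DataFrame::new rejects series of
+                        // unequal length.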
+ self.action_reward_type.push(None) + } + } + Action::Create(action) => { + if schema.has_column("action_from") { + self.action_from.push(Some(action.from.as_bytes().to_vec())); + } + if schema.has_column("action_value") { + self.action_value.push(action.value); + } + if schema.has_column("action_gas") { + self.action_gas.push(Some(action.gas.as_u32())); + } + if schema.has_column("action_init") { + self.action_init.push(Some(action.init.to_vec())); + } + + if schema.has_column("action_to") { + self.action_to.push(None) + } + if schema.has_column("action_input") { + self.action_input.push(None) + } + if schema.has_column("action_call_type") { + self.action_call_type.push(None) + } + if schema.has_column("action_reward_type") { + self.action_reward_type.push(None) + } + } + Action::Suicide(action) => { + if schema.has_column("action_from") { + self.action_from.push(Some(action.address.as_bytes().to_vec())); + } + if schema.has_column("action_to") { + self.action_to.push(Some(action.refund_address.as_bytes().to_vec())); + } + if schema.has_column("action_value") { + self.action_value.push(action.balance); + } + + if schema.has_column("action_gas") { + self.action_gas.push(None) + } + if schema.has_column("action_input") { + self.action_input.push(None) + } + if schema.has_column("action_call_type") { + self.action_call_type.push(None) + } + if schema.has_column("action_init") { + self.action_init.push(None) + } + if schema.has_column("action_reward_type") { + self.action_reward_type.push(None) + } + } + Action::Reward(action) => { + if schema.has_column("action_to") { + self.action_to.push(Some(action.author.as_bytes().to_vec())); + } + if schema.has_column("action_value") { + self.action_value.push(action.value); + } + if schema.has_column("action_reward_type") { + self.action_reward_type + .push(Some(traces::reward_type_to_string(&action.reward_type))); + } + + if schema.has_column("action_from") { + self.action_from.push(None) + } + if schema.has_column("action_gas") { + self.action_gas.push(None) + } + if schema.has_column("action_input") { + self.action_input.push(None) + } + if schema.has_column("action_call_type") { + self.action_call_type.push(None) + } + if schema.has_column("action_init") { + self.action_init.push(None) + } + } + } + if schema.has_column("action_type") { + self.action_type.push(traces::action_type_to_string(&tx_trace.action_type)); + } + + match &tx_trace.result { + Some(Res::Call(result)) => { + if schema.has_column("result_gas_used") { + self.result_gas_used.push(Some(result.gas_used.as_u32())); + } + if schema.has_column("result_output") { + self.result_output.push(Some(result.output.to_vec())); + } + + if schema.has_column("result_code") { + self.result_code.push(None); + } + if schema.has_column("result_address") { + self.result_address.push(None); + } + } + Some(Res::Create(result)) => { + if schema.has_column("result_gas_used") { + self.result_gas_used.push(Some(result.gas_used.as_u32())); + } + if schema.has_column("result_code") { + self.result_code.push(Some(result.code.to_vec())); + } + if schema.has_column("result_address") { + self.result_address.push(Some(result.address.as_bytes().to_vec())); + } + + if schema.has_column("result_output") { + self.result_output.push(None); + } + } + Some(Res::None) | None => { + if schema.has_column("result_gas_used") { + self.result_gas_used.push(None); + } + if schema.has_column("result_output") { + self.result_output.push(None); + } + if schema.has_column("result_code") { + self.result_code.push(None); + } + if 
schema.has_column("result_address") { + self.result_address.push(None); + } + } + } + if schema.has_column("trace_address") { + self.trace_address.push( + tx_trace + .trace_address + .iter() + .map(|n| n.to_string()) + .collect::>() + .join("_"), + ); + } + if schema.has_column("subtraces") { + self.subtraces.push(tx_trace.subtraces as u32); + } + if schema.has_column("transaction_position") { + self.transaction_position.push(0_u32); + } + if schema.has_column("transaction_hash") { + self.transaction_hash + .push(output_data.transaction_hash.map(|x| x.as_bytes().to_vec())); + } + if schema.has_column("block_number") { + self.block_number.push(block_number as u32); + } + if schema.has_column("block_hash") { + self.block_hash.push(None); + } + if schema.has_column("error") { + self.error.push(tx_trace.error.clone()); + } + } + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + + with_series_binary!(cols, "action_from", self.action_from, schema); + with_series_binary!(cols, "action_to", self.action_to, schema); + with_series_u256!(cols, "action_value", self.action_value, schema); + with_series!(cols, "action_gas", self.action_gas, schema); + with_series_binary!(cols, "action_input", self.action_input, schema); + with_series!(cols, "action_call_type", self.action_call_type, schema); + with_series_binary!(cols, "action_init", self.action_init, schema); + with_series!(cols, "action_reward_type", self.action_reward_type, schema); + with_series!(cols, "action_type", self.action_type, schema); + with_series!(cols, "result_gas_used", self.result_gas_used, schema); + with_series_binary!(cols, "result_output", self.result_output, schema); + with_series_binary!(cols, "result_code", self.result_code, schema); + with_series_binary!(cols, "result_address", self.result_address, schema); + with_series!(cols, "trace_address", self.trace_address, schema); + with_series!(cols, "subtraces", self.subtraces, schema); + with_series!(cols, "transaction_position", self.transaction_position, schema); + with_series_binary!(cols, "transaction_hash", self.transaction_hash, schema); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "block_hash", self.block_hash, schema); + with_series!(cols, "error", self.error, schema); + with_series_binary!(cols, "tx_to_address", self.tx_to_address, schema); + with_series_binary!(cols, "tx_call_data", self.tx_call_data, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/datasets/traces.rs b/crates/freeze/src/datasets/traces.rs index b7b33786..75941d71 100644 --- a/crates/freeze/src/datasets/traces.rs +++ b/crates/freeze/src/datasets/traces.rs @@ -189,7 +189,7 @@ pub(crate) async fn fetch_transaction_traces( } } -fn reward_type_to_string(reward_type: &RewardType) -> String { +pub(crate) fn reward_type_to_string(reward_type: &RewardType) -> String { match reward_type { RewardType::Block => "reward".to_string(), RewardType::Uncle => "uncle".to_string(), @@ -198,7 +198,7 @@ fn reward_type_to_string(reward_type: &RewardType) -> String { } } -fn action_type_to_string(action_type: &ActionType) -> String { +pub(crate) fn action_type_to_string(action_type: &ActionType) -> String { match action_type { ActionType::Call => "call".to_string(), ActionType::Create => "create".to_string(), @@ -207,7 +207,7 @@ fn 
action_type_to_string(action_type: &ActionType) -> String { } } -fn action_call_type_to_string(action_call_type: &CallType) -> String { +pub(crate) fn action_call_type_to_string(action_call_type: &CallType) -> String { match action_call_type { CallType::None => "none".to_string(), CallType::Call => "call".to_string(), diff --git a/crates/freeze/src/datasets/transaction_addresses.rs b/crates/freeze/src/datasets/transaction_addresses.rs new file mode 100644 index 00000000..e6c0bc5d --- /dev/null +++ b/crates/freeze/src/datasets/transaction_addresses.rs @@ -0,0 +1,493 @@ +use crate::{types::TransactionAddresses, ColumnType, Dataset, Datatype}; +use std::collections::HashMap; + +use ethers::prelude::*; +use polars::prelude::*; +use tokio::{sync::mpsc, task}; + +use crate::{ + dataframes::SortableDataFrame, + types::{ + conversions::ToVecHex, BlockChunk, CollectError, RowFilter, Source, Table, TransactionChunk, + }, + with_series, with_series_binary, +}; + +lazy_static::lazy_static! { + pub static ref ERC20_TRANSFER: H256 = H256( + prefix_hex::decode("0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef") + .expect("Decoding failed"), + ); +} + +#[async_trait::async_trait] +impl Dataset for TransactionAddresses { + fn datatype(&self) -> Datatype { + Datatype::TransactionAddresses + } + + fn name(&self) -> &'static str { + "transaction_addresses" + } + + fn column_types(&self) -> HashMap<&'static str, ColumnType> { + HashMap::from_iter(vec![ + ("block_number", ColumnType::UInt32), + ("transaction_hash", ColumnType::Binary), + ("address", ColumnType::Binary), + ("relationship", ColumnType::String), + ("chain_id", ColumnType::UInt64), + ]) + } + + fn default_columns(&self) -> Vec<&'static str> { + vec!["block_number", "transaction_hash", "address", "relationship", "chain_id"] + } + + fn default_sort(&self) -> Vec { + vec![ + "block_number".to_string(), + "transaction_hash".to_string(), + "address".to_string(), + "relationship".to_string(), + ] + } + + async fn collect_block_chunk( + &self, + chunk: &BlockChunk, + source: &Source, + schema: &Table, + _filter: Option<&RowFilter>, + ) -> Result { + let rx = fetch_block_tx_addresses(chunk, source).await; + traces_to_addresses_df(rx, schema, source.chain_id).await + } + + async fn collect_transaction_chunk( + &self, + chunk: &TransactionChunk, + source: &Source, + schema: &Table, + _filter: Option<&RowFilter>, + ) -> Result { + let rx = fetch_transaction_tx_addresses(chunk, source).await; + traces_to_addresses_df(rx, schema, source.chain_id).await + } +} + +type BlockLogTraces = (Block, Vec, Vec); + +pub(crate) async fn fetch_block_tx_addresses( + block_chunk: &BlockChunk, + source: &Source, +) -> mpsc::Receiver> { + let (tx, rx) = mpsc::channel(block_chunk.numbers().len()); + + for number in block_chunk.numbers() { + let tx = tx.clone(); + let source = source.clone(); + let semaphore = source.semaphore.clone(); + let rate_limiter = source.rate_limiter.as_ref().map(Arc::clone); + task::spawn(async move { + let _permit = match semaphore { + Some(semaphore) => Some(Arc::clone(&semaphore).acquire_owned().await), + _ => None, + }; + if let Some(limiter) = &rate_limiter { + Arc::clone(limiter).until_ready().await; + } + + let result = get_block_block_logs_traces(number, &source).await; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } 
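+                // For reference, the ERC20_TRANSFER topic defined above is the event
+                // signature hash keccak256("Transfer(address,address,uint256)"). An
+                // illustrative check:
+                //
+                //     let topic = ethers::utils::keccak256(b"Transfer(address,address,uint256)");
+                //     assert_eq!(H256(topic), *ERC20_TRANSFER);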
+ } + }); + } + rx +} + +async fn fetch_transaction_tx_addresses( + transaction_chunk: &TransactionChunk, + source: &Source, +) -> mpsc::Receiver> { + match transaction_chunk { + TransactionChunk::Values(tx_hashes) => { + let (tx, rx) = mpsc::channel(tx_hashes.len()); + for tx_hash in tx_hashes.iter() { + let tx_hash = H256::from_slice(&tx_hash.clone()); + let tx = tx.clone(); + let source = source.clone(); + let semaphore = source.semaphore.clone(); + task::spawn(async move { + let _permit = match semaphore { + Some(semaphore) => Some(Arc::clone(&semaphore).acquire_owned().await), + _ => None, + }; + let result = get_tx_block_logs_traces(tx_hash, &source).await; + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } + } + }); + } + rx + } + _ => { + let (tx, rx) = mpsc::channel(1); + let result = Err(CollectError::CollectError( + "transaction value ranges not supported".to_string(), + )); + match tx.send(result).await { + Ok(_) => {} + Err(tokio::sync::mpsc::error::SendError(_e)) => { + eprintln!("send error, try using a rate limit with --requests-per-second or limiting max concurrency with --max-concurrent-requests"); + std::process::exit(1) + } + } + rx + } + } +} + +async fn get_block_block_logs_traces( + number: u64, + source: &Source, +) -> Result { + let provider = &source.provider; + let block_number: BlockNumber = number.into(); + + // block + println!("collecting block"); + if let Some(limiter) = &source.rate_limiter { + Arc::clone(limiter).until_ready().await; + } + let block_result = provider + .get_block(block_number) + .await + .map_err(CollectError::ProviderError)? + .ok_or(CollectError::CollectError("could not get block data".to_string()))?; + + // logs + println!("collecting logs"); + if let Some(limiter) = &source.rate_limiter { + Arc::clone(limiter).until_ready().await; + } + let filter = Filter { + block_option: FilterBlockOption::Range { + from_block: Some(block_number), + to_block: Some(block_number), + }, + ..Default::default() + }; + let log_result = provider.get_logs(&filter).await.map_err(CollectError::ProviderError)?; + + // traces + println!("collecting traces"); + if let Some(limiter) = &source.rate_limiter { + Arc::clone(limiter).until_ready().await; + } + let traces_result = + provider.trace_block(block_number).await.map_err(CollectError::ProviderError)?; + + Ok((block_result, log_result, traces_result)) +} + +async fn get_tx_block_logs_traces( + tx_hash: H256, + source: &Source, +) -> Result { + let provider = &source.provider; + let tx_data = + provider.get_transaction(tx_hash).await.map_err(CollectError::ProviderError)?.ok_or_else( + || CollectError::CollectError("could not find transaction data".to_string()), + )?; + + // block + let block_number = tx_data + .block_number + .ok_or_else(|| CollectError::CollectError("block not found".to_string()))?; + let block_result = provider + .get_block(block_number) + .await + .map_err(CollectError::ProviderError)? + .ok_or(CollectError::CollectError("could not get block".to_string()))?; + + // logs + let log_result = provider + .get_transaction_receipt(tx_hash) + .await + .map_err(CollectError::ProviderError)? + .ok_or(CollectError::CollectError("could not get tx receipt".to_string()))? 
+ .logs; + + // traces + let traces_result = + provider.trace_transaction(tx_hash).await.map_err(CollectError::ProviderError)?; + + Ok((block_result, log_result, traces_result)) +} + +async fn traces_to_addresses_df( + mut rx: mpsc::Receiver>, + schema: &Table, + chain_id: u64, +) -> Result { + // initialize + let mut columns = TransactionAddressColumns::default(); + + // parse stream of blocks + while let Some(message) = rx.recv().await { + match message { + Ok((block, logs, traces)) => columns.process_tx_addresses(block, logs, traces, schema), + Err(e) => { + println!("{:?}", e); + return Err(CollectError::TooManyRequestsError) + } + } + } + + // convert to dataframes + columns.create_df(schema, chain_id) +} + +#[derive(Default)] +struct TransactionAddressColumns { + n_rows: usize, + block_number: Vec, + transaction_hash: Vec>, + address: Vec>, + relationship: Vec, +} + +impl TransactionAddressColumns { + fn process_tx_addresses( + &mut self, + block: Block, + logs: Vec, + traces: Vec, + schema: &Table, + ) { + let mut logs_by_tx: HashMap> = HashMap::new(); + for log in logs.into_iter() { + if let Some(tx_hash) = log.transaction_hash { + logs_by_tx.entry(tx_hash).or_insert_with(Vec::new).push(log); + } + } + + let (block_number, block_author) = match (block.number, block.author) { + (Some(number), Some(author)) => (number.as_u64(), author), + _ => return, + }; + + let mut current_tx_hash = H256([0; 32]); + for trace in traces.iter() { + if let (Some(tx_hash), Some(_tx_pos)) = + (trace.transaction_hash, trace.transaction_position) + { + let first_trace_in_tx = tx_hash != current_tx_hash; + + if first_trace_in_tx { + self.process_address( + block_author, + "miner_fee", + trace.block_number, + tx_hash, + schema, + ); + + // erc transfers + if let Some(logs) = logs_by_tx.get(&tx_hash) { + for log in logs.iter() { + if log.topics.len() >= 3 { + let event = log.topics[0]; + let name = if event == *ERC20_TRANSFER { + if log.data.len() > 0 { + Some("erc20_transfer") + } else if log.topics.len() == 4 { + Some("erc721_transfer") + } else { + None + } + } else { + None + }; + if let Some(name) = name { + let mut from: [u8; 20] = [0; 20]; + from.copy_from_slice(&log.topics[1].to_fixed_bytes()[12..32]); + self.process_address( + H160(from), + &(name.to_string() + "_from"), + block_number, + tx_hash, + schema, + ); + + let mut to: [u8; 20] = [0; 20]; + to.copy_from_slice(&log.topics[1].to_fixed_bytes()[12..32]); + self.process_address( + H160(to), + &(name.to_string() + "_to"), + block_number, + tx_hash, + schema, + ); + } + } + } + } + + match &trace.action { + Action::Call(action) => { + self.process_address( + action.from, + "tx_from", + trace.block_number, + tx_hash, + schema, + ); + self.process_address( + action.to, + "tx_to", + trace.block_number, + tx_hash, + schema, + ); + } + Action::Create(action) => { + self.process_address( + action.from, + "tx_from", + trace.block_number, + tx_hash, + schema, + ); + } + _ => panic!("invalid first tx trace"), + } + + if let Some(Res::Create(result)) = &trace.result { + self.process_address( + result.address, + "tx_to", + trace.block_number, + tx_hash, + schema, + ); + }; + } + + match &trace.action { + Action::Call(action) => { + // let (from_name, to_name) = if first_trace_in_tx { + // ("tx_from", "tx_to") + // } else { + // ("call_from", "call_to") + // }; + self.process_address( + action.from, + "call_from", + trace.block_number, + tx_hash, + schema, + ); + self.process_address( + action.to, + "call_to", + trace.block_number, + tx_hash, + schema, + ); 
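+                    // Relationship labels come in two groups: per-transaction labels
+                    // (miner_fee, tx_from, tx_to, and the erc20/erc721 transfer parties)
+                    // are emitted once when the first trace of a transaction is seen,
+                    // while per-trace labels (call_from, call_to, factory, suicide,
+                    // suicide_refund, author, create) are emitted for every trace.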
+ } + Action::Create(action) => { + self.process_address( + action.from, + "factory", + trace.block_number, + tx_hash, + schema, + ); + } + Action::Suicide(action) => { + self.process_address( + action.address, + "suicide", + trace.block_number, + tx_hash, + schema, + ); + self.process_address( + action.refund_address, + "suicide_refund", + trace.block_number, + tx_hash, + schema, + ); + } + Action::Reward(action) => { + self.process_address( + action.author, + "author", + trace.block_number, + tx_hash, + schema, + ); + } + } + + if let Some(Res::Create(result)) = &trace.result { + self.process_address( + result.address, + "create", + trace.block_number, + tx_hash, + schema, + ); + }; + + current_tx_hash = tx_hash; + } + } + } + + fn process_address( + &mut self, + address: H160, + relationship: &str, + block_number: u64, + transaction_hash: H256, + schema: &Table, + ) { + self.n_rows += 1; + if schema.has_column("address") { + self.address.push(address.as_bytes().to_vec()); + } + if schema.has_column("relationship") { + self.relationship.push(relationship.to_string()); + } + if schema.has_column("block_number") { + self.block_number.push(block_number as u32); + } + if schema.has_column("transaction_hash") { + self.transaction_hash.push(transaction_hash.as_bytes().to_vec()); + } + } + + fn create_df(self, schema: &Table, chain_id: u64) -> Result { + let mut cols = Vec::with_capacity(schema.columns().len()); + with_series!(cols, "block_number", self.block_number, schema); + with_series_binary!(cols, "transaction_hash", self.transaction_hash, schema); + with_series_binary!(cols, "address", self.address, schema); + with_series!(cols, "relationship", self.relationship, schema); + with_series!(cols, "chain_id", vec![chain_id; self.n_rows], schema); + + DataFrame::new(cols).map_err(CollectError::PolarsError).sort_by_schema(schema) + } +} diff --git a/crates/freeze/src/types/chunks/binary_chunk.rs b/crates/freeze/src/types/chunks/binary_chunk.rs index 71ed3775..6401f6dd 100644 --- a/crates/freeze/src/types/chunks/binary_chunk.rs +++ b/crates/freeze/src/types/chunks/binary_chunk.rs @@ -44,3 +44,13 @@ impl ChunkData for BinaryChunk { } } } + +impl BinaryChunk { + /// get list of values in chunk + pub fn values(&self) -> &Vec> { + match self { + BinaryChunk::Values(values) => values, + BinaryChunk::Range(_start, _end) => panic!("values not implemented for binary ranges"), + } + } +} diff --git a/crates/freeze/src/types/chunks/chunk.rs b/crates/freeze/src/types/chunks/chunk.rs index 5699f3b7..2b1a56c2 100644 --- a/crates/freeze/src/types/chunks/chunk.rs +++ b/crates/freeze/src/types/chunks/chunk.rs @@ -12,6 +12,12 @@ pub type TransactionChunk = BinaryChunk; /// address chunk pub type AddressChunk = BinaryChunk; +/// slot chunk +pub type SlotChunk = BinaryChunk; + +/// call data chunk +pub type CallDataChunk = BinaryChunk; + /// Chunk of data #[derive(Debug, Clone)] pub enum Chunk { diff --git a/crates/freeze/src/types/chunks/mod.rs b/crates/freeze/src/types/chunks/mod.rs index dd8a3b47..8b986f06 100644 --- a/crates/freeze/src/types/chunks/mod.rs +++ b/crates/freeze/src/types/chunks/mod.rs @@ -4,6 +4,6 @@ pub(crate) mod chunk_ops; pub(crate) mod number_chunk; pub(crate) mod subchunks; -pub use chunk::{AddressChunk, BlockChunk, Chunk, TransactionChunk}; +pub use chunk::{AddressChunk, BlockChunk, CallDataChunk, Chunk, SlotChunk, TransactionChunk}; pub use chunk_ops::ChunkData; pub use subchunks::Subchunk; diff --git a/crates/freeze/src/types/conversions.rs b/crates/freeze/src/types/conversions.rs index 
9fbea194..11224006 100644 --- a/crates/freeze/src/types/conversions.rs +++ b/crates/freeze/src/types/conversions.rs @@ -11,8 +11,8 @@ pub trait ToVecU8 { impl ToVecU8 for U256 { fn to_vec_u8(&self) -> Vec { let mut vec = Vec::new(); - for &number in self.0.iter() { - vec.extend_from_slice(&number.to_ne_bytes()); + for &number in self.0.iter().rev() { + vec.extend_from_slice(&number.to_be_bytes()); } vec } @@ -22,8 +22,8 @@ impl ToVecU8 for Vec { fn to_vec_u8(&self) -> Vec { let mut vec = Vec::new(); for value in self { - for &number in value.0.iter() { - vec.extend_from_slice(&number.to_ne_bytes()); + for &number in value.0.iter().rev() { + vec.extend_from_slice(&number.to_be_bytes()); } } vec diff --git a/crates/freeze/src/types/dataframes/mod.rs b/crates/freeze/src/types/dataframes/mod.rs index e999cd19..a1f72dc9 100644 --- a/crates/freeze/src/types/dataframes/mod.rs +++ b/crates/freeze/src/types/dataframes/mod.rs @@ -1,8 +1,10 @@ mod export; +mod read; mod sort; #[macro_use] mod creation; pub(crate) use export::*; +pub use read::*; pub(crate) use sort::SortableDataFrame; diff --git a/crates/freeze/src/types/dataframes/read.rs b/crates/freeze/src/types/dataframes/read.rs new file mode 100644 index 00000000..c18da73d --- /dev/null +++ b/crates/freeze/src/types/dataframes/read.rs @@ -0,0 +1,31 @@ +use crate::ParseError; +use polars::prelude::*; + +/// read single binary column of parquet file as Vec +pub fn read_binary_column(path: &str, column: &str) -> Result>, ParseError> { + let file = std::fs::File::open(path) + .map_err(|_e| ParseError::ParseError("could not open file path".to_string()))?; + + let df = ParquetReader::new(file) + .with_columns(Some(vec![column.to_string()])) + .finish() + .map_err(|_e| ParseError::ParseError("could not read data from column".to_string()))?; + + let series = df + .column(column) + .map_err(|_e| ParseError::ParseError("could not get column".to_string()))? 
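+        // Usage sketch (illustrative file and column names): read the deduplicated set
+        // of transaction hashes from a previously collected parquet file, e.g. to feed
+        // them back in as a TransactionChunk:
+        //
+        //     let hashes: Vec<Vec<u8>> =
+        //         read_binary_column("transactions.parquet", "transaction_hash")?;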
+ .unique() + .map_err(|_e| ParseError::ParseError("could not get column".to_string()))?; + + let ca = series + .binary() + .map_err(|_e| ParseError::ParseError("could not convert to binary column".to_string()))?; + + ca.into_iter() + .map(|value| { + value + .ok_or_else(|| ParseError::ParseError("transaction hash missing".to_string())) + .map(|data| data.into()) + }) + .collect() +} diff --git a/crates/freeze/src/types/datatypes/scalar.rs b/crates/freeze/src/types/datatypes/scalar.rs index f63f9af1..a83a4f55 100644 --- a/crates/freeze/src/types/datatypes/scalar.rs +++ b/crates/freeze/src/types/datatypes/scalar.rs @@ -10,52 +10,104 @@ use crate::types::{ /// Balance Diffs Dataset pub struct BalanceDiffs; +/// Balances Dataset +pub struct Balances; /// Blocks Dataset pub struct Blocks; /// Code Diffs Dataset pub struct CodeDiffs; +/// Codes Dataset +pub struct Codes; +/// Contracts Dataset +pub struct Contracts; +/// Erc20 Balances Dataset +pub struct Erc20Balances; +/// Erc20 Metadata Dataset +pub struct Erc20Metadata; +/// Erc20 Supplies Dataset +pub struct Erc20Supplies; +/// Erc20 Transfers Dataset +pub struct Erc20Transfers; +/// Erc721 Metadata Dataset +pub struct Erc721Metadata; +/// Erc721 Transfers Dataset +pub struct Erc721Transfers; +/// Eth Calls Dataset +pub struct EthCalls; /// Logs Dataset pub struct Logs; /// Nonce Diffs Dataset pub struct NonceDiffs; +/// Nonces Dataset +pub struct Nonces; /// Storage Diffs Dataset pub struct StorageDiffs; +/// Storage Dataset +pub struct Storages; /// Traces Dataset pub struct Traces; +/// Trace Calls Dataset +pub struct TraceCalls; /// Transactions Dataset pub struct Transactions; +/// Transaction Addresses Dataset +pub struct TransactionAddresses; /// VmTraces Dataset pub struct VmTraces; /// Native Transfers Dataset pub struct NativeTransfers; -/// Contracts Dataset -pub struct Contracts; /// enum of possible datatypes that cryo can collect #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize)] pub enum Datatype { /// Balance Diffs BalanceDiffs, + /// Balances + Balances, /// Blocks Blocks, /// Code Diffs CodeDiffs, + /// Codes + Codes, + /// Contracts + Contracts, + /// Erc20 Balances + Erc20Balances, + /// Erc20 Metadata + Erc20Metadata, + /// Erc20 Supplies + Erc20Supplies, + /// Erc20 Transfers + Erc20Transfers, + /// Erc721 Metadata + Erc721Metadata, + /// Erc721 Transfers + Erc721Transfers, + /// Eth Calls + EthCalls, /// Logs Logs, /// Nonce Diffs NonceDiffs, - /// Transactions - Transactions, - /// Traces - Traces, + /// Nonces + Nonces, /// Storage Diffs StorageDiffs, + /// Storage + Storages, + /// Traces + Traces, + /// Trace Calls + TraceCalls, + /// Transactions + Transactions, + /// Transaction Addresses + TransactionAddresses, /// VmTraces VmTraces, /// Native Transfers NativeTransfers, - /// Contracts - Contracts, } impl Datatype { @@ -63,16 +115,29 @@ impl Datatype { pub fn dataset(&self) -> Box { match *self { Datatype::BalanceDiffs => Box::new(BalanceDiffs), + Datatype::Balances => Box::new(Balances), Datatype::Blocks => Box::new(Blocks), Datatype::CodeDiffs => Box::new(CodeDiffs), + Datatype::Codes => Box::new(Codes), + Datatype::Contracts => Box::new(Contracts), + Datatype::Erc20Balances => Box::new(Erc20Balances), + Datatype::Erc20Metadata => Box::new(Erc20Metadata), + Datatype::Erc20Supplies => Box::new(Erc20Supplies), + Datatype::Erc20Transfers => Box::new(Erc20Transfers), + Datatype::Erc721Metadata => Box::new(Erc721Metadata), + Datatype::Erc721Transfers => Box::new(Erc721Transfers), + 
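+            // Illustrative use of this registry: resolve a dynamic datatype to its
+            // Dataset implementation and query its metadata, e.g.
+            //
+            //     let ds = Datatype::Erc721Transfers.dataset();
+            //     assert_eq!(ds.name(), "erc721_transfers");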
Datatype::EthCalls => Box::new(EthCalls), Datatype::Logs => Box::new(Logs), Datatype::NonceDiffs => Box::new(NonceDiffs), - Datatype::Transactions => Box::new(Transactions), - Datatype::Traces => Box::new(Traces), + Datatype::Nonces => Box::new(Nonces), Datatype::StorageDiffs => Box::new(StorageDiffs), + Datatype::Storages => Box::new(Storages), + Datatype::Traces => Box::new(Traces), + Datatype::TraceCalls => Box::new(TraceCalls), + Datatype::Transactions => Box::new(Transactions), + Datatype::TransactionAddresses => Box::new(TransactionAddresses), Datatype::VmTraces => Box::new(VmTraces), Datatype::NativeTransfers => Box::new(NativeTransfers), - Datatype::Contracts => Box::new(Contracts), } } } @@ -97,6 +162,16 @@ pub trait Dataset: Sync + Send { /// default sort order for dataset fn default_sort(&self) -> Vec; + /// default blocks for dataset + fn default_blocks(&self) -> Option { + None + } + + /// input arg aliases + fn arg_aliases(&self) -> HashMap { + HashMap::new() + } + /// collect dataset for a particular chunk async fn collect_chunk( &self, diff --git a/crates/freeze/src/types/mod.rs b/crates/freeze/src/types/mod.rs index a1dfcc82..29dfb07a 100644 --- a/crates/freeze/src/types/mod.rs +++ b/crates/freeze/src/types/mod.rs @@ -24,8 +24,12 @@ pub mod schemas; /// types related to summaries pub mod summaries; -pub use chunks::{AddressChunk, BlockChunk, Chunk, ChunkData, Subchunk, TransactionChunk}; +pub use chunks::{ + AddressChunk, BlockChunk, CallDataChunk, Chunk, ChunkData, SlotChunk, Subchunk, + TransactionChunk, +}; pub use conversions::{ToVecHex, ToVecU8}; +pub use dataframes::*; pub use datatypes::*; pub use files::{ColumnEncoding, FileFormat, FileOutput}; pub use queries::{MultiQuery, RowFilter, SingleQuery}; diff --git a/crates/freeze/src/types/queries.rs b/crates/freeze/src/types/queries.rs index e8982da6..5522a3d9 100644 --- a/crates/freeze/src/types/queries.rs +++ b/crates/freeze/src/types/queries.rs @@ -7,7 +7,7 @@ use ethers::prelude::*; use crate::{ types::{Chunk, Datatype, Table}, - CollectError, FileOutput, FreezeError, + AddressChunk, CallDataChunk, CollectError, FileOutput, FreezeError, SlotChunk, }; /// Query multiple data types @@ -69,12 +69,124 @@ fn list_files(dir: &Path) -> Result, std::io::Error> { } /// Options for fetching logs -#[derive(Clone)] +#[derive(Clone, Default)] pub struct RowFilter { /// topics to filter for pub topics: [Option>>; 4], /// address to filter for pub address: Option>, + /// address chunks to collect + pub address_chunks: Option>, + /// contract chunks to collect + pub contract_chunks: Option>, + /// to_address chunks to collect + pub to_address_chunks: Option>, + /// slot chunks to collect + pub slot_chunks: Option>, + /// call_data chunks to collect + pub call_data_chunks: Option>, +} + +impl RowFilter { + pub(crate) fn address_chunks(&self) -> Result, CollectError> { + match &self.address_chunks { + Some(address_chunks) => Ok(address_chunks.clone()), + _ => Err(CollectError::CollectError("must specify --address".to_string())), + } + } + + pub(crate) fn contract_chunks(&self) -> Result, CollectError> { + match &self.contract_chunks { + Some(contract_chunks) => Ok(contract_chunks.clone()), + _ => Err(CollectError::CollectError("must specify --contract".to_string())), + } + } + + // pub(crate) fn to_address_chunks(&self) -> Result, CollectError> { + // match &self.to_address_chunks { + // Some(to_address_chunks) => Ok(to_address_chunks.clone()), + // _ => Err(CollectError::CollectError("must specify --to-address".to_string())), + // 
} + // } + + // pub(crate) fn from_address_chunks(&self) -> Result, CollectError> { + // match &self.from_address_chunks { + // Some(from_address_chunks) => Ok(from_address_chunks.clone()), + // _ => Err(CollectError::CollectError("must specify --from-address".to_string())), + // } + // } + + pub(crate) fn slot_chunks(&self) -> Result, CollectError> { + match &self.slot_chunks { + Some(slot_chunks) => Ok(slot_chunks.clone()), + _ => Err(CollectError::CollectError("must specify slots".to_string())), + } + } + + pub(crate) fn call_data_chunks(&self) -> Result, CollectError> { + match &self.call_data_chunks { + Some(call_data_chunks) => Ok(call_data_chunks.clone()), + _ => Err(CollectError::CollectError("must specify call_data".to_string())), + } + } + + /// apply arg aliases + pub fn apply_arg_aliases(&self, arg_aliases: HashMap) -> RowFilter { + let mut row_filter: RowFilter = self.clone(); + for (from, to) in arg_aliases.iter() { + row_filter = match from.as_str() { + "address" => match to.as_str() { + "contract" => RowFilter { + contract_chunks: row_filter.address_chunks.clone(), + address_chunks: None, + ..row_filter.clone() + }, + "to_address" => RowFilter { + to_address_chunks: row_filter.address_chunks.clone(), + address_chunks: None, + ..row_filter.clone() + }, + _ => { + panic!("invalid alias") + } + }, + "contract" => match to.as_str() { + "address" => RowFilter { + address_chunks: row_filter.address_chunks.clone(), + contract_chunks: None, + ..row_filter.clone() + }, + "to_address" => RowFilter { + to_address_chunks: row_filter.contract_chunks.clone(), + contract_chunks: None, + ..row_filter.clone() + }, + _ => { + panic!("invalid alias") + } + }, + "to_address" => match to.as_str() { + "address" => RowFilter { + address_chunks: row_filter.to_address_chunks.clone(), + to_address_chunks: None, + ..row_filter.clone() + }, + "contract" => RowFilter { + contract_chunks: row_filter.to_address_chunks.clone(), + to_address_chunks: None, + ..row_filter.clone() + }, + _ => { + panic!("invalid alias") + } + }, + _ => { + panic!("invalid alias") + } + }; + } + row_filter + } } impl From for SingleQuery { diff --git a/crates/python/src/collect_adapter.rs b/crates/python/src/collect_adapter.rs index 7451b20e..440aa25e 100644 --- a/crates/python/src/collect_adapter.rs +++ b/crates/python/src/collect_adapter.rs @@ -38,6 +38,13 @@ use cryo_freeze::collect; compression = vec!["lz4".to_string()], report_dir = None, no_report = false, + address = None, + to_address = None, + from_address = None, + call_data = None, + function = None, + inputs = None, + slots = None, contract = None, topic0 = None, topic1 = None, @@ -81,7 +88,14 @@ pub fn _collect( compression: Vec, report_dir: Option, no_report: bool, - contract: Option, + address: Option>, + to_address: Option>, + from_address: Option>, + call_data: Option>, + function: Option>, + inputs: Option>, + slots: Option>, + contract: Option>, topic0: Option, topic1: Option, topic2: Option, @@ -121,6 +135,13 @@ pub fn _collect( compression, report_dir, no_report, + address, + to_address, + from_address, + call_data, + function, + inputs, + slots, contract, topic0, topic1, diff --git a/crates/python/src/freeze_adapter.rs b/crates/python/src/freeze_adapter.rs index bb0291cd..372a662e 100644 --- a/crates/python/src/freeze_adapter.rs +++ b/crates/python/src/freeze_adapter.rs @@ -39,6 +39,13 @@ use cryo_cli::{run, Args}; compression = vec!["lz4".to_string()], report_dir = None, no_report = false, + address = None, + to_address = None, + from_address = None, + 
call_data = None, + function = None, + inputs = None, + slots = None, contract = None, topic0 = None, topic1 = None, @@ -82,7 +89,14 @@ pub fn _freeze( compression: Vec, report_dir: Option, no_report: bool, - contract: Option, + address: Option>, + to_address: Option>, + from_address: Option>, + call_data: Option>, + function: Option>, + inputs: Option>, + slots: Option>, + contract: Option>, topic0: Option, topic1: Option, topic2: Option, @@ -122,6 +136,13 @@ pub fn _freeze( compression, report_dir, no_report, + address, + to_address, + from_address, + call_data, + function, + inputs, + slots, contract, topic0, topic1, From 20d94d1b24acab59664dbebded808136a0de17b8 Mon Sep 17 00:00:00 2001 From: sslivkoff Date: Fri, 15 Sep 2023 19:48:07 -0700 Subject: [PATCH 8/8] clean merged code --- collect.py | 29 -------- collect.rs | 10 --- crates/cli/src/args.rs | 4 +- crates/freeze/src/datasets/eth_calls.rs | 23 ------- crates/freeze/src/datasets/logs.rs | 91 ++++++++++++++----------- crates/freeze/src/types/conversions.rs | 8 +-- crates/freeze/src/types/queries.rs | 12 ++-- 7 files changed, 63 insertions(+), 114 deletions(-) delete mode 100644 collect.py delete mode 100644 collect.rs diff --git a/collect.py b/collect.py deleted file mode 100644 index dc24b507..00000000 --- a/collect.py +++ /dev/null @@ -1,29 +0,0 @@ -# The Ordering Dimension -# - blocks vs transactions are the same dimension, just different levels of granularity -# - selections in the ordering dimension are -# - sometimes over a single point in time -# - sometimes over a range of time - - -def freeze(query, datasets): - # a query starts off as a seires of lists of chunks - subqueries = partition_query(query, partition_by) - for dataset in datasets: - for subquery in subqueries: - collect_partition(subquery, dataset) - - -# break a query into subqueries -def partition_query(query, partition_by): - return [ - create_parition_query(partition, query) - for partition in create_partitions(query, partition_by): - ] - - -def collect_partition(query, dataset): - for request in get_query_requests(query): - dataset.perform_request(request) - df = results_to_df() - - diff --git a/collect.rs b/collect.rs deleted file mode 100644 index 8630e138..00000000 --- a/collect.rs +++ /dev/null @@ -1,10 +0,0 @@ - - - -fn collect(filter: RowFitler) { - -} - - - - diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index d87440a0..dc642e0d 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -152,11 +152,11 @@ pub struct Args { pub address: Option>, /// To Address - #[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + #[arg(long, help_heading = "Dataset-specific Options", num_args(1..), value_name="TO")] pub to_address: Option>, /// From Address - #[arg(long, help_heading = "Dataset-specific Options", num_args(1..))] + #[arg(long, help_heading = "Dataset-specific Options", num_args(1..), value_name="FROM")] pub from_address: Option>, /// [eth_calls] Call data to use for eth_calls diff --git a/crates/freeze/src/datasets/eth_calls.rs b/crates/freeze/src/datasets/eth_calls.rs index 4287c36e..dab469ea 100644 --- a/crates/freeze/src/datasets/eth_calls.rs +++ b/crates/freeze/src/datasets/eth_calls.rs @@ -1,26 +1,3 @@ -// want to be able to include either the raw call data or the decoded arguments -// want to be able to include raw_output or decoded columns -// - could do output_decoded as a json object -// -// ways to specify call data -// - give complete raw call data `--call-data` -// - 0x28797abc -// - specify 
function and arguments separately -// - give function `--function` -// - by name: "totalSupply()" "balanceOf(address)" -// - by json: '{"name": "balanceOf", ...}" -// - give input arguments `--inputs -// - abi encoded: 0x28797abc -// - raw: 5000 -// - give semantic call -// - totalSupply() -// - balanceOf(0x28797abc...278) -// -// ways to specify output type -// - ignore, store only raw output data -// - provide function abi json -// - provide --call-output, e.g. `--call-output u256` - use crate::{conversions::ToVecHex, types::EthCalls, ColumnType, Dataset, Datatype}; use std::collections::HashMap; use tokio::{sync::mpsc, task}; diff --git a/crates/freeze/src/datasets/logs.rs b/crates/freeze/src/datasets/logs.rs index 3b4fe99f..a5e16be8 100644 --- a/crates/freeze/src/datasets/logs.rs +++ b/crates/freeze/src/datasets/logs.rs @@ -488,21 +488,32 @@ mod test { use super::*; use polars::prelude::DataType::Boolean; - // fn make_log_decoder() -> LogDecoder { - // let e = HumanReadableParser::parse_event(RAW).unwrap(); - - // LogDecoder { raw: RAW.to_string(), event: e.clone() } - // } - - // #[test] - // fn test_mapping_log_into_type_columns() { - // let decoder = make_log_decoder(); - // // let log = serde_json::from_str::(RAW_LOG).unwrap(); - // // let m = decoder.parse_log_from_event(vec![log]); - // assert_eq!(m.len(), 2); - // assert_eq!(m.get("msgSender").unwrap().len(), 1); - // assert_eq!(m.get("mintQuantity").unwrap().len(), 1); - // } + const RAW: &str = "event NewMint(address indexed msgSender, uint256 indexed mintQuantity)"; + const RAW_LOG: &str = r#"{ + "address": "0x0000000000000000000000000000000000000000", + "topics": [ + "0x52277f0b4a9b555c5aa96900a13546f972bda413737ec164aac947c87eec6024", + "0x00000000000000000000000062a73d9116eda78a78f4cf81602bdc926fb4c0dd", + "0x0000000000000000000000000000000000000000000000000000000000000003" + ], + "data": "0x" + }"#; + + fn make_log_decoder() -> LogDecoder { + let e = HumanReadableParser::parse_event(RAW).unwrap(); + + LogDecoder { raw: RAW.to_string(), event: e.clone() } + } + + #[test] + fn test_mapping_log_into_type_columns() { + let decoder = make_log_decoder(); + let log = serde_json::from_str::(RAW_LOG).unwrap(); + let m = decoder.parse_log_from_event(vec![log]); + assert_eq!(m.len(), 2); + assert_eq!(m.get("msgSender").unwrap().len(), 1); + assert_eq!(m.get("mintQuantity").unwrap().len(), 1); + } #[test] fn test_parsing_bools() { @@ -544,29 +555,29 @@ mod test { assert_eq!(s.len(), 2) } - // #[test] - // fn test_parsing_big_ints() { - // let s = make_log_decoder() - // .make_series( - // "msgSender".to_string(), - // vec![Token::Int(U256::max_value()), Token::Int(2.into())], - // 2, - // ) - // .unwrap(); - // assert_eq!(s.dtype(), &DataType::Utf8); - // assert_eq!(s.len(), 2) - // } - - // #[test] - // fn test_parsing_addresses() { - // let s = make_log_decoder() - // .make_series( - // "ints".to_string(), - // vec![Token::Address(Address::zero()), Token::Address(Address::zero())], - // 2, - // ) - // .unwrap(); - // assert_eq!(s.dtype(), &DataType::Utf8); - // assert_eq!(s.len(), 2) - // } + #[test] + fn test_parsing_big_ints() { + let s = make_log_decoder() + .make_series( + "msgSender".to_string(), + vec![Token::Int(U256::max_value()), Token::Int(2.into())], + 2, + ) + .unwrap(); + assert_eq!(s.dtype(), &DataType::Utf8); + assert_eq!(s.len(), 2) + } + + #[test] + fn test_parsing_addresses() { + let s = make_log_decoder() + .make_series( + "ints".to_string(), + vec![Token::Address(Address::zero()), 
Token::Address(Address::zero())], + 2, + ) + .unwrap(); + assert_eq!(s.dtype(), &DataType::Utf8); + assert_eq!(s.len(), 2) + } } diff --git a/crates/freeze/src/types/conversions.rs b/crates/freeze/src/types/conversions.rs index 11224006..9fbea194 100644 --- a/crates/freeze/src/types/conversions.rs +++ b/crates/freeze/src/types/conversions.rs @@ -11,8 +11,8 @@ pub trait ToVecU8 { impl ToVecU8 for U256 { fn to_vec_u8(&self) -> Vec { let mut vec = Vec::new(); - for &number in self.0.iter().rev() { - vec.extend_from_slice(&number.to_be_bytes()); + for &number in self.0.iter() { + vec.extend_from_slice(&number.to_ne_bytes()); } vec } @@ -22,8 +22,8 @@ impl ToVecU8 for Vec { fn to_vec_u8(&self) -> Vec { let mut vec = Vec::new(); for value in self { - for &number in value.0.iter().rev() { - vec.extend_from_slice(&number.to_be_bytes()); + for &number in value.0.iter() { + vec.extend_from_slice(&number.to_ne_bytes()); } } vec diff --git a/crates/freeze/src/types/queries.rs b/crates/freeze/src/types/queries.rs index 5522a3d9..3ec41af2 100644 --- a/crates/freeze/src/types/queries.rs +++ b/crates/freeze/src/types/queries.rs @@ -102,12 +102,12 @@ impl RowFilter { } } - // pub(crate) fn to_address_chunks(&self) -> Result, CollectError> { - // match &self.to_address_chunks { - // Some(to_address_chunks) => Ok(to_address_chunks.clone()), - // _ => Err(CollectError::CollectError("must specify --to-address".to_string())), - // } - // } + pub(crate) fn to_address_chunks(&self) -> Result, CollectError> { + match &self.to_address_chunks { + Some(to_address_chunks) => Ok(to_address_chunks.clone()), + _ => Err(CollectError::CollectError("must specify --to-address".to_string())), + } + } // pub(crate) fn from_address_chunks(&self) -> Result, CollectError> { // match &self.from_address_chunks {
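
The RowFilter plumbing in this patch is meant to be driven by each dataset's arg_aliases(): a dataset declares which generic CLI argument maps onto which of its own columns, apply_arg_aliases remaps the corresponding chunks, and the per-field accessors turn a missing argument into a CollectError. A minimal sketch of a caller inside the freeze crate, assuming RowFilter, Dataset, and CollectError from this crate are in scope; collect_with_filter and its bindings are hypothetical names for illustration, not part of the patch:

    fn collect_with_filter(dataset: &dyn Dataset, filter: &RowFilter) -> Result<(), CollectError> {
        // remap e.g. a generic `--address` argument onto this dataset's `contract` column
        let filter = filter.apply_arg_aliases(dataset.arg_aliases());

        // each accessor returns the requested chunks or a CollectError naming the missing argument
        let _contracts = filter.contract_chunks()?;
        let _to_addresses = filter.to_address_chunks()?;

        Ok(())
    }
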