feat: add experimental remote HDFS support for native DataFusion reader #1359
```diff
@@ -73,7 +73,7 @@ use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctio
 use crate::execution::shuffle::CompressionCodec;
 use crate::execution::spark_plan::SparkPlan;
-use crate::parquet::parquet_support::SparkParquetOptions;
+use crate::parquet::parquet_support::{register_object_store, SparkParquetOptions};
 use crate::parquet::schema_adapter::SparkSchemaAdapterFactory;
 use datafusion::datasource::listing::PartitionedFile;
 use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder;
```
```diff
@@ -1155,12 +1155,9 @@ impl PhysicalPlanner {
            ))
        });

-        let object_store = object_store::local::LocalFileSystem::new();
-        // register the object store with the runtime environment
-        let url = Url::try_from("file://").unwrap();
-        self.session_ctx
-            .runtime_env()
-            .register_object_store(&url, Arc::new(object_store));
+        // By default, the local FS object store is registered;
+        // if the `hdfs` feature is enabled, the HDFS object store is registered instead
+        register_object_store(Arc::clone(&self.session_ctx))?;

        // Generate file groups
        let mut file_groups: Vec<Vec<PartitionedFile>> =
```
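For context, here is a rough sketch of what a feature-gated `register_object_store` helper could look like. This is an assumption pieced together from the diff, not the actual code in `crate::parquet::parquet_support`; in particular, `HadoopFileSystem` and its constructor are placeholders for whichever HDFS-backed `ObjectStore` implementation the `hdfs` feature pulls in:

```rust
use std::sync::Arc;

use datafusion::common::Result;
use datafusion::execution::context::SessionContext;
use object_store::local::LocalFileSystem;
use url::Url;

/// Register the default object store on the session's runtime environment.
/// Sketch only; the real helper lives in `crate::parquet::parquet_support`.
pub fn register_object_store(session_ctx: Arc<SessionContext>) -> Result<()> {
    #[cfg(not(feature = "hdfs"))]
    {
        // Default: serve `file://` URLs from the local filesystem,
        // mirroring the inline registration this PR removes from the planner.
        let url = Url::parse("file://").unwrap();
        session_ctx
            .runtime_env()
            .register_object_store(&url, Arc::new(LocalFileSystem::new()));
    }
    #[cfg(feature = "hdfs")]
    {
        // Placeholder: `HadoopFileSystem` stands in for an ObjectStore-compatible
        // HDFS client; the type and constructor are illustrative, not a real API.
        let url = Url::parse("hdfs://namenode:9000").unwrap();
        let hdfs = HadoopFileSystem::new(url.as_str()).expect("failed to connect to HDFS");
        session_ctx
            .runtime_env()
            .register_object_store(&url, Arc::new(hdfs));
    }
    Ok(())
}
```

Centralizing registration behind one helper keeps the planner free of `cfg` branches and gives a single place to add further schemes later.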
```diff
@@ -1220,7 +1217,7 @@ impl PhysicalPlanner {
        // TODO: I think we can remove partition_count in the future, but leave for testing.
        assert_eq!(file_groups.len(), partition_count);

-        let object_store_url = ObjectStoreUrl::local_filesystem();
+        let object_store_url = ObjectStoreUrl::parse("hdfs://namenode:9000").unwrap();
        let partition_fields: Vec<Field> = partition_schema
            .fields()
            .iter()
```

Review comments on the hardcoded `hdfs://namenode:9000` URL:

- this will be addressed in #1360
- The url should be available as part of the file path passed in. (see line 1178 above)
- Thanks @parthchandra, it is already fixed.
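To illustrate the review suggestion above, here is a hypothetical helper (the name, fallback behavior, and error handling are assumptions, not code from this PR) that derives the `ObjectStoreUrl` from the scheme and authority of the incoming file path instead of hardcoding a namenode:

```rust
use datafusion::datasource::object_store::ObjectStoreUrl;
use url::{Position, Url};

/// Illustrative only: map a fully qualified file path to its object store URL,
/// e.g. "hdfs://namenode:9000/data/part-0.parquet" -> "hdfs://namenode:9000".
/// Paths with no scheme or authority fall back to the local filesystem.
fn object_store_url_for(file_path: &str) -> ObjectStoreUrl {
    match Url::parse(file_path) {
        // Keep only "scheme://host:port", dropping the path component.
        Ok(url) if url.has_host() => ObjectStoreUrl::parse(&url[..Position::BeforePath])
            .expect("scheme plus authority is a valid object store URL"),
        _ => ObjectStoreUrl::local_filesystem(),
    }
}
```

With something like this in place, the hardcoded `ObjectStoreUrl::parse("hdfs://namenode:9000")` above would become a lookup on the file path being scanned, which appears to be the direction of the follow-up fix mentioned in the thread.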
@andygrove I'm keeping the updated HDFS object storage in a personal repo for now; let me know if there are any concerns.