Raising Environment Error on Single Node Clusters (#40)
* catching single node configurations

* test added

* environ.get over getenv

* updated error message
benrutter authored Jun 3, 2024
1 parent 6df6c46 commit df8be09
Showing 2 changed files with 16 additions and 1 deletion.
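For context on the first change: Databricks "Single Node" clusters run Spark in local mode, so the driver environment carries a master URL of the form `local[N]`. A minimal sketch of the detection idea, with an illustrative `local[8]` value (not taken from this commit):

```python
import os

# On a single-node Databricks cluster, Spark's master URL looks like
# "local[8]" (local mode across 8 driver cores); multi-node clusters
# report a non-local master, so this check stays quiet there.
master = os.environ.get("MASTER", "")
if "local[" in master:
    print("single-node configuration detected")
```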
dask_databricks/databrickscluster.py (8 additions, 1 deletion)
```diff
@@ -23,12 +23,19 @@ def __init__(
         loop: Optional[IOLoop] = None,
         asynchronous: bool = False,
     ):
-        self.spark_local_ip = os.getenv("SPARK_LOCAL_IP")
+        self.spark_local_ip = os.environ.get("SPARK_LOCAL_IP")
         if self.spark_local_ip is None:
             raise KeyError(
                 "Unable to find expected environment variable SPARK_LOCAL_IP. "
                 "Are you running this on a Databricks driver node?"
             )
+        if os.environ.get("MASTER") and "local[" in os.environ.get("MASTER"):
+            raise EnvironmentError(
+                "You appear to be trying to run a multi-node Dask cluster on a "
+                "single-node databricks cluster. Maybe you want "
+                "`dask.distributed.LocalCluster().get_client()` instead"
+
+            )
         try:
             name = spark.conf.get("spark.databricks.clusterUsageTags.clusterId")
         except AttributeError:
```
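The new error message points single-node users at Dask's in-process cluster instead. A hedged sketch of that suggested fallback (`Cluster.get_client()` is the call the message recommends; availability depends on your dask.distributed version):

```python
from dask.distributed import LocalCluster

# With no worker nodes to schedule on, run Dask on the driver's own
# cores via an in-process scheduler and workers.
cluster = LocalCluster()
client = cluster.get_client()

print(client.submit(sum, [1, 2, 3]).result())  # 6
```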
dask_databricks/tests/test_databricks.py (8 additions, 0 deletions)
```diff
@@ -38,6 +38,14 @@ def test_databricks_cluster_raises_key_error_when_initialised_outside_of_databricks(
     with pytest.raises(KeyError):
         DatabricksCluster()
 
+def test_databricks_cluster_raises_environment_error_when_master_variable_implies_single_node(
+    monkeypatch,
+    set_spark_local_ip,
+    dask_cluster,
+):
+    monkeypatch.setenv("MASTER", "local[8]")
+    with pytest.raises(EnvironmentError):
+        DatabricksCluster()
 
 def test_databricks_cluster_create(set_spark_local_ip, dask_cluster):
     cluster = DatabricksCluster()
```
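The new test leans on two fixtures from the project's conftest, `set_spark_local_ip` and `dask_cluster`, neither of which appears in this diff. A plausible shape for the first one, offered purely as an assumption:

```python
import pytest

@pytest.fixture
def set_spark_local_ip(monkeypatch):
    # Hypothetical stand-in for the project's real fixture: it satisfies
    # DatabricksCluster's SPARK_LOCAL_IP check so the constructor gets
    # past the KeyError guard and reaches the new single-node check.
    monkeypatch.setenv("SPARK_LOCAL_IP", "127.0.0.1")
```

With `MASTER` monkeypatched to `local[8]`, `DatabricksCluster()` raises the new `EnvironmentError` before it ever tries to reach a scheduler.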
