Skip to content

Commit

Permalink
Updated the miniconda base image to a specific version.
Browse files Browse the repository at this point in the history
  • Loading branch information
maaz112233 committed Jul 27, 2023
1 parent 1ee4a3d commit 672769d
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
5 changes: 4 additions & 1 deletion examples/etl_pipeline/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,19 @@ def extract_transform_load(dataset: DirectoryPath) -> DirectoryPath:

df = pd.DataFrame(data)
print(f"Total Number of universities from API {len(data)}")

df = df[df["name"].str.contains("California")]
print(f"Number of universities in california {len(df)}")

df["domains"] = [",".join(map(str, l)) for l in df["domains"]]
df["web_pages"] = [",".join(map(str, l)) for l in df["web_pages"]]
df = df.reset_index(drop=True)
df = df[["domains", "country", "web_pages", "name"]]

outdir = task.context().output
file_name = outdir / "file.csv"
print(df.head)
df.to_csv(file_name, sep="\t", encoding="utf-8")

return outdir


Expand Down
4 changes: 1 addition & 3 deletions pirlib/cli/dockerize.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def _generate_dockerfile(context_path: pathlib.Path, docker_base_image: str) ->

return "\n".join(
[
"FROM continuumio/miniconda3:latest AS base",
"FROM continuumio/miniconda3:4.12.0 AS base",
"ARG CONDA_ENV_B64",
"RUN echo $CONDA_ENV_B64 | base64 -d > /tmp/environment.yml",
"RUN conda env create -n pircli -f /tmp/environment.yml",
Expand All @@ -131,8 +131,6 @@ def _generate_dockerfile(context_path: pathlib.Path, docker_base_image: str) ->
f"WORKDIR {workdir}",
f"ENV PYTHONPATH={pythonpath}",
f'ENV PATH="{miniconda3}/envs/pircli/bin":$PATH',
"ENV PYSPARK_PYTHON=python3.8",
"ENV PYSPARK_DRIVER_PYTHON=python3.8",
]
)

Expand Down

0 comments on commit 672769d

Please sign in to comment.