From d25eacb867d8d0bc45c4017de2c5c2de5d5cca5b Mon Sep 17 00:00:00 2001 From: Robin Engler Date: Fri, 3 Mar 2023 18:02:10 +0100 Subject: [PATCH] notebook 04: move part of exercise 4.2 to Additional Exercises --- notebooks/04_modules.ipynb | 80 +++++++++++-- notebooks/04_modules_exercises.ipynb | 55 +++++++-- ...ise_43_module.py => exercise_44_module.py} | 0 notebooks/solutions/solution_42.py | 33 ------ notebooks/solutions/solution_43.py | 111 ++++++------------ notebooks/solutions/solution_44.py | 75 ++++++++++++ 6 files changed, 230 insertions(+), 124 deletions(-) rename notebooks/{exercise_43_module.py => exercise_44_module.py} (100%) create mode 100644 notebooks/solutions/solution_44.py diff --git a/notebooks/04_modules.ipynb b/notebooks/04_modules.ipynb index 94fd875..1f59d6f 100644 --- a/notebooks/04_modules.ipynb +++ b/notebooks/04_modules.ipynb @@ -48,7 +48,7 @@ "Good news: almost everything you will want to do in Python has already been implemented by someone else. \n", "Many workflows have been developed into **modules** which can be **imported** into your Python session.\n", "\n", - "There are quite a few modules which come bundled with the basic Python installation (native modules), and even more if you installed Python via the **Anaconda distribution** (which you in principle you have for this course).\n", + "There are quite a few modules which come bundled with the basic Python installation (native modules), and even more if you installed Python via the **Anaconda distribution** (which in principle you did for this course).\n", "\n", "Additional packages with modules can be installed to your (environment-specific) library using the `conda package manager` or `pip`, both of which are shipped with Anaconda. \n", "\n", @@ -118,7 +118,7 @@ "source": [ "
\n", "\n", - "**Warning:** trying to call a function directly (in this case `mean()`), without prefixing it with its module name raises a **`NameError`**, because the name of individual functions are not imported into your python session's namespace." + "* **Warning:** trying to call a function directly (in this case `mean()`), without prefixing it with its module name raises a **`NameError`**, because the name of individual functions are not imported into your python session's namespace." ] }, { @@ -219,7 +219,23 @@ "outputs": [], "source": [ "# Something to avoid !\n", - "from pandas import *" + "from pandas import *\n", + "\n", + "# Display objects in namespace with the \"%whos\" jupyter command.\n", + "%whos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import pandas\n", + "\n", + "%whos" ] }, { @@ -262,7 +278,14 @@ "* **Organize your code** into multiple files, e.g. your main workflow in one file, and functions \n", " grouped by category in different files (modules).\n", "\n", - "This is done exactly like with built-in and external modules:" + "Importing your own module is done exactly like with built-in and external modules.\n", + "\n", + "
\n", + "\n", + "**Example:** import a module `my_own_module` from the file `my_own_module.py`.\n", + "* *Note:* The following example works because `my_own_module.py` is located in the\n", + " current working directory. More generally, module files must be stored at specific\n", + " locations to be importable." ] }, { @@ -290,26 +313,42 @@ "my_own_module.greeting(my_own_module.DEFAULT_USER)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "\n", + "* **Importing individual objects** from the module." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Importing individual objects from the module.\n", "from my_own_module import greeting, DEFAULT_USER\n", "\n", "greeting(name=\"Bob\")\n", "greeting(DEFAULT_USER)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "\n", + "* Importing the module as an **alias**." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Importing the module as an alias.\n", "import my_own_module as mom\n", "\n", "mom.greeting(name=\"James\")" @@ -391,7 +430,7 @@ "* `os.path.dirname(path)` - returns the parent directory of the last element of a path.\n", "* `os.path.isfile(path)` - returns `True` if `path` is an existing regular file (note: follows symlinks\n", " -> returns `True` for symlinks).\n", - "* `os.path.isdir()` - returns `True` if `path` is an existing directory.\n", + "* `os.path.isdir(path)` - returns `True` if `path` is an existing directory.\n", "* `os.path.join(path1, path2, ...)` - returns a new path by appending all paths passed as arguments one after the other.\n", "\n", "
\n", @@ -458,6 +497,27 @@ " print(\"Looks like this file does not exist!\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:** creating a path with `os.path.join()` vs. string concatenation.\n", + ">\n", + "> **Question:** in the example below, the objective is to build the path of a file located in the\n", + " current working directory. What type of problem does using `os.path.join()` solve?\n", + " \n", + " ```python\n", + " current_wd = os.getcwd()\n", + " file_name = \"my_output.csv\"\n", + "\n", + " output_file = os.path.join(current_wd, file_name)\n", + " output_file = current_wd + \"/\" + file_name\n", + " \n", + " with open(output_file, \"w\") as f:\n", + " print(\"printing to file...\", file=f)\n", + " ```" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -465,7 +525,7 @@ "
\n", "\n", "* Example of a function that lists the content of a directory.\n", - " Can be used as **inspiration for exercise 4.1**" + " Can be used as **inspiration for exercise 4.2**" ] }, { @@ -805,12 +865,12 @@ "test_sequence = \"ATAGAGCGATCGATCCCTAG\"\n", "\n", "start_time = time.time() \n", - "revcomp_v1 = reverse_complement_v1(test_sequence)\n", + "rev_comp_v1 = reverse_complement_v1(test_sequence)\n", "time_v1 = time.time() - start_time\n", "print(time_v1)\n", "\n", "start_time = time.time()\n", - "revcomp_v2 = reverse_complement_v2(test_sequence)\n", + "rev_comp_v2 = reverse_complement_v2(test_sequence)\n", "time_v2 = time.time() - start_time\n", "print(time_v2)" ] diff --git a/notebooks/04_modules_exercises.ipynb b/notebooks/04_modules_exercises.ipynb index ffd2f0e..e6a2c5b 100644 --- a/notebooks/04_modules_exercises.ipynb +++ b/notebooks/04_modules_exercises.ipynb @@ -49,12 +49,11 @@ "\n", "## Exercise 4.2\n", "\n", - "Write a function that takes as argument the path of a directory and returns the number of files present in the directory (non-recursively, i.e. no need to search files in subdirectories).\n", - "\n", - "**Additional tasks (if you have time):**\n", + "Write a function that takes as argument the path of a directory and returns the number of files present in the directory (non-recursively, i.e. no need to search files in subdirectories). \n", "* Make sure your function is still working with a directory that is not the current working directory.\n", - "* Add an optional argument \"ignore_hidden\" that, when set to `True`, will ignore hidden files (i.e.\n", - " files whose name is starting with a dot, e.g. `.DS_Store`)." + "* **Hints:** you will need to use the `os.listdir()` and `os.path.isfile()` functions from the `os` module.\n", + "* **Warning:** the `os.path.isfile()` function requires that you give the full path of the\n", + " file/directory you want to check (either absolute or relative)."
] }, { @@ -94,10 +93,50 @@ "# Additional Exercises\n", "---------------------------------\n", "\n", - "\n", "## Exercise 4.3\n", "\n", - "Import the function `is_part_of_set` of the `exercise_43_module` module, located in the same directory as this notebook.\n", + "Re-use the function you wrote in exercise 4.2 above, and add the following improvement:\n", + "* Add an optional argument `ignore_hidden` that, when set to `True`, will ignore hidden files (i.e.\n", + " files whose name starts with a dot, e.g. `.DS_Store`).\n", + "* The default value of `ignore_hidden` should be `False`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "\n", + "### Solution:\n", + "Uncomment and run the cell below to show the solution." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %load solutions/solution_43.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
\n", + "\n", + "## Exercise 4.4\n", + "\n", + "Import the function `is_part_of_set` of the `exercise_44_module` module, located in the same directory as this notebook.\n", "1. What does `is_part_of_set` do ?\n", "2. Use `is_part_of_set` to get the list of all prime numbers between 2 to 50000.\n", "3. How long does this computation takes ?\n", @@ -132,7 +171,7 @@ "metadata": {}, "outputs": [], "source": [ - "# %load solutions/solution_43.py" + "# %load solutions/solution_44.py" ] } ], diff --git a/notebooks/exercise_43_module.py b/notebooks/exercise_44_module.py similarity index 100% rename from notebooks/exercise_43_module.py rename to notebooks/exercise_44_module.py diff --git a/notebooks/solutions/solution_42.py b/notebooks/solutions/solution_42.py index 25ddb3c..0655167 100644 --- a/notebooks/solutions/solution_42.py +++ b/notebooks/solutions/solution_42.py @@ -35,36 +35,3 @@ def count_files_2(input_dir): return sum( (1 for x in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, x))) ) - - -# Optional task: add an optional argument "ignore_hidden" that, when set to -# "True", will ignore hidden files (i.e. files whose name is starting with -# a dot, e.g. ".DS_Store") - -def count_files(dir_name, ignore_hidden=False): - """Counts files present in the input directory. - Only files are counted, directories are ignored. - """ - - # Initialize file counter. - file_count = 0 - - # Loop through all files and directories present in the input directory. - for f in os.listdir(path=dir_name): - - # Get the absolute path of the file/directory. - full_path = os.path.join(dir_name, f) - - # Verify the path corresponds to a file, not a directory. 
- if os.path.isfile(full_path) and not (ignore_hidden and f.startswith(".")): - file_count += 1 - - return file_count - -parent_dir = os.path.dirname(os.getcwd()) -print("File count in [", parent_dir, "]: ", count_files(parent_dir), sep="") -print( - "File count (excluding hidden files) in [", parent_dir, "]: ", - count_files(parent_dir, ignore_hidden=True), - sep="" -) \ No newline at end of file diff --git a/notebooks/solutions/solution_43.py b/notebooks/solutions/solution_43.py index eea427b..0108681 100644 --- a/notebooks/solutions/solution_43.py +++ b/notebooks/solutions/solution_43.py @@ -1,75 +1,40 @@ # Exercise 4.3 -# 1. What does is_part_of_set do? -# ******************************* -from exercise_43_module import is_part_of_set - -# help(is_part_of_set) -# "is_part_of_set()" returns True if its argument is a prime number -# and False otherwise. - -for n in range(10): - print(n, "->", is_part_of_set(n)) - - -# 2 and 3. Use is_part_of_set to get all primes numbers between 2 and 50000 -# ************************************************************************* -from exercise_43_module import is_part_of_set -from time import time - -primes = [] - -# Get the current time, so we can compute elapsed time at the end. -t0 = time() -for i in range(50000): - if is_part_of_set(i): - primes.append(i) - -time_first_algo = time() - t0 -print("it took", time_first_algo, "seconds") -print("found", len(primes), "prime numbers") - - -# Optional: devise a more time efficient way of getting the prime numbers -# *********************************************************************** -# -# Principle: rather than testing each number separately, we test the whole -# set of numbers at once by going over all number and "eliminating" all -# multiples of that number. -from time import time - -t0 = time() - -# Phase1 : initialization -upperLimit = 50000 - -# Create a list that contains True for all numbers we want to test. 
-# During the algorithm we will set all non-prime numbers to False. -arePrime = [True] * (upperLimit + 1) -arePrime[0] = False # 0 is not prime -arePrime[1] = False # 1 is not prime - - -# Phase2 : go through all numbers -primes2 = [] - -for i in range(2, upperLimit + 1): # for each candidate number - - # only do something if that number has not been set as a non-prime before - if arePrime[i]: - primes2.append(i) - - # then we want to set all multiples of that number as non-prime - mult = 2 * i - - # all multiples until the upper limit is reached - while mult <= upperLimit: - arePrime[mult] = False # set the multiple to non-prime - mult += i # nest multiple - -time_second_algo = time() - t0 - -print("it took", time() - t0, "seconds") -print("speedup compared to first algorithm:", time_first_algo / time_second_algo) -print("found", len(primes2), "prime numbers") -print("is this list the same as with the first algorithm ?", primes == primes2) +import os # Import the os module into the global namespace. + +# We re-use the function from exercise 4.2, and add an optional argument +# "ignore_hidden" that, when set to "True", will ignore hidden files (i.e. +# files whose name is starting with a dot, e.g. ".DS_Store") + +def count_files(dir_name, ignore_hidden=False): + """Counts files present in the input directory. + Only files are counted, directories are ignored. + + Arguments: + dir_name: path of directory in which to count files. + ignore_hidden: Optional. If set to True, hidden files (files that + start with a '.') are ignored from the count. + """ + + # Initialize file counter. + file_count = 0 + + # Loop through all files and directories present in the input directory. + for f in os.listdir(path=dir_name): + + # Get the absolute path of the file/directory. + full_path = os.path.join(dir_name, f) + + # Verify the path corresponds to a file, not a directory. 
+ if os.path.isfile(full_path) and not (ignore_hidden and f.startswith(".")): + file_count += 1 + + return file_count + +parent_dir = os.path.dirname(os.getcwd()) +print("File count in [", parent_dir, "]: ", count_files(parent_dir), sep="") +print( + "File count (excluding hidden files) in [", parent_dir, "]: ", + count_files(parent_dir, ignore_hidden=True), + sep="" +) \ No newline at end of file diff --git a/notebooks/solutions/solution_44.py b/notebooks/solutions/solution_44.py new file mode 100644 index 0000000..5774dc9 --- /dev/null +++ b/notebooks/solutions/solution_44.py @@ -0,0 +1,75 @@ +# Exercise 4.4 + +# 1. What does is_part_of_set do? +# ******************************* +from exercise_44_module import is_part_of_set + +# help(is_part_of_set) +# "is_part_of_set()" returns True if its argument is a prime number +# and False otherwise. + +for n in range(10): + print(n, "->", is_part_of_set(n)) + + +# 2 and 3. Use is_part_of_set to get all primes numbers between 2 and 50000 +# ************************************************************************* +from exercise_44_module import is_part_of_set +from time import time + +primes = [] + +# Get the current time, so we can compute elapsed time at the end. +t0 = time() +for i in range(50000): + if is_part_of_set(i): + primes.append(i) + +time_first_algo = time() - t0 +print("it took", time_first_algo, "seconds") +print("found", len(primes), "prime numbers") + + +# Optional: devise a more time efficient way of getting the prime numbers +# *********************************************************************** +# +# Principle: rather than testing each number separately, we test the whole +# set of numbers at once by going over all number and "eliminating" all +# multiples of that number. +from time import time + +t0 = time() + +# Phase1 : initialization +upperLimit = 50000 + +# Create a list that contains True for all numbers we want to test. +# During the algorithm we will set all non-prime numbers to False. 
+arePrime = [True] * (upperLimit + 1) +arePrime[0] = False # 0 is not prime +arePrime[1] = False # 1 is not prime + + +# Phase2 : go through all numbers +primes2 = [] + +for i in range(2, upperLimit + 1): # for each candidate number + + # only do something if that number has not been set as a non-prime before + if arePrime[i]: + primes2.append(i) + + # then we want to set all multiples of that number as non-prime + mult = 2 * i + + # all multiples until the upper limit is reached + while mult <= upperLimit: + arePrime[mult] = False # set the multiple to non-prime + mult += i # nest multiple + +time_second_algo = time() - t0 + +print("it took", time() - t0, "seconds") +print("speedup compared to first algorithm:", time_first_algo / time_second_algo) +print("found", len(primes2), "prime numbers") +print("is this list the same as with the first algorithm ?", primes == primes2)