From 88d823c41ff48d4f1f35bb46b6232511b3491964 Mon Sep 17 00:00:00 2001 From: Christopher Brooks Date: Mon, 11 Nov 2019 13:24:30 +0000 Subject: [PATCH] example --- bonus_example.ipynb | 275 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 bonus_example.ipynb diff --git a/bonus_example.ipynb b/bonus_example.ipynb new file mode 100644 index 0000000..fc33530 --- /dev/null +++ b/bonus_example.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matching Characters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Individual characters can be matched in regular expressions simply by indicating the characters as a pattern. A special character, `.`, is used as a wildcard to indicate that any character which is not a newline can be matched instead. Some special characters, such as `\\`, `(`, and `)` might need to be escaped in order to be matched. To escape a character put a `\\` in front of it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Examples\n", + "In these examples we will use the text of article five of the United States Constitution, as found on wikipedia at https://en.wikipedia.org/wiki/Article_Five_of_the_United_States_Constitution." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example One" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the',\n", + " 'the']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = r\"\"\"\n", + "The Congress, whenever two thirds of both houses shall deem it necessary, shall propose amendments to this \n", + "Constitution, or, on the application of the legislatures of two thirds of the several states, shall call a \n", + "convention for proposing amendments, which, in either case, shall be valid to all intents and purposes, as \n", + "part of this Constitution, when ratified by the legislatures of three fourths of the several states, or by \n", + "conventions in three fourths thereof, as the one or the other mode of ratification may be proposed by the \n", + "Congress; provided that no amendment which may be made prior to the year one thousand eight hundred and \n", + "eight shall in any manner affect the first and fourth clauses in the ninth section of the first article; \n", + "and that no state, without its consent, shall be deprived of its equal suffrage in the Senate.\"\"\"\n", + "\n", + "# To match all instances of the word \"the\" we just set that string to be the pattern\n", + "pattern=r\"the\"\n", + "\n", + "# We then import the python regular expression library\n", + "import re\n", + "\n", + "# And we look for all instances of this pattern\n", + "re.findall(pattern, text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example Two" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[' two ',\n", + " ' the ',\n", + " ' the ',\n", + " ' two ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ',\n", + " ' the ']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# To match all ithree letter words that begin with the letter \"t\" we just set our pattern to be a \"t\" followed\n", + "# by two wildcard characters and both preceded and followed by space characters, which indicate the beginning\n", + "# and ends of a word. Note that this won't work for words which end a sentence, as punctuation isn't a space, \n", + "# but we can deal with that in a different manner\n", + "pattern=r\" t.. \"\n", + "\n", + "# And we look for all instances of this pattern\n", + "re.findall(pattern, text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Problems\n", + "In these problems you will use text from the Canadian Charter of Rights and Freedoms section 2 from wikipedia https://en.wikipedia.org/wiki/Section_2_of_the_Canadian_Charter_of_Rights_and_Freedoms." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Problem 1\n", + "Count the number of freedoms listed (they are enumerated in parentheses). Return a single number." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "text=r\"\"\"\n", + "2. Everyone has the following fundamental freedoms:\n", + "(a) freedom of conscience and religion;\n", + "(b) freedom of thought, belief, opinion and expression, including freedom of the press and other media of communication;\n", + "(c) freedom of peaceful assembly; and\n", + "(d) freedom of association.\n", + "\"\"\"\n", + "\n", + "# Your code goes here" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4\n" + ] + } + ], + "source": [ + "# Solution\n", + "\n", + "# First we import the python re module\n", + "import re\n", + "\n", + "# In this pattern we want to match any single character in paretheses followed by a space. Note that we have\n", + "# to escape the parentheses.\n", + "pattern=r\"\\(.\\) \"\n", + "\n", + "# Now we just print the length of that list\n", + "print(len(re.findall(pattern,text)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Problem 2\n", + "How many times is a word beginning with the term \"free\" mentioned in this section of the document? Return a single number." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6\n" + ] + } + ], + "source": [ + "# Solution\n", + "\n", + "# In this pattern we want to match just the term \"free\" with a space in front of it. This wouldn't catch words\n", + "# that started a sentence or paragraph as they might not be preceeded by a space (e.g. the start of a new \n", + "# line) and they might have different capitalization.\n", + "pattern=r\" free\"\n", + "\n", + "# Now we just print the length of that list\n", + "print(len(re.findall(pattern,text)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reuse" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I authorize that this work may be used in further iterations of this or other classes with or without attribution in order to help people learn regex." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I do not authorize this work to be used in further iterations of this or other classes." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}