Wrote code using test-driven development for the new-starter Mantid Pytho…

…n course - the code contains the functions listed below: 1. get_file_name_from_command_line 2. read_ascii_file 3. split_hyphenated_word 4. remove_punctuations_from_text 5. split_file_content_into_words 6. count_word_occurence. The test functions are added to the test_wordcount.py file.
mantidproject · Jun 4, 2024 · cc17398 · cc17398
1 parent 19e0593
commit cc17398
Show file tree

Hide file tree

Showing 5 changed files with 94 additions and 26 deletions.
diff --git a/exercises-cpp/jimoh_yusuf/ex01_basics/src/main.cpp b/exercises-cpp/jimoh_yusuf/ex01_basics/src/main.cpp
@@ -1,7 +1,6 @@
 /**
  * Skeleton main routine
  */
-<<<<<<< HEAD
 
 #include <iostream>
 #include <fstream>
@@ -104,17 +103,3 @@ int main(int argc, char* argv[]) {
     std::cout << "Results written to " << outputFilePath << std::endl;
     return 0;
 }
-
-=======
-#include <iostream>
-
-int main(int argc, char *argv[])
-{
-    std::cout << "Hello, World!" << std::endl;
-    if (argc > 1) {
-        // This will print the first argument passed to your program
-        std::cout << argv[1] << std::endl;
-    }
-    return 0;
-}
->>>>>>> e139231 (completed exercise2 -read a sample text, clean the data, count the number of words and write to file)
diff --git a/exercises-python/template/ex01_basics/file.txt b/exercises-python/template/ex01_basics/file.txt
diff --git a/exercises-python/template/ex01_basics/holmes.txt b/exercises-python/template/ex01_basics/holmes.txt
diff --git a/exercises-python/template/ex01_basics/main.py b/exercises-python/template/ex01_basics/main.py
@@ -1,17 +1,64 @@
-#!/usr/bin/env python3
+import sys
+import re
+from collections import Counter
+
+
+def get_file_name_from_command_line():
+    try:
+        filename = sys.argv[1]
+        return filename
+    except IndexError:
+        raise ValueError(f"no commandline argument for filename was provided")
+
 
 def read_ascii_file(filename):
-	try:
-		f = open(filename, 'r')
-		return f.read()
-	except FileNotFoundError as e:
-		raise FileNotFoundError(f"The file {filename} was not found.")
+    try:
+        f = open(filename, 'r')
+        content = f.read()
+        f.close()
+        return content
+    except FileNotFoundError:
+        raise FileNotFoundError(f"The file {filename} was not found.")
 
-def main():
-	read_ascii_file("file.txt")
 
+def split_hyphenated_word(word):
+    return word.split("-")
 
-if __name__ == "__main__":
-	main()
 
+def remove_punctuations_from_text(word):
+    clean_word = re.sub(r'[.,?\'"!():]', '', word)
+    return clean_word
+
+
+def split_file_content_into_words(file_content):
+    word_list = []
+    for word in file_content.split():
+        word = word.lower()
+        word = remove_punctuations_from_text(word)
+        hyphenated_word_list = split_hyphenated_word(word)
+        word_list.extend(hyphenated_word_list)
+
+    return word_list
 
+
+def count_word_occurence(words):
+    word_counts = Counter(words)
+    for word, count in word_counts.items():
+        print(f'{word:<15} {count}')
+    return word_counts
+
+
+def main():
+    try:
+        filename = get_file_name_from_command_line()
+        file_content = read_ascii_file(filename)
+        words = split_file_content_into_words(file_content)
+        count_word_occurence(words)
+    except FileNotFoundError:
+        print(f"Error: The file '{filename}' was not found.")
+    except ValueError as e:
+        print(e)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/exercises-python/template/ex01_basics/test_wordcount.py b/exercises-python/template/ex01_basics/test_wordcount.py
@@ -22,7 +22,43 @@ def test_read_file_raise_file_found_error(self, mock_file):
             read_ascii_file(self.filename)
         mock_file.assert_called_once_with(self.filename, 'r')
 
+    def test_split_hyphenated_words(self):
+        word = "Hyphenated-Word"
+        hyphenated_word_list = split_hyphenated_word(word)
+        self.assertTrue(len(hyphenated_word_list) == 2)
+        self.assertTrue("Hyphenated" in hyphenated_word_list)
+        self.assertTrue("Word" in hyphenated_word_list)
 
+    def test_that_words_can_be_splitted_into_a_list(self):
+        file_content = 'The Content of the file\nThe Next Page-Two '
+        content_list = split_file_content_into_words(file_content)
+        self.assertTrue(len(content_list) > 0)
+        self.assertEqual(len(content_list), 9)
+        self.assertTrue("the" in content_list)
+        self.assertTrue("content" in content_list)
+        self.assertTrue("two" in content_list)
 
+    def test_remove_punctuation_from_word(self):
+        word_with_quotation = 'The"'
+        word_with_question_mark = 'Conten?t'
+        word_with_exclamation_mark = 'o!f'
+        word_with_bracket = "fil(e"
+        word_with_colon = "Nex:t"
+        clean_word_without_quotation = remove_punctuations_from_text(word_with_quotation)
+        clean_word_without_question_mark = remove_punctuations_from_text(word_with_question_mark)
+        clean_word_without_exclamation_mark = remove_punctuations_from_text(word_with_exclamation_mark)
+        clean_word_without_bracket = remove_punctuations_from_text(word_with_bracket)
+        clean_word_without_colon = remove_punctuations_from_text(word_with_colon)
+        self.assertEqual(clean_word_without_quotation, 'The')
+        self.assertEqual(clean_word_without_question_mark, 'Content')
+        self.assertEqual(clean_word_without_exclamation_mark, 'of')
+        self.assertEqual(clean_word_without_bracket, 'file')
+        self.assertEqual(clean_word_without_colon, 'Next')
 
-
+    def test_count_word_occurences(self):
+        words = ['hey', 'who', 'are', 'you', 'hey', 'nice', 'to', 'meet', 'you']
+        expected_dict = {'hey': 2, 'who': 1, 'are': 1, 'you': 2, 'nice': 1, 'to': 1, 'meet': 1}
+        word_dictionary = count_word_occurence(words)
+        self.assertEqual(word_dictionary, expected_dict)
+        self.assertEqual(word_dictionary['hey'], 2)
+        self.assertEqual(word_dictionary['who'], 1)