diff --git a/examples/extract_key_values_from_cbc_reports_in_different_layout.ipynb b/examples/extract_key_values_from_cbc_reports_in_different_layout.ipynb index e573076..9529498 100644 --- a/examples/extract_key_values_from_cbc_reports_in_different_layout.ipynb +++ b/examples/extract_key_values_from_cbc_reports_in_different_layout.ipynb @@ -194,95 +194,99 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[[[{'NAME': 'Test Patient A',\n", - " 'HAEMOGLOBIN': '12.6',\n", - " 'RBC (RED CELLS COUNT)': '4.54',\n", - " 'HAEMATOCRIT(PCV)': '38.1',\n", - " 'MCV': '83.9',\n", - " 'MCH': '27.8',\n", - " 'MCHC': '33.1',\n", - " 'TOTAL LEUCOCYTE COUNT': '4.62',\n", - " 'DIFFERENTIAL LEUCOCYTE COUNT': 'NA',\n", - " 'NEUTROPHILS': '66',\n", - " 'LYMPHOCYTES': '28',\n", - " 'MONOCYTES': '06',\n", - " 'EOSINOPHILS': '00',\n", - " 'BASOPHIL': '00',\n", - " 'PROMYELOCYTES': 'NA',\n", - " 'MYELOCYTES': 'NA',\n", - " 'METAMYELOCYTES': 'NA',\n", - " 'BLASTS': 'NA',\n", - " 'PLATELETS': '195',\n", - " 'RETICULOCYTE COUNT': 'NA'}]],\n", - " [[{'NAME': 'TEST PATIENT 12',\n", - " 'HAEMOGLOBIN': '12.8',\n", - " 'RBC (RED CELLS COUNT)': '4.5',\n", - " 'HAEMATOCRIT(PCV)': '42',\n", - " 'MCV': '93',\n", - " 'MCH': '28',\n", - " 'MCHC': '31',\n", - " 'TOTAL LEUCOCYTE COUNT': '12.6',\n", - " 'DIFFERENTIAL LEUCOCYTE COUNT': 'NA',\n", - " 'NEUTROPHILS': '67',\n", - " 'LYMPHOCYTES': '23',\n", - " 'MONOCYTES': '07',\n", - " 'EOSINOPHILS': '03',\n", - " 'BASOPHIL': '00',\n", - " 'PROMYELOCYTES': 'NA',\n", - " 'MYELOCYTES': 'NA',\n", - " 'METAMYELOCYTES': 'NA',\n", - " 'BLASTS': 'NA',\n", - " 'PLATELETS': '210',\n", - " 'RETICULOCYTE COUNT': 'NA'}]],\n", - " [[{'NAME': 'Test Patient 14',\n", - " 'HAEMOGLOBIN': '14.8',\n", - " 'RBC (RED CELLS COUNT)': '5.22',\n", - " 'HAEMATOCRIT(PCV)': '45.2',\n", - " 'MCV': '86.6',\n", - " 'MCH': '28.4',\n", - " 'MCHC': '32.7',\n", - " 'TOTAL LEUCOCYTE COUNT': '5.45',\n", - " 'DIFFERENTIAL LEUCOCYTE COUNT': 'NA',\n", - " 'NEUTROPHILS': '64.3',\n", - " 'LYMPHOCYTES': '25.1',\n", - " 'MONOCYTES': '7.2',\n", - " 'EOSINOPHILS': '2.8',\n", - " 'BASOPHIL': '0.6',\n", - " 'PROMYELOCYTES': 'NA',\n", - " 'MYELOCYTES': 'NA',\n", - " 'METAMYELOCYTES': 'NA',\n", - " 'BLASTS': 'NA',\n", - " 'PLATELETS': '287',\n", - " 'RETICULOCYTE COUNT': 'NA'}]],\n", - " [[{'NAME': 'Test Patient 20',\n", - " 'HAEMOGLOBIN': '11.3',\n", - " 'RBC (RED CELLS COUNT)': '4.2',\n", - " 'HAEMATOCRIT(PCV)': '35',\n", + "[[[{'SCL No': '1232475',\n", + " 'Name': 'Test Patient A',\n", + " 'Contact #': '03443730681',\n", + " 'Ward Name': 'NA',\n", + " 'Admission#': 'NA',\n", + " 'Collection Time': '15-Jan-202410:19',\n", + " 'REF BY': 'Test Doctor A',\n", + " 'Haemoglobin': '12.6 gm/dl',\n", + " 'Haematocrit': '38.1 %',\n", + " 'R.B.C': '4.54 10E12/L',\n", + " 'M.C.V': '83.9 FL',\n", + " 'M.C.H': '27,8 PG',\n", + " 'M.C.H.C': '33.1 %',\n", + " 'W.B.C': '4.62 x10E9/L',\n", + " 'Neutrophils': '66 %',\n", + " 'Lymphocytes': '28 %',\n", + " 'Eosinophils': '00 %',\n", + " 'Monocytes': '06 %',\n", + " 'Basophils': '00 %',\n", + " 'Platelet Count': '195 x10E9L',\n", + " 'Normochromic Normocytic': 'NA'}]],\n", + " [[{'MRNo': '231121-116431337',\n", + " 'InvoiceNo': '1115230939',\n", + " 'Name': 'TEST PATIENT 12',\n", + " 'Age/Gender': 'NA',\n", + " 'ReferredBy': 'SELF',\n", + " 'SampleDate': '22-Nov-2023',\n", + " 'HB': '12.8 gm/dL',\n", + " 'RBC': '4.5 x 10^12/L',\n", + " 'HCT': '42 %',\n", + " 'MCV': '93 fL',\n", + " 'MCH': '28 pg',\n", + " 'MCHC': '31 gm/dL',\n", + " 'WBC': '12.6 x 10e9/L',\n", + " 'PLATELETS': '210 x10^9/L',\n", + " 'NEUTROPHILS%': '67 %',\n", + " 'LYMPHOCYTES%': '23%',\n", + " 'MONOCYTES%': '07 %',\n", + " 'EOSINOPHILS%': '03 %',\n", + " 'BASOPHILS%': '00 %',\n", + " 'Remarks': 'Normocytic, normochromic. Leucocytosis. ? Cause. Clinical Correlation Advised'}]],\n", + " [[{'Lab No': '23020107031',\n", + " 'Name': 'Test Patient 14',\n", + " 'Age': 'NA',\n", + " 'Gender': 'NA',\n", + " 'Contact #': 'NA',\n", + " 'Location': 'NA',\n", + " 'Ward/ Bed': 'F-OPD',\n", + " 'Collection Time': '04-Mar-2023 11:13',\n", + " 'Reporting Time': '04-Mar-2023 12:14',\n", + " 'Haemoglobin': '14.8',\n", + " 'Haematocrit': '45.2',\n", + " 'M.C.V': '86.6',\n", + " 'M.C.H': '28.4',\n", + " 'M.C.H.C': '32.7',\n", + " 'RBC': '5.22',\n", + " 'Platelet Count': '287',\n", + " 'WBC': '5.45',\n", + " 'Neutrophils': '64.3',\n", + " 'Lymphocytes': '25.1',\n", + " 'Eosinophils': '2.8',\n", + " 'Monocytes': '7.2',\n", + " 'Basophils': '0.6',\n", + " 'ESR': '10'}]],\n", + " [[{'Name': 'Test Patient 20',\n", + " 'Age': 'NA',\n", + " 'Gender': 'NA',\n", + " 'TestDate': '10-June-2023',\n", + " 'TestDescription': 'Haematology',\n", + " 'Haemoglobin': '11.3',\n", + " 'RedBloodCellCount': '4.2',\n", + " 'Haematocrit': '35',\n", " 'MCV': '82',\n", " 'MCH': '27',\n", " 'MCHC': '33',\n", - " 'TOTAL LEUCOCYTE COUNT': '9,600',\n", - " 'DIFFERENTIAL LEUCOCYTE COUNT': 'NA',\n", - " 'NEUTROPHILS': '58',\n", - " 'LYMPHOCYTES': '40',\n", - " 'MONOCYTES': '01',\n", - " 'EOSINOPHILS': '01',\n", - " 'BASOPHIL': 'NA',\n", - " 'PROMYELOCYTES': 'NA',\n", - " 'MYELOCYTES': 'NA',\n", - " 'METAMYELOCYTES': 'NA',\n", - " 'BLASTS': 'NA',\n", - " 'PLATELETS': '238,000',\n", - " 'RETICULOCYTE COUNT': 'NA'}]]]" + " 'TotalLeukocyteCount': '9,600',\n", + " 'NeutrophilsPercentage': '58',\n", + " 'LymphocytesPercentage': '40',\n", + " 'MonocytesPercentage': '01',\n", + " 'EosinophilsPercentage': '01',\n", + " 'BasophilPercentage': 'NA',\n", + " 'Platelets': '238,000',\n", + " 'ReticulocyteCount': 'NA',\n", + " 'Normoblast': 'Normochromic. Anisocytosis.'}]]]" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -291,290 +295,6 @@ "qa_results" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Output Analysis\n", - "Now, we will analyze the output and compare it with the expected result. We'll take note of any missing keys, any additional keys added by OpenParser, and any incorrect values." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---------------------------------------------------\n", - "REPORT 1\n", - "\n", - "Summary: {'missing_keys': [], 'additional_keys': [], 'incorrect_keys': []}\n", - "---------------------------------------------------\n", - "REPORT 2\n", - "\n", - "Summary: {'missing_keys': [], 'additional_keys': [], 'incorrect_keys': []}\n", - "---------------------------------------------------\n", - "REPORT 3\n", - "\n", - "Summary: {'missing_keys': [], 'additional_keys': [], 'incorrect_keys': []}\n", - "---------------------------------------------------\n", - "REPORT 4\n", - "\n", - "Summary: {'missing_keys': [], 'additional_keys': [], 'incorrect_keys': []}\n" - ] - } - ], - "source": [ - "values_list = []\n", - "\n", - "def compare_vals(str1, str2):\n", - " try:\n", - " num1 = float(str1)\n", - " num2 = float(str2)\n", - " return num1 == num2\n", - " except ValueError:\n", - " return str1 == str2\n", - "\n", - "for idx, qa_result in enumerate(qa_results):\n", - " print('---------------------------------------------------')\n", - " print(f\"REPORT {idx+1}\\n\")\n", - " data = qa_result[0]\n", - " keys = [list(item.keys()) for item in data][0]\n", - " values = [list(item.values()) for item in data][0]\n", - "\n", - " this_expected_result = expected_result[idx]\n", - " missing_keys = []\n", - " incorrect_keys = []\n", - " expected_keys = list(this_expected_result.keys())\n", - "\n", - " for key_idx, key in enumerate(expected_keys):\n", - " if key_idx < 0 or key_idx >= len(keys) or key != keys[key_idx]:\n", - " print(f\"Key {key} not found in report {idx+1}\")\n", - " missing_keys.append(key)\n", - " keys.insert(key_idx, key)\n", - " values.insert(key_idx, 'Missing')\n", - " elif not compare_vals(this_expected_result[key], values[keys.index(key)]):\n", - " print(f\"Incorrect {key}. Expected: {this_expected_result[key]}, Found: {values[keys.index(key)]}\")\n", - " incorrect_keys.append((key, this_expected_result[key], values[keys.index(key)]))\n", - " values[keys.index(key)] = f'{values[keys.index(key)]} * ({this_expected_result[key]})'\n", - "\n", - " different_keys = {'missing_keys': missing_keys, 'additional_keys': [], 'incorrect_keys': incorrect_keys}\n", - "\n", - " # Check for differences in keys\n", - " for key in keys:\n", - " if key not in input_keys:\n", - " different_keys['additional_keys'].append(key)\n", - " print(f\"Summary: {different_keys}\")\n", - "\n", - " # Create a DataFrame\n", - " values_list.append(values)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Last, we print out the table with all the values extracted from the report. If a value is missing, it's highlighted yellow. If it's an incorrect value, it's highlighted red with the expected value in parentheses." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " | CBC_ReportSample_1.png | \n", - "CBC_ReportSample_12.jpeg | \n", - "CBC_ReportSample_14.jpeg | \n", - "CBC_ReportSample_20.jpeg | \n", - "
---|---|---|---|---|
NAME | \n", - "Test Patient A | \n", - "TEST PATIENT 12 | \n", - "Test Patient 14 | \n", - "Test Patient 20 | \n", - "
HAEMOGLOBIN | \n", - "12.6 | \n", - "12.8 | \n", - "14.8 | \n", - "11.3 | \n", - "
RBC (RED CELLS COUNT) | \n", - "4.54 | \n", - "4.5 | \n", - "5.22 | \n", - "4.2 | \n", - "
HAEMATOCRIT(PCV) | \n", - "38.1 | \n", - "42 | \n", - "45.2 | \n", - "35 | \n", - "
MCV | \n", - "83.9 | \n", - "93 | \n", - "86.6 | \n", - "82 | \n", - "
MCH | \n", - "27.8 | \n", - "28 | \n", - "28.4 | \n", - "27 | \n", - "
MCHC | \n", - "33.1 | \n", - "31 | \n", - "32.7 | \n", - "33 | \n", - "
TOTAL LEUCOCYTE COUNT | \n", - "4.62 | \n", - "12.6 | \n", - "5.45 | \n", - "9,600 | \n", - "
DIFFERENTIAL LEUCOCYTE COUNT | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "
NEUTROPHILS | \n", - "66 | \n", - "67 | \n", - "64.3 | \n", - "58 | \n", - "
LYMPHOCYTES | \n", - "28 | \n", - "23 | \n", - "25.1 | \n", - "40 | \n", - "
MONOCYTES | \n", - "06 | \n", - "07 | \n", - "7.2 | \n", - "01 | \n", - "
EOSINOPHILS | \n", - "00 | \n", - "03 | \n", - "2.8 | \n", - "01 | \n", - "
BASOPHIL | \n", - "00 | \n", - "00 | \n", - "0.6 | \n", - "NA | \n", - "
PROMYELOCYTES | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "
MYELOCYTES | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "
METAMYELOCYTES | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "
BLASTS | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "
PLATELETS | \n", - "195 | \n", - "210 | \n", - "287 | \n", - "238,000 | \n", - "
RETICULOCYTE COUNT | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "NA | \n", - "