
Commit

Merge branch 'master' into development
Kohulan authored May 4, 2024
2 parents 1e600d7 + 41138be commit 52635a4
Showing 9 changed files with 19 additions and 16 deletions.
6 changes: 3 additions & 3 deletions DECIMER/DECIMER_EfficinetNetV2_Transfomer_Trainer.py
@@ -250,7 +250,7 @@ def loss_fn(real, pred):
transformer_config["d_model"], lr_config["warm_steps"]
)

-# Instiate an optimizer
+# Instantiate an optimizer
optimizer = tf.keras.optimizers.Adam(
lr_scheduler, beta_1=0.9, beta_2=0.98, epsilon=1e-9
)
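
Note: the hunk above builds the optimizer from a custom learning-rate scheduler driven by `d_model` and `warm_steps`. A minimal sketch, assuming the scheduler follows the standard Transformer warmup schedule; the class name and example values below are illustrative, not DECIMER's actual code:

```python
import tensorflow as tf


class WarmupSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Illustrative warmup schedule: lr = d_model**-0.5 * min(step**-0.5, step * warm_steps**-1.5)."""

    def __init__(self, d_model, warm_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warm_steps = warm_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(
            tf.math.rsqrt(step), step * (self.warm_steps ** -1.5)
        )


# Example values; the trainer reads them from transformer_config / lr_config.
lr_scheduler = WarmupSchedule(d_model=512, warm_steps=4000)
optimizer = tf.keras.optimizers.Adam(
    lr_scheduler, beta_1=0.9, beta_2=0.98, epsilon=1e-9
)
```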
@@ -323,7 +323,7 @@ def loss_fn(real, pred):
start_epoch = int(ckpt_manager.latest_checkpoint.split("-")[-1])


-# Main training step fucntion
+# Main training step function
def train_step(image_batch, selfies_batch):
"""Main training step function.
@@ -376,7 +376,7 @@ def dist_train_step(image_batch, selfies_batch):


"""
-# Main validation step fucntion
+# Main validation step function
def validation_step(image_batch, selfies_batch):
selfies_batch_input = selfies_batch[:, :-1]
2 changes: 1 addition & 1 deletion DECIMER/Predictor_usingCheckpoints.py
@@ -45,7 +45,7 @@

max_length = 302

-# Image partameters
+# Image parameters
IMG_EMB_DIM = (16, 16, 512)
IMG_EMB_DIM = (IMG_EMB_DIM[0] * IMG_EMB_DIM[1], IMG_EMB_DIM[2])
IMG_SHAPE = (512, 512, 3)
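
Note: the `IMG_EMB_DIM` lines above flatten the backbone's 16x16 spatial grid into a sequence of image embeddings. A tiny illustration of that reshape (dummy data, not DECIMER code):

```python
import numpy as np

feature_map = np.zeros((16, 16, 512), dtype=np.float32)  # dummy encoder output
tokens = feature_map.reshape(16 * 16, 512)                # flatten spatial grid
print(tokens.shape)                                       # (256, 512)
```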
8 changes: 4 additions & 4 deletions DECIMER/Repack_model.py
@@ -140,7 +140,7 @@ class DECIMER_Predictor(tf.Module):
It loads the saved checkpoint and the necessary tokenizers. The
inference begins with the start token (<start>) and ends when the
end token(<end>) is met. This class can only work with tf.Tensor
-objects. The strings shoul gets transformed into np.arrays before
+objects. The strings should get transformed into np.arrays before
feeding them into this class.
"""

@@ -150,7 +150,7 @@ def __init__(self, encoder, tokenizer, transformer, max_length):
Args:
encoder (tf.keras.model): The encoder model
-tokenizer (tf.keras.tokenizer): Output tokenizer, defines which charater is assigned to what token
+tokenizer (tf.keras.tokenizer): Output tokenizer, defines which character is assigned to what token
transformer (tf.keras.model): The transformer model
max_length (int): Maximum length of a string which can get predicted
"""
@@ -160,7 +160,7 @@ def __init__(self, encoder, tokenizer, transformer, max_length):
self.max_length = max_length

def __call__(self, Decoded_image):
"""This fuction takes in the Decoded image as input and makes the
"""This function takes in the Decoded image as input and makes the
predicted list of tokens and return the tokens as tf.Tensor array.
Before feeding the input array we must define start and the end tokens.
@@ -225,7 +225,7 @@ def __init__(self, DECIMER):

@tf.function
def __call__(self, Decoded_Image):
"""This fucntion calls the __call__function from the translator class.
"""This function calls the __call__function from the translator class.
In the tf.function only the output sentence is returned. Thanks to the
non-strict execution in tf.function any unnecessary values are never
computed.
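
Note: the `DECIMER_Predictor` docstrings above describe decoding that starts from the `<start>` token and stops once `<end>` is produced. A minimal sketch of such a loop, assuming hypothetical `encoder`/`transformer` callables and token ids; the real `__call__` may differ:

```python
import tensorflow as tf


def greedy_decode(encoder, transformer, image_tensor, start_id, end_id, max_length):
    """Hypothetical sketch of <start>-to-<end> greedy decoding."""
    enc_output = encoder(image_tensor)                         # image features
    output = tf.expand_dims([start_id], 0)                     # (1, 1) start token
    for _ in range(max_length):
        preds = transformer([enc_output, output], training=False)
        next_id = tf.argmax(preds[:, -1, :], axis=-1, output_type=tf.int32)
        output = tf.concat([output, tf.expand_dims(next_id, 0)], axis=-1)
        if int(next_id[0]) == end_id:                          # stop at <end>
            break
    return output                                              # tokens as tf.Tensor
```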
2 changes: 1 addition & 1 deletion DECIMER/Utils/Create_image_tokenizer.py
@@ -70,7 +70,7 @@ def data_loader(Smiles_Path):
all_img_name = []

for line in smiles.split("\n"):
-# Split the ID and SMILES to seperate tokens
+# Split the ID and SMILES to separate tokens
tokens = line.split(",")

image_id = str(tokens[0]) + ".png"
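
Note: the loop above splits each `ID,SMILES` line and derives the image filename from the ID. A self-contained sketch of that parsing (function name and sample records are made up):

```python
def parse_id_smiles_lines(text: str):
    """Illustrative sketch of the ID,SMILES parsing shown above."""
    pairs = []
    for line in text.strip().split("\n"):
        tokens = line.split(",")          # separate the ID and the SMILES
        image_id = tokens[0] + ".png"     # image file named after the ID
        pairs.append((image_id, tokens[1]))
    return pairs


print(parse_id_smiles_lines("CHEM1,CCO\nCHEM2,c1ccccc1"))
# [('CHEM1.png', 'CCO'), ('CHEM2.png', 'c1ccccc1')]
```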
6 changes: 3 additions & 3 deletions DECIMER/config.py
@@ -232,7 +232,7 @@ def initialize_encoder_config(
defined configurations.
Args:
-image_embedding_dim (int): Embedding dimention of the input image
+image_embedding_dim (int): Embedding dimension of the input image
preprocessing_fn (method): Efficient Net preprocessing function for input image
backbone_fn (method): Calls Efficient-Net V2 as backbone for encoder
image_shape (int): Shape of the input image
@@ -267,7 +267,7 @@ def initialize_transformer_config(
n_transformer_layers (int): Number of layers present in the transformer model
transformer_d_dff (int): Transformer feed forward upwards projection size
transformer_n_heads (int): Number of heads present in the transformer model
-image_embedding_dim (int): Total number of dimension the image gets embeddeded
+image_embedding_dim (int): Total number of dimension the image gets embedded
dropout_rate (float, optional): Fraction of the input units to drop. Defaults to 0.1.
"""
self.transformer_config = dict(
@@ -330,7 +330,7 @@ def prepare_models(encoder_config, transformer_config, replica_batch_size, verbo
[type]: Optimizer, Encoder model and the Transformer
"""

-# Instiate an optimizer
+# Instantiate an optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00051)

# Instantiate the encoder model
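
Note: the hunks above document the transformer configuration fields and the optimizer created in `prepare_models`. An illustrative configuration using those field names; the values are placeholders, not DECIMER's defaults:

```python
import tensorflow as tf

transformer_config = dict(
    n_transformer_layers=4,    # layers in the transformer model
    transformer_d_dff=2048,    # feed-forward upwards projection size
    transformer_n_heads=8,     # attention heads
    image_embedding_dim=512,   # dimension the image gets embedded into
    dropout_rate=0.1,          # fraction of input units to drop
)

# prepare_models then instantiates the optimizer as in the diff above:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00051)
```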
2 changes: 1 addition & 1 deletion DECIMER/decimer.py
@@ -71,7 +71,7 @@ def get_models(model_urls: dict):


def detokenize_output(predicted_array: int) -> str:
"""This function takes the predited tokens from the DECIMER model and
"""This function takes the predicted tokens from the DECIMER model and
returns the decoded SMILES string.
Args:
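
Note: `detokenize_output` above maps predicted token ids back to a SMILES string. A hypothetical sketch of that step (the vocabulary and helper name are invented for illustration):

```python
def detokenize_sketch(predicted_ids, index_to_word):
    """Map predicted token ids to strings, drop <start>/<end>, join into one SMILES string."""
    words = [index_to_word[int(i)] for i in predicted_ids]
    words = [w for w in words if w not in ("<start>", "<end>")]
    return "".join(words)


vocab = {1: "<start>", 2: "C", 3: "O", 4: "<end>"}   # made-up vocabulary
print(detokenize_sketch([1, 2, 2, 3, 4], vocab))      # "CCO"
```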
2 changes: 1 addition & 1 deletion DECIMER/efficientnetv2/effnetv2_model.py
@@ -180,7 +180,7 @@ def __init__(self, block_args, mconfig, name=None):

self.endpoints = None

-# Builds the block accordings to arguments.
+# Builds the block according to arguments.
self._build()

@property
2 changes: 1 addition & 1 deletion README.md
@@ -87,7 +87,7 @@ print(SMILES)
- This project is licensed under the MIT License - see the [LICENSE](https://raw.githubusercontent.com/Kohulan/DECIMER-Image_Transformer/master/LICENSE?token=AHKLIF3EULMCUKCFUHIPBMDARSMDO) file for details

## Citation
-- Rajan K, Brinkhaus HO, Agea MI, Zielesny A, Steinbeck C (2023) DECIMER.ai - An open platform for automated optical chemical structure identification, segmentation and recognition in scientific publications. ChemRxiv. doi: https://10.26434/chemrxiv-2023-xhcx9
+- Rajan K, Brinkhaus HO, Agea MI, Zielesny A, Steinbeck C DECIMER.ai - An open platform for automated optical chemical structure identification, segmentation and recognition in scientific publications. Nat. Commun. 14, 5045 (2023). https://doi.org/10.1038/s41467-023-40782-0
- Rajan, K., Zielesny, A. & Steinbeck, C. DECIMER 1.0: deep learning for chemical image recognition using transformers. J Cheminform 13, 61 (2021). https://doi.org/10.1186/s13321-021-00538-8

## References
5 changes: 4 additions & 1 deletion setup.py
@@ -20,9 +20,12 @@
author_email="[email protected]",
maintainer="Kohulan Rajan, Otto Brinkhaus ",
maintainer_email="[email protected], [email protected]",
description="DECIMER 2.4.0: Deep Learning for Chemical Image Recognition using Efficient-Net V2 + Transformer",
description="DECIMER 2.6.0: Deep Learning for Chemical Image Recognition using Efficient-Net V2 + Transformer",
long_description=long_description,
long_description_content_type="text/markdown",
+entry_points={
+    "console_scripts": ["decimer = DECIMER.decimer:main"],
+},
url="https://github.com/Kohulan/DECIMER-Image_Transformer",
packages=setuptools.find_packages(),
license="MIT",
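
Note: the added `console_scripts` entry wires a `decimer` command to `DECIMER.decimer:main`. Roughly, running that command after installation is equivalent to the following (assuming `main` takes no required arguments, as the entry point implies):

```python
from DECIMER.decimer import main  # the callable named in "decimer = DECIMER.decimer:main"

if __name__ == "__main__":
    main()  # what the installed `decimer` console script invokes
```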
