
Commit

Merge branch 'master' into development
Kohulan authored May 4, 2024
2 parents 1e600d7 + 41138be commit 52635a4
Showing 9 changed files with 19 additions and 16 deletions.
6 changes: 3 additions & 3 deletions DECIMER/DECIMER_EfficinetNetV2_Transfomer_Trainer.py
@@ -250,7 +250,7 @@ def loss_fn(real, pred):
transformer_config["d_model"], lr_config["warm_steps"]
)

-# Instiate an optimizer
+# Instantiate an optimizer
optimizer = tf.keras.optimizers.Adam(
lr_scheduler, beta_1=0.9, beta_2=0.98, epsilon=1e-9
)
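
Note: the hunk above builds the optimizer from a custom learning-rate scheduler driven by `d_model` and `warm_steps`. A minimal sketch, assuming the scheduler follows the standard Transformer warmup schedule; the class name and example values below are illustrative, not DECIMER's actual code:

```python
import tensorflow as tf


class WarmupSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Illustrative warmup schedule: lr = d_model**-0.5 * min(step**-0.5, step * warm_steps**-1.5)."""

    def __init__(self, d_model, warm_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warm_steps = warm_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(
            tf.math.rsqrt(step), step * (self.warm_steps ** -1.5)
        )


# Example values; the trainer reads them from transformer_config / lr_config.
lr_scheduler = WarmupSchedule(d_model=512, warm_steps=4000)
optimizer = tf.keras.optimizers.Adam(
    lr_scheduler, beta_1=0.9, beta_2=0.98, epsilon=1e-9
)
```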
@@ -323,7 +323,7 @@ def loss_fn(real, pred):
start_epoch = int(ckpt_manager.latest_checkpoint.split("-")[-1])


-# Main training step fucntion
+# Main training step function
def train_step(image_batch, selfies_batch):
"""Main training step function.
@@ -376,7 +376,7 @@ def dist_train_step(image_batch, selfies_batch):


"""
-# Main validation step fucntion
+# Main validation step function
def validation_step(image_batch, selfies_batch):
selfies_batch_input = selfies_batch[:, :-1]
2 changes: 1 addition & 1 deletion DECIMER/Predictor_usingCheckpoints.py
@@ -45,7 +45,7 @@

max_length = 302

-# Image partameters
+# Image parameters
IMG_EMB_DIM = (16, 16, 512)
IMG_EMB_DIM = (IMG_EMB_DIM[0] * IMG_EMB_DIM[1], IMG_EMB_DIM[2])
IMG_SHAPE = (512, 512, 3)
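
Note: the `IMG_EMB_DIM` lines above flatten the backbone's 16x16 spatial grid into a sequence of image embeddings. A tiny illustration of that reshape (dummy data, not DECIMER code):

```python
import numpy as np

feature_map = np.zeros((16, 16, 512), dtype=np.float32)  # dummy encoder output
tokens = feature_map.reshape(16 * 16, 512)                # flatten spatial grid
print(tokens.shape)                                       # (256, 512)
```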
8 changes: 4 additions & 4 deletions DECIMER/Repack_model.py
@@ -140,7 +140,7 @@ class DECIMER_Predictor(tf.Module):
It loads the saved checkpoint and the necessary tokenizers. The
inference begins with the start token (<start>) and ends when the
end token(<end>) is met. This class can only work with tf.Tensor
-objects. The strings shoul gets transformed into np.arrays before
+objects. The strings should get transformed into np.arrays before
feeding them into this class.
"""

@@ -150,7 +150,7 @@ def __init__(self, encoder, tokenizer, transformer, max_length):
Args:
encoder (tf.keras.model): The encoder model
-tokenizer (tf.keras.tokenizer): Output tokenizer, defines which charater is assigned to what token
+tokenizer (tf.keras.tokenizer): Output tokenizer, defines which character is assigned to what token
transformer (tf.keras.model): The transformer model
max_length (int): Maximum length of a string which can get predicted
"""
@@ -160,7 +160,7 @@ def __init__(self, encoder, tokenizer, transformer, max_length):
self.max_length = max_length

def __call__(self, Decoded_image):
"""This fuction takes in the Decoded image as input and makes the
"""This function takes in the Decoded image as input and makes the
predicted list of tokens and return the tokens as tf.Tensor array.
Before feeding the input array we must define start and the end tokens.
@@ -225,7 +225,7 @@ def __init__(self, DECIMER):

@tf.function
def __call__(self, Decoded_Image):
"""This fucntion calls the __call__function from the translator class.
"""This function calls the __call__function from the translator class.
In the tf.function only the output sentence is returned. Thanks to the
non-strict execution in tf.function any unnecessary values are never
computed.
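
Note: the `DECIMER_Predictor` docstrings above describe decoding that starts from the `<start>` token and stops once `<end>` is produced. A minimal sketch of such a loop, assuming hypothetical `encoder`/`transformer` callables and token ids; the real `__call__` may differ:

```python
import tensorflow as tf


def greedy_decode(encoder, transformer, image_tensor, start_id, end_id, max_length):
    """Hypothetical sketch of <start>-to-<end> greedy decoding."""
    enc_output = encoder(image_tensor)                         # image features
    output = tf.expand_dims([start_id], 0)                     # (1, 1) start token
    for _ in range(max_length):
        preds = transformer([enc_output, output], training=False)
        next_id = tf.argmax(preds[:, -1, :], axis=-1, output_type=tf.int32)
        output = tf.concat([output, tf.expand_dims(next_id, 0)], axis=-1)
        if int(next_id[0]) == end_id:                          # stop at <end>
            break
    return output                                              # tokens as tf.Tensor
```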
2 changes: 1 addition & 1 deletion DECIMER/Utils/Create_image_tokenizer.py
@@ -70,7 +70,7 @@ def data_loader(Smiles_Path):
all_img_name = []

for line in smiles.split("\n"):
-# Split the ID and SMILES to seperate tokens
+# Split the ID and SMILES to separate tokens
tokens = line.split(",")

image_id = str(tokens[0]) + ".png"
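
Note: the loop above splits each `ID,SMILES` line and derives the image filename from the ID. A self-contained sketch of that parsing (function name and sample records are made up):

```python
def parse_id_smiles_lines(text: str):
    """Illustrative sketch of the ID,SMILES parsing shown above."""
    pairs = []
    for line in text.strip().split("\n"):
        tokens = line.split(",")          # separate the ID and the SMILES
        image_id = tokens[0] + ".png"     # image file named after the ID
        pairs.append((image_id, tokens[1]))
    return pairs


print(parse_id_smiles_lines("CHEM1,CCO\nCHEM2,c1ccccc1"))
# [('CHEM1.png', 'CCO'), ('CHEM2.png', 'c1ccccc1')]
```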
6 changes: 3 additions & 3 deletions DECIMER/config.py
@@ -232,7 +232,7 @@ def initialize_encoder_config(
defined configurations.
Args:
-image_embedding_dim (int): Embedding dimention of the input image
+image_embedding_dim (int): Embedding dimension of the input image
preprocessing_fn (method): Efficient Net preprocessing function for input image
backbone_fn (method): Calls Efficient-Net V2 as backbone for encoder
image_shape (int): Shape of the input image
@@ -267,7 +267,7 @@ def initialize_transformer_config(
n_transformer_layers (int): Number of layers present in the transformer model
transformer_d_dff (int): Transformer feed forward upwards projection size
transformer_n_heads (int): Number of heads present in the transformer model
-image_embedding_dim (int): Total number of dimension the image gets embeddeded
+image_embedding_dim (int): Total number of dimension the image gets embedded
dropout_rate (float, optional): Fraction of the input units to drop. Defaults to 0.1.
"""
self.transformer_config = dict(
@@ -330,7 +330,7 @@ def prepare_models(encoder_config, transformer_config, replica_batch_size, verbo
[type]: Optimizer, Encoder model and the Transformer
"""

-# Instiate an optimizer
+# Instantiate an optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00051)

# Instantiate the encoder model
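
Note: the hunks above document the transformer configuration fields and the optimizer created in `prepare_models`. An illustrative configuration using those field names; the values are placeholders, not DECIMER's defaults:

```python
import tensorflow as tf

transformer_config = dict(
    n_transformer_layers=4,    # layers in the transformer model
    transformer_d_dff=2048,    # feed-forward upwards projection size
    transformer_n_heads=8,     # attention heads
    image_embedding_dim=512,   # dimension the image gets embedded into
    dropout_rate=0.1,          # fraction of input units to drop
)

# prepare_models then instantiates the optimizer as in the diff above:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00051)
```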
2 changes: 1 addition & 1 deletion DECIMER/decimer.py
@@ -71,7 +71,7 @@ def get_models(model_urls: dict):


def detokenize_output(predicted_array: int) -> str:
"""This function takes the predited tokens from the DECIMER model and
"""This function takes the predicted tokens from the DECIMER model and
returns the decoded SMILES string.
Args:
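
Note: `detokenize_output` above maps predicted token ids back to a SMILES string. A hypothetical sketch of that step (the vocabulary and helper name are invented for illustration):

```python
def detokenize_sketch(predicted_ids, index_to_word):
    """Map predicted token ids to strings, drop <start>/<end>, join into one SMILES string."""
    words = [index_to_word[int(i)] for i in predicted_ids]
    words = [w for w in words if w not in ("<start>", "<end>")]
    return "".join(words)


vocab = {1: "<start>", 2: "C", 3: "O", 4: "<end>"}   # made-up vocabulary
print(detokenize_sketch([1, 2, 2, 3, 4], vocab))      # "CCO"
```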
2 changes: 1 addition & 1 deletion DECIMER/efficientnetv2/effnetv2_model.py
@@ -180,7 +180,7 @@ def __init__(self, block_args, mconfig, name=None):

self.endpoints = None

-# Builds the block accordings to arguments.
+# Builds the block according to arguments.
self._build()

@property
2 changes: 1 addition & 1 deletion README.md
@@ -87,7 +87,7 @@ print(SMILES)
- This project is licensed under the MIT License - see the [LICENSE](https://raw.githubusercontent.com/Kohulan/DECIMER-Image_Transformer/master/LICENSE?token=AHKLIF3EULMCUKCFUHIPBMDARSMDO) file for details

## Citation
-- Rajan K, Brinkhaus HO, Agea MI, Zielesny A, Steinbeck C (2023) DECIMER.ai - An open platform for automated optical chemical structure identification, segmentation and recognition in scientific publications. ChemRxiv. doi: https://10.26434/chemrxiv-2023-xhcx9
+- Rajan K, Brinkhaus HO, Agea MI, Zielesny A, Steinbeck C DECIMER.ai - An open platform for automated optical chemical structure identification, segmentation and recognition in scientific publications. Nat. Commun. 14, 5045 (2023). https://doi.org/10.1038/s41467-023-40782-0
- Rajan, K., Zielesny, A. & Steinbeck, C. DECIMER 1.0: deep learning for chemical image recognition using transformers. J Cheminform 13, 61 (2021). https://doi.org/10.1186/s13321-021-00538-8

## References
5 changes: 4 additions & 1 deletion setup.py
@@ -20,9 +20,12 @@
author_email="[email protected]",
maintainer="Kohulan Rajan, Otto Brinkhaus ",
maintainer_email="[email protected], [email protected]",
description="DECIMER 2.4.0: Deep Learning for Chemical Image Recognition using Efficient-Net V2 + Transformer",
description="DECIMER 2.6.0: Deep Learning for Chemical Image Recognition using Efficient-Net V2 + Transformer",
long_description=long_description,
long_description_content_type="text/markdown",
+entry_points={
+    "console_scripts": ["decimer = DECIMER.decimer:main"],
+},
url="https://github.com/Kohulan/DECIMER-Image_Transformer",
packages=setuptools.find_packages(),
license="MIT",
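
Note: the added `console_scripts` entry wires a `decimer` command to `DECIMER.decimer:main`. Roughly, running that command after installation is equivalent to the following (assuming `main` takes no required arguments, as the entry point implies):

```python
from DECIMER.decimer import main  # the callable named in "decimer = DECIMER.decimer:main"

if __name__ == "__main__":
    main()  # what the installed `decimer` console script invokes
```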
