Skip to content

Commit

Permalink
update numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
btyu committed Feb 20, 2024
1 parent 640b156 commit 99f645f
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 1 deletion.
2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ <h2 class="subtitle is-3 publication-subtitle">
demonstrate
that our developed LLMs can achieve very strong results on a comprehensive set of chemistry tasks,
<i>outperforming the most advanced GPT-4 across all the tasks by a substantial margin
<strong>(e.g., 94.5% exact match for converting SMILES to Formula vs. GPT-4's 16.4%;
<strong>(e.g., 94.5% exact match for converting SMILES to Formula vs. GPT-4's 16.4%;
32.9% exact match for Retrosynthesis vs. GPT-4's 1.2%)</strong> and approaching the SoTA task-specific
models.</i>
The key to our success is a large-scale, comprehensive, high-quality dataset for instruction tuning named
Expand Down
174 changes: 174 additions & 0 deletions test_generation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===================================BUG REPORT===================================\n",
"Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
"================================================================================\n",
"CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching /usr/local/cuda/lib64...\n",
"CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so\n",
"CUDA SETUP: Highest compute capability among GPUs detected: 8.6\n",
"CUDA SETUP: Detected CUDA version 118\n",
"CUDA SETUP: Loading binary /home/yu.3737/Software/miniconda3/envs/lora/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda118.so...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/yu.3737/Software/miniconda3/envs/lora/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: /home/yu.3737/Software/miniconda3/envs/lora did not contain libcudart.so as expected! Searching further paths...\n",
" warn(msg)\n",
"/home/yu.3737/Software/miniconda3/envs/lora/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('FILE')}\n",
" warn(msg)\n",
"/home/yu.3737/Software/miniconda3/envs/lora/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/usr/modulefiles')}\n",
" warn(msg)\n",
"/home/yu.3737/Software/miniconda3/envs/lora/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/usr/share/lmod/lmod/share/man')}\n",
" warn(msg)\n",
"/home/yu.3737/Software/miniconda3/envs/lora/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('() { eval $($LMOD_DIR/ml_cmd \"$@\")\\n}')}\n",
" warn(msg)\n",
"/home/yu.3737/Software/miniconda3/envs/lora/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('vs/workbench/api/node/extensionHostProcess')}\n",
" warn(msg)\n",
"/home/yu.3737/Software/miniconda3/envs/lora/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:136: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('//matplotlib_inline.backend_inline'), PosixPath('module')}\n",
" warn(msg)\n"
]
}
],
"source": [
"from generation import LlaSMolGeneration, canonicalize_smiles_in_text"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "91482b88007548129672708d278f7f09",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"generator = LlaSMolGeneration('osunlp/LlaSMol-Mistral-7B')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'input_text': 'Given the SMILES representation <SMILES> S=P1(N(CCCl)CCCl)NCCCO1 </SMILES>, what would be its molecular formula?',\n",
" 'real_input_text': '<s>[INST] Given the SMILES representation <SMILES> S=P1(N(CCCl)CCCl)NCCCO1 </SMILES> , what would be its molecular formula? [/INST]',\n",
" 'output': ['It is <MOLFORMULA> C7H15Cl2N2OPS </MOLFORMULA> . </s>']}]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generator.generate(\n",
" 'Given the SMILES representation <SMILES> S=P1(N(CCCl)CCCl)NCCCO1 </SMILES>, what would be its molecular formula?'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from utils.smiles_canonicalization import canonicalize_molecule_smiles"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'CCC1(C)CCOC1=O'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"canonicalize_molecule_smiles('CCC1(C)CCOC1=O')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'CCC1(C)COC(=O)C1'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"canonicalize_molecule_smiles('CCC1(C)COC(=O)C1')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "lora",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 99f645f

Please sign in to comment.