
Commit 5bbcdc4: 4-bit on Colab

henk717 committed Jul 17, 2023
1 parent da9226f
Showing 3 changed files with 6 additions and 4 deletions.
1 change: 1 addition & 0 deletions aiserver.py
@@ -1514,6 +1514,7 @@ def new_emit(*args, **kwargs):
     args.quiet = True;
     args.lowmem = True;
     args.noaimenu = True;
+    koboldai_vars.colab_arg = True;
 
 if args.quiet:
     koboldai_vars.quiet = True
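Together with the settings and loader changes below, this hunk makes a Colab launch visible outside aiserver.py: the Colab startup path already forces quiet, low-memory defaults, and now also records the mode on the shared settings object. A minimal runnable sketch of the pattern, where SimpleNamespace stands in for KoboldAI's settings object and the --colab flag name is an assumption for illustration:

    # Sketch of the launch-flag pattern in this hunk; SimpleNamespace and
    # the --colab flag name are stand-ins, not KoboldAI's exact code.
    import argparse
    from types import SimpleNamespace

    koboldai_vars = SimpleNamespace(colab_arg=False)

    parser = argparse.ArgumentParser()
    parser.add_argument("--colab", action="store_true", help="run with Colab defaults")
    args = parser.parse_args(["--colab"])  # simulate a Colab launch

    if args.colab:
        args.quiet = True                  # keep notebook output terse
        args.lowmem = True                 # conserve the Colab VM's RAM
        args.noaimenu = True               # skip the interactive model menu
        koboldai_vars.colab_arg = True     # let other modules detect Colab mode

    print(koboldai_vars.colab_arg)  # -> True
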
7 changes: 4 additions & 3 deletions koboldai_settings.py
@@ -1206,12 +1206,12 @@ class system_settings(settings):
     local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
                             'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui',
                             'sp', '_horde_pid', 'inference_config', 'image_pipeline',
-                            'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui']
+                            'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'colab_arg']
     no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
                          'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
                          'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab',
                          'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
-                         'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch']
+                         'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'colab_arg']
     settings_name = "system"
     def __init__(self, socketio, koboldai_var):
         self._socketio = socketio
@@ -1279,11 +1279,12 @@ def __init__(self, socketio, koboldai_var):
         self.disable_output_formatting = False
         self.api_tokenizer_id = None
         self.port = 5000
+        self.colab_arg = False
         try:
             import google.colab
             self.on_colab = True
         except:
-            self.on_colab = False
+            self.on_colab = self.colab_arg
         print(f"Colab Check: {self.on_colab}, TPU: {self.use_colab_tpu}")
         self.horde_share = False
         self._horde_pid = None
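The detection logic now treats the launch flag as a fallback: importing google.colab still marks the session as Colab, but when that import fails the new colab_arg value is used instead of a hard-coded False. A self-contained sketch of the pattern, using plain variables in place of the system_settings attributes:

    # Fallback-detection sketch; plain module-level names stand in for
    # the self.* attributes in system_settings.__init__.
    colab_arg = False  # the launcher flips this when started in Colab mode

    try:
        import google.colab   # importable only inside a Colab runtime
        on_colab = True
    except ImportError:       # the real code uses a bare except
        on_colab = colab_arg  # fall back to the explicit launch flag

    print(f"Colab Check: {on_colab}")
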
2 changes: 1 addition & 1 deletion modeling/inference_models/generic_hf_torch/class.py
@@ -82,7 +82,7 @@ def _load(self, save_model: bool, initial_load: bool) -> None:
             "low_cpu_mem_usage": True,
         }
 
-        if self.use_4_bit:
+        if self.use_4_bit or utils.koboldai_vars.colab_arg:
             tf_kwargs.update({
                 "quantization_config":BitsAndBytesConfig(
                     load_in_4bit=True,
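The net effect of the commit: on Colab, the Hugging Face loader applies 4-bit quantization even when the user did not pick it, so models fit the free-tier GPU. The hunk is cut off after load_in_4bit=True, so the commit's full quantization config is not visible here; the sketch below uses the standard transformers BitsAndBytesConfig API, and every option beyond load_in_4bit is a common choice rather than KoboldAI's confirmed value:

    # 4-bit loading sketch with transformers; only load_in_4bit=True is
    # confirmed by the hunk above, the remaining options are typical choices.
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,                     # the option visible in the diff
        bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
        bnb_4bit_quant_type="nf4",             # NormalFloat4 weight format
    )

    model = AutoModelForCausalLM.from_pretrained(
        "facebook/opt-125m",                   # placeholder model id
        quantization_config=quant_config,
        low_cpu_mem_usage=True,                # mirrors tf_kwargs above
        device_map="auto",
    )
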
