docs (#792)

huggingface · Oct 15, 2024 · 797ed58 · 797ed58
1 parent b6cc9f7
commit 797ed58
Show file tree

Hide file tree

Showing 24 changed files with 375 additions and 184 deletions.
diff --git a/configs/image_classification/local.yml b/configs/image_classification/local.yml
@@ -10,7 +10,7 @@ data:
   valid_split: null
   column_mapping:
     image_column: image
-    target_column: labels
+    target_column: label
 
 params:
   epochs: 2

diff --git a/configs/llm_finetuning/qwen.yml b/configs/llm_finetuning/qwen.yml
@@ -0,0 +1,34 @@
+task: llm-sft
+base_model: Qwen/Qwen2.5-Coder-7B-Instruct
+project_name: autotrain-qwen-finetune
+log: tensorboard
+backend: local
+
+data:
+  path: HuggingFaceH4/no_robots
+  train_split: test
+  valid_split: null
+  chat_template: tokenizer
+  column_mapping:
+    text_column: messages
+
+params:
+  block_size: 2048
+  model_max_length: 4096
+  epochs: 1
+  batch_size: 1
+  lr: 1e-5
+  peft: true
+  quantization: int4
+  target_modules: all-linear
+  padding: right
+  optimizer: adamw_torch
+  scheduler: cosine
+  gradient_accumulation: 1
+  mixed_precision: fp16
+  merge_adapter: true
+
+hub:
+  username: ${HF_USERNAME}
+  token: ${HF_TOKEN}
+  push_to_hub: true
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
@@ -10,20 +10,12 @@
   title: Getting Started
 - sections:
   - local: quickstart_spaces
-    title: Quickstart
-  title: AutoTrain on Hugging Face Spaces
-- sections:
+    title: Train on Spaces
   - local: quickstart
-    title: Quickstart
+    title: Train Locally
   - local: config
-    title: Configurations
-  title: Use AutoTrain Locally
-- sections:
-  - local: col_map
-    title: Understanding Column Mapping
-  - local: autotrain_api
-    title: AutoTrain API
-  title: Miscellaneous
+    title: Config File
+  title: Quickstart
 - sections:
   - local: tasks/llm_finetuning
     title: LLM Finetuning
@@ -33,10 +25,8 @@
     title: Extractive QA
   - local: tasks/sentence_transformer
     title: Sentence Transformer
-  - local: tasks/image_classification
-    title: Image Classification
-  - local: tasks/image_regression
-    title: Image Scoring/Regression
+  - local: tasks/image_classification_regression
+    title: Image Classification / Regression
   - local: tasks/object_detection
     title: Object Detection
   - local: tasks/dreambooth
@@ -49,20 +39,8 @@
     title: Tabular
   title: Tasks
 - sections:
-  - local: params/extractive_qa_params
-    title: Extractive QA
-  - local: params/image_classification_params
-    title: Image Classification
-  - local: params/image_regression_params
-    title: Image Scoring/Regression
-  - local: params/object_detection_params
-    title: Object Detection
-  - local: params/dreambooth_params
-    title: DreamBooth
-  - local: params/seq2seq_params
-    title: Seq2Seq
-  - local: params/token_classification_params
-    title: Token Classification
-  - local: params/tabular_params
-    title: Tabular
-  title: Parameters
+  - local: col_map
+    title: Understanding Column Mapping
+  - local: autotrain_api
+    title: AutoTrain API
+  title: Miscellaneous
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
@@ -1,4 +1,4 @@
-# What is AutoTrain Advanced?
+# AutoTrain
 
 ![autotrain-homepage](https://raw.githubusercontent.com/huggingface/autotrain-advanced/main/static/autotrain_homepage.png)
 

diff --git a/docs/source/params/dreambooth_params.mdx b/docs/source/params/dreambooth_params.mdx
diff --git a/docs/source/params/extractive_qa_params.mdx b/docs/source/params/extractive_qa_params.mdx
diff --git a/docs/source/params/image_classification_params.mdx b/docs/source/params/image_classification_params.mdx
diff --git a/docs/source/params/image_regression_params.mdx b/docs/source/params/image_regression_params.mdx
diff --git a/docs/source/params/object_detection_params.mdx b/docs/source/params/object_detection_params.mdx
diff --git a/docs/source/params/seq2seq_params.mdx b/docs/source/params/seq2seq_params.mdx
diff --git a/docs/source/params/tabular_params.mdx b/docs/source/params/tabular_params.mdx
diff --git a/docs/source/params/token_classification_params.mdx b/docs/source/params/token_classification_params.mdx
diff --git a/docs/source/tasks/dreambooth.mdx b/docs/source/tasks/dreambooth.mdx
@@ -33,3 +33,8 @@ This token acts as a unique identifier for your subject within the model.
 Typically, you will use a simple, descriptive keyword like prompt in the parameters 
 section of your training setup. This token will be used to generate new images of 
 your subject by the model.
+
+
+## Parameters
+
+[[autodoc]] trainers.dreambooth.params.DreamBoothTrainingParams
diff --git a/docs/source/tasks/extractive_qa.mdx b/docs/source/tasks/extractive_qa.mdx
@@ -28,4 +28,59 @@ Note: the preferred format for question answering is JSONL, if you want to use C
 Example dataset from Hugging Face Hub: [lhoestq/squad](https://huggingface.co/datasets/lhoestq/squad)
 
 
-P.S. You can use both squad and squad v2 data format with correct column mappings.
+P.S. You can use both the SQuAD and SQuAD v2 data formats, as long as the column mappings are correct.
+
+## Training Locally
+
+To train an Extractive QA model locally, you need a config file (saved here as `config.yaml`):
+
+```yaml
+task: extractive-qa
+base_model: google-bert/bert-base-uncased
+project_name: autotrain-bert-ex-qa1
+log: tensorboard
+backend: local
+
+data:
+  path: lhoestq/squad
+  train_split: train
+  valid_split: validation
+  column_mapping:
+    text_column: context
+    question_column: question
+    answer_column: answers
+
+params:
+  max_seq_length: 512
+  max_doc_stride: 128
+  epochs: 3
+  batch_size: 4
+  lr: 2e-5
+  optimizer: adamw_torch
+  scheduler: linear
+  gradient_accumulation: 1
+  mixed_precision: fp16
+
+hub:
+  username: ${HF_USERNAME}
+  token: ${HF_TOKEN}
+  push_to_hub: true
+```
+
+To train the model, run the following command:
+
+```bash
+$ autotrain --config config.yaml
+```
+
+Here, we are training a BERT model on the SQuAD dataset using the Extractive QA task. The model is trained for 3 epochs with a batch size of 4 and a learning rate of 2e-5. The training process is logged using TensorBoard. The model is trained locally and pushed to the Hugging Face Hub after training.
+
+## Training on Hugging Face Spaces
+
+![AutoTrain Extractive Question Answering on Hugging Face Spaces](https://raw.githubusercontent.com/huggingface/autotrain-advanced/main/static/ext_qa.png)
+
+As always, pay special attention to column mapping.
+
+## Parameters
+
+[[autodoc]] trainers.extractive_question_answering.params.ExtractiveQuestionAnsweringParams
diff --git a/docs/source/tasks/image_classification.mdx b/docs/source/tasks/image_classification.mdx