modelscope · Jintao-Huang · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025
diff --git a/examples/infer/demo_grounding.py b/examples/infer/demo_grounding.py
@@ -0,0 +1,72 @@
+import os
+import re
+from typing import Literal
+
+from swift.llm import load_image
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+# Checkpoint used when no adapter path is passed to `infer_grounding`.
+DEFAULT_ADAPTER_PATH = '/mnt/nas2/huangjintao.hjt/work/llmscope/output/v92-20250126-173609/checkpoint-1237'
+
+
+def draw_bbox(image, response, norm_bbox: Literal['norm1000', 'none'] = 'norm1000'):
+    """Return a copy of `image` with every box found in `response` outlined in red.
+
+    Boxes are read from spans of the form
+    `<|object_ref_start|>label<|object_ref_end|><|box_start|>(x1,y1),(x2,y2)<|box_end|>`.
+
+    Args:
+        image: Image the response refers to (assumed to be a PIL image); it is not modified.
+        response: Model output text to scan for boxes.
+        norm_bbox: 'norm1000' rescales coordinates from a 0-1000 grid to pixels,
+            'none' uses them as pixel values.
+            NOTE(review): the default assumes the checkpoint emits 0-1000 coordinates - confirm.
+
+    Returns:
+        The annotated copy (identical to the input when no box is found).
+    """
+    from PIL import ImageDraw
+
+    matches = re.findall(
+        r'<\|object_ref_start\|>(.*?)<\|object_ref_end\|><\|box_start\|>\((\d+),(\d+)\),\((\d+),(\d+)\)<\|box_end\|>',
+        response)
+    annotated = image.copy()
+    draw = ImageDraw.Draw(annotated)
+    for _label, *coords in matches:
+        x1, y1, x2, y2 = (int(value) for value in coords)
+        if norm_bbox == 'norm1000':
+            x1, x2 = (round(x * annotated.width / 1000) for x in (x1, x2))
+            y1, y2 = (round(y * annotated.height / 1000) for y in (y1, y2))
+        # Pillow rejects rectangles whose second corner precedes the first, so order the corners.
+        draw.rectangle((min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)), outline='red', width=2)
+    return annotated
+
+
+def infer_grounding(adapter_path: str = DEFAULT_ADAPTER_PATH, output_path: str = 'animal_bbox.png'):
+    """Run grounding inference on the sample image and save it with the predicted boxes drawn.
+
+    Args:
+        adapter_path: Adapter checkpoint, resolved through `safe_snapshot_download`.
+        output_path: Where the annotated image is written.
+    """
+    from swift.llm import BaseArguments, InferRequest, PtEngine, RequestConfig, safe_snapshot_download
+
+    image = load_image('http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png')
+    infer_request = InferRequest(messages=[{'role': 'user', 'content': 'Task: Object Detection'}], images=[image])
+    request_config = RequestConfig(max_tokens=512, temperature=0)
+
+    adapter_path = safe_snapshot_download(adapter_path)
+    # `args.model`, loaded from the adapter directory, is the model handed to PtEngine.
+    args = BaseArguments.from_pretrained(adapter_path)
+    engine = PtEngine(args.model, adapters=[adapter_path])
+
+    resp_list = engine.infer([infer_request], request_config)
+    response = resp_list[0].choices[0].message.content
+    print(f'lora-response: {response}')
+
+    draw_bbox(image, response).save(output_path)
+
+
+if __name__ == '__main__':
+    infer_grounding()
diff --git a/swift/llm/template/grounding.py b/swift/llm/template/grounding.py
@@ -1,6 +1,27 @@
 from typing import Any, Dict, List, Literal
 
-from PIL import Image
+from PIL import Image, ImageDraw
+
+
+def draw_bbox(image: Image.Image, objects: Dict[str, List[Any]], bbox_type: Literal['norm1000', 'none'] = 'norm1000'):
+    """Outline every box listed under `objects['bbox']` on `image` in red.
+
+    Args:
+        image: Image to draw on; it is modified in place.
+        objects: Mapping whose 'bbox' entry lists the boxes to draw.
+        bbox_type: Forwarded to `normalize_bbox` together with `image` and `objects`.
+
+    Returns:
+        The same `image` object, so the call can be chained.
+    """
+    # NOTE(review): the result of normalize_bbox is discarded, so it presumably rewrites `objects`
+    # in place - confirm, and confirm the boxes it leaves behind are in pixel coordinates.
+    normalize_bbox([image], objects, bbox_type)
+    draw = ImageDraw.Draw(image)
+    # `objects` maps field names to lists, so the boxes are the 'bbox' entry rather than `objects[0]`.
+    for bbox in objects.get('bbox') or []:
+        draw.rectangle(bbox, outline='red', width=2)
+    return image
 
 
 def normalize_bbox(images: List[Image.Image],