Update README.md
README.md CHANGED
@@ -24,7 +24,7 @@ import torch
 base_model = LlavaForConditionalGeneration.from_pretrained(
     "llava-hf/llava-1.5-7b-hf",
     torch_dtype=torch.bfloat16
-)
+).to('cuda')
 processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
 
 # Load LoRA adapter
@@ -32,4 +32,44 @@ model = PeftModel.from_pretrained(
     base_model,
     "ZinengTang/llava-lora-spatial"
 )
+
+from PIL import Image
+init_prompt_instruct = "Describe the location of the blue sphere relative to the environment features."
+conversation = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "text", "text": init_prompt_instruct},
+            {"type": "image"},  # This will be replaced with the actual image
+        ],
+    },
+]
+speaker_image = Image.open('your_image_path')
+prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+# print(prompt)
+# Process the input image and prompt
+inputs = processor(
+    images=speaker_image,
+    text=prompt,
+    return_tensors="pt",
+    max_length=256,
+).to('cuda')
+
+with torch.no_grad():
+    generated = model.generate(
+        input_ids=inputs["input_ids"],
+        attention_mask=inputs["attention_mask"],
+        pixel_values=inputs["pixel_values"],
+        max_length=512,
+        num_beams=1,
+        do_sample=True,
+        temperature=0.7
+    )
+generated_message = processor.batch_decode(
+    generated,
+    skip_special_tokens=True
+)
+print(generated_message)
+generated_message = generated_message[0].split('ASSISTANT: ')[-1][:100]
+
 ```
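If you only want the model's reply rather than the full decoded sequence, a more robust alternative to splitting on `'ASSISTANT: '` is to decode just the newly generated tokens. The following is a minimal sketch, not part of the README diff above; it assumes `inputs`, `generated`, and `processor` exist exactly as in the snippet, and that `generate` returns the prompt tokens followed by the continuation (the usual behavior for decoder-only models in `transformers`):

```python
# Sketch (assumption): decode only the tokens produced after the prompt.
# `inputs` and `generated` are taken from the README snippet above.
prompt_len = inputs["input_ids"].shape[1]   # number of prompt tokens fed to generate()
new_tokens = generated[:, prompt_len:]      # drop the echoed prompt, keep the continuation
reply = processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
print(reply)
```

Slicing by token position avoids depending on the exact `ASSISTANT:` formatting of the chat template and does not truncate the reply to 100 characters.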