Skip to content

Commit

Permalink
Add some prompts
Browse files Browse the repository at this point in the history
  • Loading branch information
Chiroyce1 committed Mar 13, 2024
1 parent c14a88f commit ba58e96
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 190 deletions.
1 change: 1 addition & 0 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ <h1>Gemini Pro Vision</h1>
<div class="right">
<input type="password" id="api" placeholder="Enter API key (get it from ai.google.dev)">
<textarea id="prompt" placeholder="Enter prompt here" rows="4" cols="40"></textarea><br>
<select id="promptSelect"></select><br>
<select id="cameraSelect"></select><br>
</div>
<div class="left">
Expand Down
11 changes: 11 additions & 0 deletions prompts.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[
{ "path": "default.txt", "description": "Default prompt" },
{
"path": "location.txt",
"description": "Determine the location of an image."
},
{
"path": "person.txt",
"description": "Identify a person in an image."
}
]
1 change: 1 addition & 0 deletions prompts/default.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
What do you see in this picture? Describe in detail, along with reasoning.
15 changes: 15 additions & 0 deletions prompts/location.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Identify and describe distinctive geographical features to determine the likely location.
Consider natural landmarks, terrain characteristics, signboards, stores, road signs, or any unique elements visible.
Provide insights on the elements in the image, and make an informed guess about the location based on the identified features.
If there are buildings or urban locations, try naming them or identifying them specifically.
Example response in JSON:

DO NOT return anything that is not in the image; make sure you confirm and double-check
that all elements returned really do exist.

{
"city":"New York City",
"region":"New York",
"country":"USA",
"elements": "NYC Taxi written on the taxi cabs, street sign saying '5th avenue', architecture of the skyscrapers, an exact Disney store in times square."
}
7 changes: 7 additions & 0 deletions prompts/person.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Imagine you are a skilled individual with expertise in identifying famous personalities.
Develop a paragraph response that details the recognition of an individual in a given image.
Provide information such as their name, occupation, and any distinctive features contributing to the identification.
Consider facial features, attire, and contextual cues.
Show an overall confidence score out of 100% ONLY at THE END of the paragraph.

Confidence: 85%
180 changes: 98 additions & 82 deletions script.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { GoogleGenerativeAI } from "@google/generative-ai";

// Cached references to the page elements this script works with, looked up once by id.
const responseElement = document.getElementById("response"); // output area for status/result text
const cameraSelect = document.getElementById("cameraSelect"); // <select> listing video input devices
const promptSelect = document.getElementById("promptSelect"); // <select> listing the canned prompts
const promptInput = document.getElementById("prompt"); // textarea holding the prompt sent to the model
const video = document.getElementById("webcam"); // element that displays the live camera stream
const canvas = document.getElementById("canvas"); // canvas used to grab a still frame from the video
Expand All @@ -13,106 +14,121 @@ promptInput.value = `What do you see in this picture? Describe in detail, along
/**
 * Replace the text shown in the response area.
 * @param {string} text - Message to display.
 * @returns {string} The same text that was displayed.
 */
const show = (text) => {
  responseElement.innerText = text;
  return text;
};

/**
 * Read a file and wrap its base64 payload in the `inlineData` part object
 * that is passed alongside the prompt to the generative model.
 *
 * @param {File|Blob} file - The image to encode; its `type` becomes the MIME type.
 * @returns {Promise<{inlineData: {data: string, mimeType: string}}>}
 *   Resolves with the base64 data (without the `data:...;base64,` prefix).
 * @throws Rejects with the reader's error when the file cannot be read.
 */
async function fileToGenerativePart(file) {
  const dataUrl = await new Promise((resolve, reject) => {
    const reader = new FileReader();
    // `load` only fires on success; `loadend` also fires after a failure,
    // where `reader.result` is null and splitting it would throw.
    reader.onload = () => resolve(reader.result);
    reader.onerror = () =>
      reject(reader.error ?? new Error("Failed to read the image file."));
    reader.readAsDataURL(file);
  });

  return {
    // A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
    inlineData: { data: dataUrl.split(",")[1], mimeType: file.type },
  };
}

// Fill the camera picker with one <option> per available video input device.
navigator.mediaDevices
  .enumerateDevices()
  .then((devices) => {
    for (const device of devices) {
      if (device.kind !== "videoinput") continue;

      const option = document.createElement("option");
      option.value = device.deviceId;
      // Fall back to a numbered placeholder when the device reports no label.
      option.text = device.label || `Camera ${cameraSelect.options.length + 1}`;
      cameraSelect.add(option);
    }
  })
  .catch((error) => {
    show(`Error enumerating devices: ${error}`);
    console.error(`Error enumerating devices: ${error}`);
  });

// Switch the live preview whenever a different camera is picked.
cameraSelect.addEventListener("change", setCamera);

/**
 * Start streaming the camera currently chosen in `cameraSelect` into the
 * `video` element.
 *
 * Any stream already attached to the video element is stopped first so the
 * previously selected camera is released instead of staying open.
 * Failures are reported through `show` and the console rather than thrown.
 *
 * @returns {Promise<void>} Settles once the new stream is attached or the
 *   error has been reported. Event-listener callers can ignore it.
 */
function setCamera() {
  const selectedCameraId = cameraSelect.value;

  // Release the previous camera before requesting another one.
  const previousStream = video.srcObject;
  if (previousStream) {
    previousStream.getTracks().forEach((track) => track.stop());
    video.srcObject = null;
  }

  // With nothing selected yet (the picker is still empty), ask for any camera.
  const videoConstraints = selectedCameraId
    ? { deviceId: selectedCameraId }
    : true;

  return navigator.mediaDevices
    .getUserMedia({ video: videoConstraints })
    .then((stream) => {
      video.srcObject = stream;
    })
    .catch((error) => {
      console.error(`Error accessing webcam: ${error}`);
      show(`Error accessing webcam: ${error}`);
    });
}

/**
 * Grab the current video frame, send it with the prompt text to the
 * "gemini-pro-vision" model, and stream the answer into the response area.
 *
 * Relies on `active`, `context`, `video`, `canvas`, `promptInput`, `show`,
 * `dataURItoBlob` and `fileToGenerativePart` from the surrounding file.
 * `active` is the re-entrancy flag: while a request is in flight further
 * calls return immediately.
 *
 * Errors are reported through `show` and the console and never rethrown, and
 * `active` is always cleared afterwards.
 *
 * @returns {Promise<void>}
 */
async function captureImage() {
  if (active) return;
  // Claim the flag before the first await so a quick double click cannot
  // start two requests.
  active = true;

  try {
    context.drawImage(video, 0, 0, canvas.width, canvas.height);

    const API_KEY = document.querySelector("#api").value;
    if (API_KEY.trim() === "") {
      show("Please provide an API_KEY.");
      return;
    }

    const imageDataURL = canvas.toDataURL("image/jpeg");
    const imageFile = new File([dataURItoBlob(imageDataURL)], "image.jpg", {
      type: "image/jpeg",
    });
    const image = await fileToGenerativePart(imageFile);

    // A constructor failure lands in the catch below instead of falling
    // through to a call on an undefined client.
    const genAI = new GoogleGenerativeAI(API_KEY);
    const model = genAI.getGenerativeModel({ model: "gemini-pro-vision" });

    show("Loading... (this can take upto 30s)");
    const res = await model.generateContentStream([promptInput.value, image]);

    // Re-render the accumulated text after every chunk so the answer
    // appears progressively.
    let text = "";
    for await (const chunk of res.stream) {
      text += chunk.text();
      show(text);
    }
  } catch (e) {
    console.error(e);
    show(`Oops something went wrong.\nError: ${e}`);
  } finally {
    active = false;
  }
}

/**
 * Decode a base64 data URI (e.g. the output of `canvas.toDataURL`) into a Blob.
 *
 * @param {string} dataURI - String of the form "data:<mime>;base64,<payload>".
 * @returns {Blob} Blob holding the decoded bytes, typed with the URI's MIME type.
 * @throws {TypeError} If the input is not a data URI with a payload section.
 */
function dataURItoBlob(dataURI) {
  const commaIndex = dataURI.indexOf(",");
  if (!dataURI.startsWith("data:") || commaIndex === -1) {
    throw new TypeError("dataURItoBlob expects a base64 data URI.");
  }

  // Header is everything before the first comma, e.g. "data:image/jpeg;base64".
  const header = dataURI.slice(0, commaIndex);
  const mimeString = header.slice("data:".length).split(";")[0];

  // atob yields a "binary string": one character per byte (code units 0-255).
  const byteString = atob(dataURI.slice(commaIndex + 1));
  const bytes = Uint8Array.from(byteString, (char) => char.charCodeAt(0));

  return new Blob([bytes], { type: mimeString });
}

// Load the prompt manifest, fetch every prompt file it lists, and add one
// <option> per prompt (label = description, value = prompt text).
fetch("./prompts.json")
  .then((response) => {
    if (!response.ok) {
      throw new Error(`prompts.json request failed with status ${response.status}`);
    }
    return response.json();
  })
  .then((prompts) =>
    // Fetch in parallel, but keep results in manifest order and let one
    // missing file fail without discarding the others.
    Promise.allSettled(
      prompts.map(async ({ path, description }) => {
        const response = await fetch(`./prompts/${path}`);
        if (!response.ok) {
          throw new Error(`${path} request failed with status ${response.status}`);
        }
        return { description, text: await response.text() };
      }),
    ),
  )
  .then((results) => {
    for (const result of results) {
      if (result.status === "rejected") {
        console.error(`Error loading prompt: ${result.reason}`);
        continue;
      }
      const option = document.createElement("option");
      option.text = result.value.description;
      option.value = result.value.text;
      promptSelect.add(option);
    }
  })
  .catch((error) => {
    console.error(`Error loading prompts: ${error}`);
    show(`Error loading prompts: ${error}`);
  });

// Start the preview with whatever camera is selected at load time.
setCamera();

// Choosing a canned prompt copies its text into the editable prompt box.
promptSelect.addEventListener("change", () => {
  promptInput.value = promptSelect.value;
});
document.querySelector("button").addEventListener("click", captureImage);
Loading

0 comments on commit ba58e96

Please sign in to comment.