Add some prompts

Chiroyce1 · Mar 13, 2024 · ba58e96 · ba58e96
1 parent c14a88f
commit ba58e96
Show file tree

Hide file tree

Showing 7 changed files with 241 additions and 190 deletions.
diff --git a/index.html b/index.html
@@ -27,6 +27,7 @@ <h1>Gemini Pro Vision</h1>
       <div class="right">
         <input type="password" id="api" placeholder="Enter API key (get it from ai.google.dev)">
         <textarea id="prompt" placeholder="Enter prompt here" rows="4" cols="40"></textarea><br>
+        <select id="promptSelect"></select><br>
         <select id="cameraSelect"></select><br>
       </div>
       <div class="left">

diff --git a/prompts.json b/prompts.json
@@ -0,0 +1,11 @@
+[
+	{ "path": "default.txt", "description": "Default prompt" },
+	{
+		"path": "pic2loc.txt",
+		"description": "Determine the location of an image."
+	},
+	{
+		"path": "person.txt",
+		"description": "Identify a person in an image."
+	}
+]
diff --git a/prompts/default.txt b/prompts/default.txt
@@ -0,0 +1 @@
+What do you see in this picture? Describe in detail, along with reasoning.
diff --git a/prompts/location.txt b/prompts/location.txt
@@ -0,0 +1,15 @@
+Identify and describe distinctive geographical features to determine the likely location.
+Consider natural landmarks, terrain characteristics, signboards, stores, roadsigns, or any unique elements visible. 
+Provide insights on the elements in the image, and make an informed guess about the location based on the identified features.
+If there are buildings or urban locations, try naming them or identifying them specifically.
+Example response in JSON:
+
+DO NOT return anything that is not there in the image; make sure you confirm and check again to make sure
+that all elements returned really do exist.
+
+{
+	"city":"New York City", 
+	"region":"New York", 
+	"country":"USA", 
+	"elements": "NYC Taxi written on the taxi cabs, street sign saying '5th avenue', architecture of the skyscrapers, an exact Disney store in times square."
+}
diff --git a/prompts/person.txt b/prompts/person.txt
@@ -0,0 +1,7 @@
+Imagine you are a skilled individual with expertise in identifying famous personalities. 
+Develop a paragraph response that details the recognition of an individual in a given image.
+Provide information such as their name, occupation, and any distinctive features contributing to the identification. 
+Consider facial features, attire, and contextual cues. 
+Show an overall confidence score out of 100% ONLY at THE END of the paragraph.
+
+Confidence: 85%
diff --git a/script.js b/script.js
@@ -2,6 +2,7 @@ import { GoogleGenerativeAI } from "@google/generative-ai";
 
 const responseElement = document.getElementById("response");
 const cameraSelect = document.getElementById("cameraSelect");
+const promptSelect = document.getElementById("promptSelect");
 const promptInput = document.getElementById("prompt");
 const video = document.getElementById("webcam");
 const canvas = document.getElementById("canvas");
@@ -13,106 +14,121 @@ promptInput.value = `What do you see in this picture? Describe in detail, along
 const show = (text) => (responseElement.innerText = text);
 
 async function fileToGenerativePart(file) {
-  const base64EncodedDataPromise = new Promise((resolve) => {
-    const reader = new FileReader();
-    reader.onloadend = () => resolve(reader.result.split(",")[1]);
-    reader.readAsDataURL(file);
-  });
-  return {
-    inlineData: { data: await base64EncodedDataPromise, mimeType: file.type },
-  };
+	const base64EncodedDataPromise = new Promise((resolve) => {
+		const reader = new FileReader();
+		reader.onloadend = () => resolve(reader.result.split(",")[1]);
+		reader.readAsDataURL(file);
+	});
+	return {
+		inlineData: { data: await base64EncodedDataPromise, mimeType: file.type },
+	};
 }
 
 navigator.mediaDevices
-  .enumerateDevices()
-  .then((devices) => {
-    devices.forEach((device) => {
-      if (device.kind === "videoinput") {
-        const option = document.createElement("option");
-        option.value = device.deviceId;
-        option.text =
-          device.label || `Camera ${cameraSelect.options.length + 1}`;
-        cameraSelect.add(option);
-      }
-    });
-  })
-  .catch((error) => {
-    show(`Error enumerating devices: ${error}`);
-    console.error(`Error enumerating devices: ${error}`);
-  });
+	.enumerateDevices()
+	.then((devices) => {
+		devices.forEach((device) => {
+			if (device.kind === "videoinput") {
+				const option = document.createElement("option");
+				option.value = device.deviceId;
+				option.text =
+					device.label || `Camera ${cameraSelect.options.length + 1}`;
+				cameraSelect.add(option);
+			}
+		});
+	})
+	.catch((error) => {
+		show(`Error enumerating devices: ${error}`);
+		console.error(`Error enumerating devices: ${error}`);
+	});
 
 cameraSelect.addEventListener("change", setCamera);
 
 function setCamera() {
-  const selectedCameraId = cameraSelect.value;
-  navigator.mediaDevices
-    .getUserMedia({
-      video: { deviceId: selectedCameraId },
-    })
-    .then((stream) => {
-      video.srcObject = stream;
-    })
-    .catch((error) => {
-      console.error(`Error accessing webcam: ${error}`);
-      show(`Error accessing webcam: ${error}`);
-    });
+	const selectedCameraId = cameraSelect.value;
+	navigator.mediaDevices
+		.getUserMedia({
+			video: { deviceId: selectedCameraId },
+		})
+		.then((stream) => {
+			video.srcObject = stream;
+		})
+		.catch((error) => {
+			console.error(`Error accessing webcam: ${error}`);
+			show(`Error accessing webcam: ${error}`);
+		});
 }
 
 async function captureImage() {
-  if (active) return;
-  context.drawImage(video, 0, 0, canvas.width, canvas.height);
-  const imageDataURL = canvas.toDataURL("image/jpeg");
-  const imageFile = new File([dataURItoBlob(imageDataURL)], "image.jpg", {
-    type: "image/jpeg",
-  });
-  const image = await fileToGenerativePart(imageFile);
-  const API_KEY = document.querySelector("#api").value;
-  if (API_KEY.trim() === "") {
-    show("Please provide an API_KEY.");
-    return;
-  }
-  // Top class error handling
-  let genAI;
-  try {
-    genAI = new GoogleGenerativeAI(API_KEY);
-  } catch (e) {
-    show(`Oops something went wrong.\nError: ${e}`);
-  }
+	// Grab the current webcam frame, send it with the prompt to the model and
+	// stream the answer into the response element. `active` guards against
+	// overlapping requests.
+	if (active) return;
+	context.drawImage(video, 0, 0, canvas.width, canvas.height);
+	const imageDataURL = canvas.toDataURL("image/jpeg");
+	const imageFile = new File([dataURItoBlob(imageDataURL)], "image.jpg", {
+		type: "image/jpeg",
+	});
+	const image = await fileToGenerativePart(imageFile);
+	const API_KEY = document.querySelector("#api").value;
+	if (API_KEY.trim() === "") {
+		show("Please provide an API_KEY.");
+		return;
+	}
+	let genAI;
+	try {
+		genAI = new GoogleGenerativeAI(API_KEY);
+	} catch (e) {
+		show(`Oops something went wrong.\nError: ${e}`);
+		// Without a client there is nothing to query; stop here instead of
+		// crashing on `genAI.getGenerativeModel` below.
+		return;
+	}
 
-  const model = genAI.getGenerativeModel({ model: "gemini-pro-vision" });
-  show("Loading... (this can take upto 30s)");
-  let res;
-  active = true;
-  try {
-    res = await model.generateContentStream([promptInput.value, image]);
-    let text = "";
-    for await (const chunk of res.stream) {
-      text += chunk.text();
-      show(text);
-    }
-  } catch (e) {
-    console.error(e);
-    show(`Oops something went wrong.\nError: ${e.toString()}`);
-    active = false;
-    return;
-  }
+	const model = genAI.getGenerativeModel({ model: "gemini-pro-vision" });
+	show("Loading... (this can take upto 30s)");
+	let res;
+	active = true;
+	try {
+		res = await model.generateContentStream([promptInput.value, image]);
+		// Re-render the accumulated text after every streamed chunk.
+		let text = "";
+		for await (const chunk of res.stream) {
+			text += chunk.text();
+			show(text);
+		}
+	} catch (e) {
+		console.error(e);
+		show(`Oops something went wrong.\nError: ${e.toString()}`);
+		active = false;
+		return;
+	}
 
-  active = false;
+	active = false;
 }
 
 function dataURItoBlob(dataURI) {
-  // Thanks to ChatGPT for this
-  const byteString = atob(dataURI.split(",")[1]);
-  const mimeString = dataURI.split(",")[0].split(":")[1].split(";")[0];
-  const arrayBuffer = new ArrayBuffer(byteString.length);
-  const uint8Array = new Uint8Array(arrayBuffer);
+	// Thanks to ChatGPT for this
+	const byteString = atob(dataURI.split(",")[1]);
+	const mimeString = dataURI.split(",")[0].split(":")[1].split(";")[0];
+	const arrayBuffer = new ArrayBuffer(byteString.length);
+	const uint8Array = new Uint8Array(arrayBuffer);
 
-  for (let i = 0; i < byteString.length; i++) {
-    uint8Array[i] = byteString.charCodeAt(i);
-  }
+	for (let i = 0; i < byteString.length; i++) {
+		uint8Array[i] = byteString.charCodeAt(i);
+	}
 
-  return new Blob([arrayBuffer], { type: mimeString });
+	return new Blob([arrayBuffer], { type: mimeString });
 }
 
+// Populate the prompt dropdown from prompts.json. All prompt files are
+// fetched in parallel, but options are added in manifest order; a prompt
+// file that fails to load is logged and skipped instead of being added
+// with an error page as its text.
+fetch("./prompts.json")
+	.then((response) => {
+		if (!response.ok) {
+			throw new Error(`HTTP ${response.status} for prompts.json`);
+		}
+		return response.json();
+	})
+	.then((prompts) =>
+		Promise.allSettled(
+			prompts.map(async ({ path, description }) => {
+				const response = await fetch(`./prompts/${path}`);
+				if (!response.ok) {
+					throw new Error(`HTTP ${response.status} for ${path}`);
+				}
+				return { description, text: await response.text() };
+			})
+		)
+	)
+	.then((results) => {
+		results.forEach((result) => {
+			if (result.status === "rejected") {
+				console.error(`Error loading prompt: ${result.reason}`);
+				return;
+			}
+			const option = document.createElement("option");
+			option.text = result.value.description;
+			option.value = result.value.text;
+			promptSelect.add(option);
+		});
+	})
+	.catch((error) => {
+		show(`Error loading prompts: ${error}`);
+		console.error(`Error loading prompts: ${error}`);
+	});
 setCamera();
+
+// Copy the selected preset's text into the editable prompt box.
+promptSelect.addEventListener("change", () => {
+	promptInput.value = promptSelect.value;
+});
 document.querySelector("button").addEventListener("click", captureImage);