1、首先要解决图片向量化
可以使用 OpenAI 的 CLIP 模型和 ViT 模型 openai/clip-vit-base-patch32 进行图片向量化并比较相似性,你可以按照以下步骤进行:
准备图片和模型:
确保你有需要比较相似性的两张图片。下载并加载 CLIP-ViT 模型,如 openai/clip-vit-base-patch32。
处理图片:
将图片调整为模型的输入大小和格式要求。通常 CLIP-ViT 模型的输入大小为 224x224 像素,并且可能需要归一化。
生成图片向量:
使用 CLIP-ViT 模型将每张图片转换为其对应的向量表示。这一步骤需要使用模型的编程接口(例如 Python 中的 PyTorch 或 TensorFlow)来计算图片的特征向量。
计算相似性:
使用生成的图片向量计算它们之间的相似性。通常可以使用余弦相似度或欧氏距离等指标来衡量向量之间的相似程度。
这些步骤需要一定的编程知识和基础,特别是对模型的调用和图像处理。以下是一个简单的伪代码示例,演示了如何使用 Python 和 PyTorch 来加载 CLIP-ViT 模型并计算两张图片之间的相似性:
import clip
import torch
from PIL import Image
from torchvision import transforms

# Load the model.
# NOTE(review): assumes `clip` is OpenAI's CLIP package
# (github.com/openai/CLIP). That package is loaded through
# `clip.load(name, device=..., jit=...)`, which returns a
# `(model, preprocess)` pair; it does not export a top-level `CLIP(...)`
# constructor taking `clip_model=`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, _ = clip.load("ViT-B/32", device=device, jit=False)
model.eval()

# Image preprocessing applied before the image is handed to the encoder.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Per-channel mean/std used by CLIP's own preprocessing pipeline;
    # without this step the encoder sees inputs on a different scale than
    # it was trained on.
    transforms.Normalize(
        mean=(0.48145466, 0.4578275, 0.40821073),
        std=(0.26862954, 0.26130258, 0.27577711),
    ),
])
def image_to_vector(image_path):
    """Encode the image stored at *image_path* into a CLIP feature tensor.

    Args:
        image_path: Path of an image file readable by PIL.

    Returns:
        The tensor returned by ``model.encode_image`` for a batch that
        contains only this image.
    """
    # Close the file as soon as the pixels are decoded, and force three
    # channels so grayscale / RGBA / palette files match the RGB input the
    # transform and the encoder expect.
    with Image.open(image_path) as source:
        rgb_image = source.convert("RGB")
    # unsqueeze(0) adds the leading batch dimension.
    batch = transform(rgb_image).unsqueeze(0).to(device)
    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        image_features = model.encode_image(batch)
    return image_features
# Example: measure how similar two images are.
image1_path = 'path/to/image1.jpg'
image2_path = 'path/to/image2.jpg'
# Encode both files with the same helper, first image first.
vector1, vector2 = (
    image_to_vector(path) for path in (image1_path, image2_path)
)
# Cosine similarity taken along the last dimension of the two feature tensors.
similarity = torch.cosine_similarity(vector1, vector2, dim=-1)
print(f"Similarity between the images: {similarity.item()}")
在这个例子中,image_to_vector 函数加载图片、处理大小和格式,然后使用 CLIP-ViT 模型计算图像的特征向量。最后,使用余弦相似度计算这两个向量的相似性。
2、图片向量插入qdrant向量数据库并搜索

import os

from PIL import Image
from qdrant_client import QdrantClient, models
from tqdm import tqdm
from transformers import (
    AutoModelForZeroShotImageClassification,
    AutoProcessor,
    AutoTokenizer,
)

# Number of records sent to qdrant per upload call.
BATCH_SIZE = 30

client = QdrantClient("localhost", port=6333)
print("[INFO] Client created...")

# Walk root_dir and keep every readable .jpeg as an in-memory RGB image.
root_dir = "new_dataset"
image_dataset = []
for subdir, dirs, files in os.walk(root_dir):
    for file in files:
        # Look only for image files with a .jpeg extension.
        if file.endswith(".jpeg"):
            image_path = os.path.join(subdir, file)
            try:
                # convert() decodes the pixels into a new image, so the file
                # handle can be closed immediately instead of staying open
                # for every image in the dataset. Forcing RGB also keeps the
                # stored pixel list as 3-tuples, which is what the search
                # side rebuilds with Image.new("RGB", ...).
                with Image.open(image_path) as image:
                    image_dataset.append(image.convert("RGB"))
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error loading image {image_path}: {e}")

print("[INFO] Loading the model...")
model_name = "openai/clip-vit-base-patch32"
tokenizer = AutoTokenizer.from_pretrained(model_name)
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForZeroShotImageClassification.from_pretrained(model_name)

print("[INFO] Creating qdrant data collection...")
client.create_collection(
    collection_name="animals_img_db",
    # size=512 must match the length of the vectors uploaded below.
    vectors_config=models.VectorParams(size=512, distance=models.Distance.COSINE),
)

print("[INFO] Creating a data collection...")
records = []
for idx, sample in tqdm(enumerate(image_dataset), total=len(image_dataset)):
    processed_img = processor(text=None, images=sample, return_tensors="pt")['pixel_values']
    # One image per call, so take row 0 of the returned batch.
    img_embds = model.get_image_features(processed_img).detach().numpy().tolist()[0]
    # The raw pixels and the size travel in the payload so a search hit can
    # be turned back into an image without touching the original file.
    img_px = list(sample.getdata())
    img_size = sample.size
    records.append(
        models.Record(
            id=idx,
            vector=img_embds,
            payload={"pixel_lst": img_px, "img_size": img_size},
        )
    )

# Upload in fixed-size batches. Starting at 0 and slicing forward covers
# every record, including a final partial batch; the previous
# range(30, len(records), 30) loop never uploaded the last batch.
for start in range(0, len(records), BATCH_SIZE):
    batch = records[start:start + BATCH_SIZE]
    client.upload_records(
        collection_name="animals_img_db",
        records=batch,
    )
    print(f"finished {start + len(batch)}")

# 3. Image search (original section heading: 3、图片搜索)
import gradio as gr
from PIL import Image
from qdrant_client import QdrantClient
from transformers import (
    AutoModelForZeroShotImageClassification,
    AutoProcessor,
    AutoTokenizer,
)

client = QdrantClient("localhost", port=6333)
print("[INFO] Client created...")

# Load the model.
print("[INFO] Loading the model...")
model_name = "openai/clip-vit-base-patch32"
tokenizer = AutoTokenizer.from_pretrained(model_name)
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForZeroShotImageClassification.from_pretrained(model_name)


def process_text(image):
    """Return the stored images most similar to *image*.

    The query image is embedded with the CLIP model, the five nearest
    vectors are fetched from the ``animals_img_db`` collection, and each
    hit's payload is rebuilt into a PIL image.

    Args:
        image: The image supplied by the Gradio input component; it is
            passed to the processor unchanged.

    Returns:
        A list of ``PIL.Image.Image`` objects, one per search hit, in the
        order the hits were returned.
    """
    processed_img = processor(text=None, images=image, return_tensors="pt")['pixel_values']
    # One image per call, so take row 0 of the returned batch.
    img_embeddings = model.get_image_features(processed_img).detach().numpy().tolist()[0]
    hits = client.search(
        collection_name="animals_img_db",
        query_vector=img_embeddings,
        limit=5,
    )
    images = []
    for hit in hits:
        img_size = tuple(hit.payload['img_size'])
        pixel_lst = hit.payload['pixel_lst']
        # Create an image from the stored pixel data. Each pixel is turned
        # back into a tuple before putdata(); presumably the payload comes
        # back with pixels as lists -- verify against the stored format.
        new_image = Image.new("RGB", img_size)
        new_image.putdata([tuple(pixel) for pixel in pixel_lst])
        images.append(new_image)
    return images


# Gradio Interface
# Built only after process_text exists: the original order referenced the
# function (and started the blocking launch()) before it was defined.
iface = gr.Interface(
    title="Building a Reverse Image Search Engine Using Qdrant Vector Search",
    description="by Niranjan Akella",
    fn=process_text,
    inputs=gr.Image(label="Input Image"),
    outputs=gr.Gallery(label="Relevant Images"),
)
iface.launch()
最终效果
网友回复
如何让ai帮我自动在小红书或抖音上自动根据需求截流与潜在客户聊天拉客?
如果用go编写一个在virtualbox中启动的简单操作系统?
go如何搭建一个零信任网络?
如何用python实现一个公网代理访问软件?
如何用go实现一个公网代理访问软件?
如何用python实现一个内网穿透打洞程序,实现内网的80端口暴露到公网上可以访问?
如何用go实现一个内网穿透打洞程序,实现内网的80端口暴露到公网上可以访问?
何为Shadowsocks 代理?
python如何实现类似php的opendir目录相互隔离的fastcgi多租户虚拟空间?
nodejs如何实现类似php的opendir目录相互隔离的fastcgi多租户虚拟空间?


