Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Zero-shot product-type tagging.
#
# Each cleaned product description and each candidate type label is embedded
# with a sentence-transformer model; every row is then given the label whose
# embedding has the highest cosine similarity to the row's description.
# Results are written to the `product_type` and `type_confidence` columns.

import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
from sqlalchemy import create_engine

# NOTE(review): `query` and `engine` are not defined anywhere in this snippet,
# so this line raises NameError if the script is run exactly as shown.
# Presumably they are built beforehand (e.g. `engine` via the imported
# `create_engine`) -- confirm and define them before this point.
df = pd.read_sql(query, con=engine)

# Candidate labels. A row's predicted type is always one of these strings;
# the position in this list is what the arg-max index below refers to.
product_types = [
    "desk",
    "storage pedestal",
    "return shell",
    "wall mount overhead",
    "glass doors",
    "doors",
    "filing cabinet",
    "task chair",
    "conference table",
    "workstation",
    "accessory",
    "markerboard",
    "lectern",
    "fee",
    "surcharge",
    "freight",
    "dealer service",
    "dealer commission",
    "lateral file",
    "mattress",
    "lock",
    "power infeed",
    "power supply",
    "power module",
    "bracket",
    "grommet",
    "wiring",
    "endcap",
    "fabric tile",
    "hardware",
    "light fixture",
    "lamp",
]

# Use the GPU when one is visible to torch, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SentenceTransformer("all-MiniLM-L12-v2", device=device)

# The label set is small, so it is encoded once in a single call.
type_embeddings = model.encode(
    product_types,
    convert_to_tensor=True,
    normalize_embeddings=True,
)

# The encoder needs plain strings. NULLs coming back from SQL show up as
# None/NaN, so map them to "" and coerce any remaining non-string values.
# Rows with a blank description still receive a label (whatever is nearest
# to the empty string) -- filter on `desc_clean` or `type_confidence`
# downstream if those rows should not be trusted.
descriptions = df["desc_clean"].fillna("").astype(str).tolist()
desc_embeddings = model.encode(
    descriptions,
    batch_size=256,
    convert_to_tensor=True,
    show_progress_bar=True,
    normalize_embeddings=True,
)

# Pairwise similarity: one row per description, one column per product type.
cos_scores = util.cos_sim(desc_embeddings, type_embeddings)

# A single reduction over the type axis yields both the winning score and
# the index of the winning label, instead of scanning the matrix twice.
top_match = torch.max(cos_scores, dim=1)
best_type_idx = top_match.indices.cpu().numpy()
best_scores = top_match.values.cpu().numpy()

predicted_types = [product_types[i] for i in best_type_idx]
df["product_type"] = predicted_types
# Raw cosine similarity of the best match -- not a calibrated probability.
df["type_confidence"] = best_scores
Advertisement
Add Comment
Please, Sign In to add comment