JonathanGupton

Untitled

Dec 29th, 2025
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.60 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from sentence_transformers import SentenceTransformer
  4. from sentence_transformers import util
  5. from sqlalchemy import create_engine
  6. import torch
  7.  
  8. df = pd.read_sql(query, con=engine)
  9.  
  10. product_types = [
  11. "desk",
  12. "storage pedestal",
  13. "return shell",
  14. "wall mount overhead",
  15. "glass doors",
  16. "doors",
  17. "filing cabinet",
  18. "task chair",
  19. "conference table",
  20. "workstation",
  21. "accessory",
  22. "markerboard",
  23. "lectern",
  24. "fee",
  25. "surcharge",
  26. "freight",
  27. "dealer service",
  28. "dealer commission",
  29. "lateral file",
  30. "mattress",
  31. "lock",
  32. "power infeed",
  33. "power supply",
  34. "power module",
  35. "bracket",
  36. "grommet",
  37. "wiring",
  38. "endcap",
  39. "fabric tile",
  40. "hardware",
  41. "light fixture",
  42. "lamp"
  43. ]
  44. device = "cuda" if torch.cuda.is_available() else "cpu"
  45. model = SentenceTransformer("all-MiniLM-L12-v2", device=device)
  46.  
  47. type_embeddings = model.encode(
  48. product_types,
  49. convert_to_tensor=True,
  50. normalize_embeddings=True
  51. )
  52.  
  53. descriptions = df["desc_clean"].tolist()
  54.  
  55. desc_embeddings = model.encode(
  56. descriptions,
  57. batch_size=256,
  58. convert_to_tensor=True,
  59. show_progress_bar=True,
  60. normalize_embeddings=True,
  61. )
  62.  
  63. cos_scores = util.cos_sim(desc_embeddings, type_embeddings)
  64.  
  65. best_type_idx = torch.argmax(cos_scores, dim=1).cpu().numpy()
  66.  
  67. predicted_types = [product_types[i] for i in best_type_idx]
  68.  
  69. df["product_type"] = predicted_types
  70.  
  71. best_scores = torch.max(cos_scores, dim=1).values.cpu().numpy()
  72. df["type_confidence"] = best_scores
Advertisement
Add Comment
Please, Sign In to add comment