SUBANGKAR

flickr-img-only-imgcap-dict

Dec 1st, 2020
704
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. from pickle import dump
  6.  
  # Default dataset locations. Both must end with a path separator because
  # file paths are built from them by plain string concatenation.
  IMAGES_PATH = "data/Flicker8k_Dataset/"
  TOKENS_PATH = "data/Flickr8k_text/"


  def image_to_caption_dict(image_path=IMAGES_PATH, tokens_path=TOKENS_PATH):
      """Group the captions in ``Flickr8k.token.txt`` by image id.

      Each non-blank line of the token file is split on whitespace. The first
      token names the image (presumably ``<id>.jpg#<n>`` in the Flickr8k
      layout -- confirm against the dataset); everything before its first
      ``.`` becomes the dictionary key. The remaining tokens are re-joined
      with single spaces to form one caption.

      Args:
          image_path: Unused by this function; kept so existing callers that
              pass it keep working.
          tokens_path: Directory prefix (including the trailing separator)
              that contains ``Flickr8k.token.txt``.

      Returns:
          dict mapping image id (str) to the list of its captions (list of
          str), in file order. An empty token file yields an empty dict.

      Raises:
          OSError: If the token file cannot be opened.
      """
      descriptions = {}

      # Stream the file line by line rather than loading it whole; decode
      # explicitly so the result does not depend on the platform locale.
      with open(tokens_path + "Flickr8k.token.txt", encoding="utf-8") as f:
          for line in f:
              tokens = line.split()
              if not tokens:
                  # Blank or whitespace-only line: nothing to record. Without
                  # this guard, tokens[0] would raise IndexError.
                  continue

              # Drop the extension (and anything after it) from the image name.
              image_id = tokens[0].split(".")[0]
              image_desc = " ".join(tokens[1:])

              descriptions.setdefault(image_id, []).append(image_desc)

      return descriptions
  33.  
  from IPython.display import Image, display

  # Build the image-id -> captions lookup from the default token file.
  descriptions = image_to_caption_dict()

  # Notebook-style peek at the captions stored for one known image id.
  descriptions["1000268201_693b08cb0e"]

  # Select one image id by its position in the dict and render that image.
  tmp = list(descriptions)[8090]
  z = Image(filename=f"{IMAGES_PATH}{tmp}.jpg")
  display(z)

  # Print every caption recorded for the selected image, one per line.
  for cap in descriptions[tmp]:
      print(cap)
RAW Paste Data