# Untitled

import gensim
import spacy
import random

# Load the pre-trained models once, at import time. The functions below read
# these module-level objects directly instead of taking them as arguments.
# spaCy pipelines: one English package and one Polish package.
nlp_en = spacy.load('en_core_web_md')
nlp_pl = spacy.load('pl_core_news_md')
# Word vectors in the binary word2vec format, loaded from a gzip-compressed
# file given by a relative path (resolved against the current working
# directory).
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)

def generate_titles(title, tags, num_titles):
    """Build ``num_titles`` variants of ``title`` by swapping tagged words.

    The title is split on whitespace. Every word whose lower-cased form
    matches a tag is passed to ``find_synonym``; if that returns a value the
    word is replaced, otherwise it is kept. ``find_synonym`` is called afresh
    for every generated title, so the titles differ from one another only
    insofar as ``find_synonym`` returns different results across calls.

    Args:
        title: The original title string.
        tags: Iterable of tag strings (compared case-insensitively). ``None``
            is treated as "no tags".
        num_titles: Number of alternative titles to generate.

    Returns:
        A list of ``num_titles`` title strings, words joined by single spaces.

    Raises:
        ValueError: If ``detect_language`` reports a language other than
            ``'en'`` or ``'pl'``.
    """
    words = title.split()

    # Only the language code is needed from here on (it is forwarded to
    # find_synonym); no spaCy pipeline is used in this function, so we just
    # validate the code instead of selecting a pipeline object.
    language = detect_language(title)
    if language not in ('en', 'pl'):
        raise ValueError("Unsupported language: {}".format(language))

    # Build the lookup set once: O(1) membership tests, and case-normalised
    # so that a tag such as "Cake" still matches the lower-cased word.
    tag_set = {tag.lower() for tag in (tags or ())}

    alternative_titles = []
    for _ in range(num_titles):
        rewritten = []
        for word in words:
            synonym = None
            if word.lower() in tag_set:
                synonym = find_synonym(word, language)
            # Fall back to the original word when it is not tagged or no
            # synonym was found.
            rewritten.append(word if synonym is None else synonym)
        alternative_titles.append(" ".join(rewritten))
    return alternative_titles

def detect_language(text):
    """Guess whether ``text`` is English (``'en'``) or Polish (``'pl'``).

    spaCy does not perform language identification: ``Doc.lang_`` is the
    language of the pipeline that created the doc, so reading it from an
    ``nlp_en`` doc yields ``'en'`` for every input. This function instead
    applies a two-step heuristic restricted to the two loaded pipelines:

    1. Any Polish-specific diacritic in the text decides for ``'pl'``.
    2. Otherwise the text is tokenized with both pipelines and the language
       whose vocabulary knows more of the alphabetic tokens wins.

    Ties (including empty text) resolve to ``'en'``.

    Args:
        text: The string to classify.

    Returns:
        ``'en'`` or ``'pl'``.
    """
    polish_letters = set("ąćęłńóśźż")
    if any(ch in polish_letters for ch in text.lower()):
        return 'pl'

    def known_words(nlp):
        # make_doc runs only the tokenizer, which is all we need here.
        # NOTE(review): is_oov is assumed to reflect word-vector coverage of
        # the loaded packages -- confirm for the installed spaCy version.
        return sum(
            1 for token in nlp.make_doc(text)
            if token.is_alpha and not token.is_oov
        )

    if known_words(nlp_pl) > known_words(nlp_en):
        return 'pl'
    return 'en'

def find_synonym(word, language, topn=10):
    """Pick a replacement for ``word`` from its nearest word2vec neighbours.

    The ``topn`` most similar vocabulary entries are fetched from the
    module-level ``model``. Entries that are merely a case variant of
    ``word`` or whose detected language differs from ``language`` are
    discarded, and one of the remaining candidates is chosen at random.
    The random choice is what lets repeated calls for the same word yield
    different replacements; always returning the top neighbour would make
    every call identical.

    Args:
        word: The word to replace (looked up case-sensitively in the model).
        language: Language code the replacement must match, as returned by
            ``detect_language``.
        topn: How many nearest neighbours to consider. Defaults to 10.

    Returns:
        A replacement string, or ``None`` when ``word`` is not in the model's
        vocabulary or no neighbour passes the filters.
    """
    try:
        neighbours = model.most_similar(positive=[word], topn=topn)
    except KeyError:
        # The word is not in the word2vec vocabulary.
        return None

    lowered = word.lower()
    candidates = [
        candidate
        for candidate, _similarity in neighbours
        if candidate.lower() != lowered
        and detect_language(candidate) == language
    ]
    return random.choice(candidates) if candidates else None

def generate_tags(title):
    """Derive tag candidates from the named entities found in ``title``.

    The title is run through the English spaCy pipeline; for each named
    entity the five nearest word2vec neighbours are collected. Entities that
    are missing from the word2vec vocabulary are skipped.

    Args:
        title: The title string to analyse.

    Returns:
        A list of lower-cased neighbour strings without duplicates, in the
        order in which they were first encountered.
    """
    entity_texts = [entity.text for entity in nlp_en(title).ents]

    # A dict keeps insertion order and gives duplicate-free keys.
    collected = {}
    for entity_text in entity_texts:
        try:
            neighbours = model.most_similar(positive=[entity_text], topn=5)
        except KeyError:
            # Entity text is not a key of the word2vec model.
            continue
        for neighbour, _similarity in neighbours:
            collected.setdefault(neighbour.lower(), None)
    return list(collected)

# Example usage. Guarded so that importing this file as a module does not run
# the demo; running it as a script prints the same output as before.
if __name__ == "__main__":
    title = "How to make a cake"
    num_titles = 10
    # Compute the tags once and reuse them for both generation and display;
    # each generate_tags call runs the spaCy pipeline and word2vec lookups.
    tags = generate_tags(title)
    alternative_titles = generate_titles(title, tags, num_titles)
    print("Original title: {}".format(title))
    print("Tags: {}".format(tags))
    print("Generated titles:")
    for i, alt_title in enumerate(alternative_titles):
        print("{}. {}".format(i+1, alt_title))