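# Untitled -- clusters FX pairs by the co-movement of their daily returns
# (graphical lasso covariance + affinity propagation).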

from sklearn import cluster, covariance
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn import preprocessing

# FX pairs to analyse, as the symbols passed to yf.download: each one is a
# six-letter pair code followed by the "=X" suffix.  The order matters, since
# the price table is built by iterating over this list.
cfdtickers = [
    f'{pair}=X'
    for pair in (
        'AUDNZD', 'AUDCAD', 'AUDCHF', 'AUDJPY',
        'CHFJPY', 'EURGBP', 'EURAUD', 'EURJPY',
        'EURCHF', 'EURNZD', 'EURCAD', 'GBPCHF',
        'GBPJPY', 'CADCHF', 'CADJPY', 'GBPAUD',
        'GBPCAD', 'GBPNZD', 'NZDCAD', 'NZDCHF',
        'NZDJPY', 'NZDUSD', 'USDSGD', 'EURUSD',
    )
]

# Download the daily closes for every ticker in one batched call instead of
# one call per ticker.  Selecting 'Close' from a multi-ticker download gives a
# table with one column per ticker.
# NOTE(review): a single-ticker download may return a one-column DataFrame
# rather than a Series in some yfinance releases, which would break the
# DataFrame constructor below -- confirm against the installed version.
closes = yf.download(cfdtickers, start="2023-01-01", end="2024-01-01")['Close']

# Keep `data` as a ticker -> price-series mapping, in the order of cfdtickers.
data = {ticker: closes[ticker] for ticker in cfdtickers if ticker in closes.columns}

# Creating a DataFrame of prices from the downloaded data
df = pd.DataFrame(data)

# A ticker that returned no data at all would stay all-NaN and make the final
# dropna() discard every row, so remove such columns first.
df = df.dropna(axis=1, how='all')

# Fill gaps in the *prices*, not in the returns: a day without a quote then
# gets a 0% return instead of a copy of a neighbouring day's return.
df = df.ffill()

# Convert prices to daily returns.  fill_method=None because the gaps were
# already handled explicitly above.
df = df.pct_change(fill_method=None)

# Removes the first row (it has no previous price) and any leading rows from
# before a ticker's first quote.
df = df.dropna()

if df.empty:
    raise RuntimeError("No usable price data was downloaded for the requested tickers.")

# Bring every return series onto a common scale before estimating the
# covariance.  With its default settings the scaler works on a copy, so df is
# left unmodified and no explicit copy is needed.
scaler = preprocessing.StandardScaler()
X = scaler.fit_transform(df)

# Estimate the covariance with a cross-validated graphical lasso; fit()
# returns the fitted estimator itself, so the call can be chained.
edge_model = covariance.GraphicalLassoCV(verbose=True).fit(X)
covariance_matrix = edge_model.covariance_

# Perform clustering using affinity propagation, with the covariance matrix
# as the similarity between tickers.  random_state is pinned so that repeated
# runs on the same data produce the same labels.
_, labels = cluster.affinity_propagation(covariance_matrix, random_state=0)

# Only non-negative labels are real clusters: scikit-learn can label samples
# -1 when the algorithm does not converge, and that must not be counted.
cluster_ids = np.unique(labels[labels >= 0])
if cluster_ids.size == 0:
    print("Warning: affinity propagation found no clusters (it may not have converged).")

# Print the labels and the number of clusters
print("Labels:", labels)
print("Number of clusters:", len(cluster_ids))

# Group tickers by clusters
n_labels = labels.max()  # highest cluster label (name kept from the original script)
names = np.array(df.columns.tolist())

# Clusters are reported 1-based; names[labels == i] selects the tickers of cluster i.
sectorTickers = {f'Cluster {i+1}': ', '.join(names[labels == i]) for i in cluster_ids.tolist()}
print("Sector Tickers:", sectorTickers)

# Calculate the silhouette score with one sample per ticker.
#
# X has one column per ticker, so each row of X.T is one ticker's standardized
# return series and lines up one-to-one with `labels`.  Flattening the
# covariance matrix instead would score its individual entries as if they were
# 1-D samples, which says nothing about how well the tickers are separated.
# Because the columns of X are standardized, the squared Euclidean distance
# between two tickers is proportional to (1 - correlation).
# Note that this uses the empirical returns, whereas the clustering itself was
# driven by the graphical-lasso covariance estimate.
n_distinct_labels = len(np.unique(labels))
if 2 <= n_distinct_labels <= len(labels) - 1:
    silhouette_score = metrics.silhouette_score(X.T, labels)
else:
    # The silhouette is undefined for a single cluster or for one cluster per
    # sample; scikit-learn raises ValueError in those cases.
    silhouette_score = np.nan
print("Silhouette Score:", silhouette_score)