Code:
import pandas as pd

# Load the raw records.
df = pd.read_csv('data/data.csv')

# Remove Duplicates
df = df.drop_duplicates()

# Data Type Conversion
# Parse dates BEFORE dropping missing rows: errors="coerce" turns unparseable
# values into NaT, and those rows have to be removed together with the blanks.
df["Join_Date"] = pd.to_datetime(df["Join_Date"], errors="coerce")

# Handle Missing Values / Simple Outlier Detection
# The replacement value is the median of plausible ages only (<= 100), so the
# outliers being replaced cannot skew it. NaN compares False and is excluded.
valid_age_median = df.loc[df["Age"] <= 100, "Age"].median()
df["Age"] = df["Age"].fillna(valid_age_median)
df.loc[df["Age"] > 100, "Age"] = valid_age_median
df = df.dropna(subset=["Salary", "Join_Date"])

print("Cleaned data:\n", df)
    printf("Bubble: %d\n", bubble_sort(arr2, n));
    memcpy(arr2, arr, n*sizeof(int));
    printf("Selection: %d\n", selection_sort(arr2, n));
    memcpy(arr2, arr, n*sizeof(int));
    printf("Insertion: %d\n", insertion_sort(arr2, n));

    printf("\nSorted array: \n");
    for(int i=0; i<n; i++) {
        printf("%d, ", arr2[i]); 
    }

    printf("\n\n");
    return 0;
}


--------------------------------------------------

from collections import defaultdict
from itertools import combinations

def generate_candidates(lk_1, k):
    """Build size-``k`` candidate itemsets from the itemsets in ``lk_1``.

    Two itemsets are joined when their union has exactly ``k`` items; the
    union is kept only if each of its ``(k - 1)``-item subsets is itself a
    member of ``lk_1``.

    Args:
        lk_1: collection of frozensets (membership is tested with ``in``).
        k: size of the candidates to produce.

    Returns:
        A set of the unions that passed both checks.
    """
    candidates = set()
    for left, right in combinations(list(lk_1), 2):
        merged = left | right
        if len(merged) != k:
            continue
        # Prune: every (k-1)-subset must already be present in lk_1.
        subsets_present = all(
            frozenset(subset) in lk_1 for subset in combinations(merged, k - 1)
        )
        if subsets_present:
            candidates.add(merged)
    return candidates


def apriori(transactions_list, min_support):
    """Find itemsets whose absolute support is at least ``min_support``.

    Args:
        transactions_list: iterable of transactions, each an iterable of
            hashable items.
        min_support: minimum number of transactions an itemset must occur in.

    Returns:
        Dict mapping each frequent itemset (a frozenset) to its count.
    """
    # Convert every transaction to a set exactly once. This keeps the
    # 1-itemset counts consistent with the larger itemsets (a repeated item
    # counts once per transaction) and allows a one-shot iterable as input.
    baskets = [set(t) for t in transactions_list]

    counts = defaultdict(int)
    for basket in baskets:
        for item in basket:
            counts[frozenset([item])] += 1
    l_k = {itemset for itemset, count in counts.items() if count >= min_support}
    freq_itemsets = {itemset: counts[itemset] for itemset in l_k}

    k = 2
    while l_k:
        c_k = generate_candidates(l_k, k)
        if not c_k:
            break
        candidate_counts = defaultdict(int)
        for basket in baskets:
            for candidate in c_k:
                if candidate.issubset(basket):
                    candidate_counts[candidate] += 1
        l_k = {c for c in c_k if candidate_counts[c] >= min_support}
        freq_itemsets.update({itemset: candidate_counts[itemset] for itemset in l_k})
        k += 1
    return freq_itemsets


def generate_rules(freq_itemsets, min_confidence):
    """Derive association rules from a table of itemset counts.

    For every itemset with more than one item, each non-empty proper subset
    is tried as the antecedent; confidence is the itemset's count divided by
    the antecedent's count looked up in ``freq_itemsets``.

    Args:
        freq_itemsets: dict mapping frozenset -> count. Every antecedent
            tried must be a key, otherwise ``KeyError`` is raised.
        min_confidence: rules below this confidence are discarded.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples.
    """
    rules = []
    for itemset, support in freq_itemsets.items():
        size = len(itemset)
        if size <= 1:
            continue
        for lhs_size in range(1, size):
            for lhs_items in combinations(itemset, lhs_size):
                lhs = frozenset(lhs_items)
                rhs = itemset - lhs
                conf = support / freq_itemsets[lhs]
                if conf >= min_confidence:
                    rules.append((lhs, rhs, conf))
    return rules

# Sample baskets used to demonstrate the Apriori functions above.
transactions = [
    ['Sausage', 'Peanut', 'Beer'],
    ['Peanut', 'Beer', 'Apple'],
    ['Apple', 'Milk'],
    ['Sausage', 'Peanut', 'Apple'],
    ['Sausage', 'Peanut', 'Beer', 'Milk'],
    ['Sausage', 'Peanut', 'Beer', 'Apple']
]

# Absolute support threshold and minimum rule confidence.
min_sup = 3
min_conf = 0.7

freq_itemsets = apriori(transactions, min_sup)
rules = generate_rules(freq_itemsets, min_conf)

print("Association Rules:")
for rule in rules:
    ant, cons, conf = rule
    print(f"{sorted(list(ant))} => {sorted(list(cons))} (Conf: {conf:.2f})")


--------------------------------------------------

from collections import Counter
import pandas as pd

class TreeNode:
    """A single node of the prefix tree built by ``build_tree``."""

    def __init__(self, name, count, parent):
        # Item stored at this node and how many times the node was counted.
        self.name, self.count = name, count
        # Link back towards the root (None for the root itself).
        self.parent = parent
        # Child nodes keyed by item name.
        self.children = {}
        # Next node holding the same item, or None at the end of the chain.
        self.node_link = None


def build_tree(data, min_sup):
    """Build a frequency-ordered prefix tree and its header table.

    Args:
        data: sequence of transactions (each an iterable of hashable items).
            It is traversed twice, so it must be re-iterable.
        min_sup: minimum item count for an item to enter the tree.

    Returns:
        ``(root, header)`` where ``header`` maps item -> ``[count, first_node]``
        and nodes of the same item are chained through ``node_link``.
        ``(None, None)`` when no item reaches ``min_sup``.
    """
    counts = Counter(item for trans in data for item in trans)
    header = {k: [v, None] for k, v in counts.items() if v >= min_sup}
    if not header:
        return None, None

    # One global item order shared by all transactions. Sorting each
    # transaction by count alone leaves equal-count items in transaction
    # order, so the same pair could be nested both ways in the tree and its
    # support would be split between two branches. Ties are broken by the
    # item's position in the header (stable sort), which needs no ordering
    # on the items themselves.
    by_frequency = sorted(header, key=lambda item: header[item][0], reverse=True)
    rank = {item: pos for pos, item in enumerate(by_frequency)}

    # Last node of each item's node-link chain, so appending is O(1).
    tails = {}

    root = TreeNode("Null", 1, None)
    for trans in data:
        items = sorted((i for i in trans if i in header), key=rank.__getitem__)
        current = root
        for item in items:
            child = current.children.get(item)
            if child is None:
                child = TreeNode(item, 0, current)
                current.children[item] = child
                # Update node link
                if item in tails:
                    tails[item].node_link = child
                else:
                    header[item][1] = child
                tails[item] = child
            child.count += 1
            current = child
    return root, header


def mine_tree(header, min_sup, prefix, found):
    """Recursively collect frequent itemsets from a header table.

    Args:
        header: dict item -> ``[count, first_node]`` as produced by
            ``build_tree``.
        min_sup: minimum count for an itemset to be reported.
        prefix: set of items already fixed on this recursion path.
        found: list that receives ``(itemset, count)`` tuples; it is
            appended to in place.

    Returns:
        None. Results are delivered through ``found``.
    """
    # Least frequent items first.
    for item, (count, node_ptr) in sorted(header.items(), key=lambda x: x[1][0]):
        # The header may have been built with a lower threshold than the one
        # used for mining; never report an item below ``min_sup``.
        if count < min_sup:
            continue

        new_set = prefix | {item}
        found.append((new_set, count))

        # Find prefix paths
        paths = []
        curr_node = node_ptr
        while curr_node is not None:
            path, parent = [], curr_node.parent
            while parent and parent.name != "Null":
                path.append(parent.name)
                parent = parent.parent
            if path:
                # Repeat the path once per occurrence so that counting items
                # in the conditional data reproduces the node's count.
                paths.extend([path] * curr_node.count)
            curr_node = curr_node.node_link

        cond_tree, cond_header = build_tree(paths, min_sup)
        if cond_header:
            mine_tree(cond_header, min_sup, new_set, found)

# Execution
transactions = [
    ['Sausage', 'Peanut', 'Beer'],
    ['Peanut', 'Beer', 'Apple'],
    ['Apple', 'Milk'],
    ['Sausage', 'Peanut', 'Apple'],
    ['Sausage', 'Peanut', 'Beer', 'Milk'],
    ['Sausage', 'Peanut', 'Beer', 'Apple']
]

# Use ONE support threshold for both building and mining. Building with a
# lower threshold than the one used for mining puts items into the header
# that are then reported even though they are below the mining threshold.
MIN_SUP = 3

tree, header = build_tree(transactions, MIN_SUP)
patterns = []
# build_tree returns (None, None) when nothing is frequent.
if header:
    mine_tree(header, MIN_SUP, set(), patterns)

df = pd.DataFrame(patterns, columns=["Itemset", "Frequency"])
print(df.sort_values("Frequency", ascending=False).reset_index(drop=True))


--------------------------------------------------



import pandas as pd
import numpy as np

def get_entropy(s):
    """Return the base-2 Shannon entropy of the values in Series ``s``."""
    probs = s.value_counts(normalize=True)
    weighted_logs = probs * np.log2(probs)
    return -weighted_logs.sum()

def build_tree(df, target, feats):
    """Recursively build a decision tree by maximum information gain.

    Args:
        df: DataFrame holding the feature columns and the target column.
        target: name of the target column.
        feats: list of feature column names still available for splitting.

    Returns:
        A class label when all rows share one label or no features remain
        (the most frequent label in that case); otherwise a nested dict
        ``{feature: {value: subtree}}``.
    """
    labels = df[target]
    distinct = labels.unique()
    if len(distinct) == 1:
        return distinct[0]
    if not feats:
        return labels.mode()[0]

    # Find best feature using Information Gain
    total_rows = len(df)
    parent_entropy = get_entropy(labels)
    gains = {}
    for feat in feats:
        weighted_entropy = 0
        for _, part in df.groupby(feat):
            weighted_entropy += len(part) / total_rows * get_entropy(part[target])
        gains[feat] = parent_entropy - weighted_entropy

    best = max(gains, key=gains.get)
    remaining = [name for name in feats if name != best]

    # Recursive tree building: one branch per observed value of ``best``.
    branches = {}
    for value, part in df.groupby(best):
        branches[value] = build_tree(part, target, remaining)
    return {best: branches}

def predict(tree, query):
    """Walk a nested-dict decision tree and return the label for ``query``.

    Args:
        tree: a leaf value, or a dict ``{feature: {value: subtree}}``.
        query: dict mapping feature name -> value.

    Returns:
        The leaf reached, or the string ``"Unknown"`` when the query's value
        for a split feature has no branch.
    """
    node = tree
    while isinstance(node, dict):
        feature = next(iter(node))
        branches = node[feature]
        answer = query.get(feature)
        if answer not in branches:
            return "Unknown"
        node = branches[answer]
    return node


# Load the training table; every column except the target is a feature.
df = pd.read_csv('data/class.csv')
target_col = 'Play'
features = []
for column in df.columns:
    if column != target_col:
        features.append(column)

tree = build_tree(df, target_col, features)
print("Tree Structure:", tree)

# Classify a single example with the learned tree.
query = {'Outlook': 'Rain', 'Temp': 'Cool', 'Humidity': 'Normal', 'Wind': 'Weak'}
prediction = predict(tree, query)
print("Prediction:", prediction)


--------------------------------------------------
        
import pandas as pd

def train_nb(df, target):
    """Fit a categorical naive Bayes model with add-one smoothing.

    Args:
        df: DataFrame of categorical feature columns plus the target column.
        target: name of the target column.

    Returns:
        Dict with ``'prior'`` (class -> relative frequency) and ``'lk'``
        (class -> feature -> value -> smoothed conditional probability).
    """
    classes = df[target].unique()
    features = [c for c in df.columns if c != target]
    model = {'prior': df[target].value_counts(normalize=True).to_dict(), 'lk': {}}

    # All values each feature takes anywhere in the data. Counts are aligned
    # to this vocabulary so that a value never seen with a given class still
    # gets the smoothed probability 1 / (n_class + V) instead of being left
    # out of the table.
    vocab = {f: df[f].dropna().unique() for f in features}

    for cls in classes:
        df_c = df[df[target] == cls]
        model['lk'][cls] = {
            f: (
                (df_c[f].value_counts().reindex(vocab[f], fill_value=0) + 1)
                / (len(df_c) + len(vocab[f]))
            ).to_dict()
            for f in features
        }
    return model


def predict_nb(model, query):
    """Return the class with the highest naive Bayes score for ``query``.

    Args:
        model: dict with ``'prior'`` and ``'lk'`` tables as returned by
            ``train_nb``.
        query: dict mapping feature name -> observed value.

    Returns:
        The class whose prior times the product of per-feature likelihoods
        is largest.
    """
    scores = {}
    for cls, prior in model['prior'].items():
        score = prior
        for feat, val in query.items():
            # Values missing from the table fall back to a small constant.
            likelihood = model['lk'][cls][feat].get(val, 1 / 100)
            score *= likelihood
        scores[cls] = score
    best_cls, _ = max(scores.items(), key=lambda pair: pair[1])
    return best_cls


# Train on the labelled table, then classify one example.
df = pd.read_csv('data/class.csv')
model = train_nb(df, 'Play')

query = {'Outlook': 'Rain', 'Temp': 'Mild', 'Humidity': 'High', 'Wind': 'Weak'}
predicted = predict_nb(model, query)

print(f"Query: {query}")
print(f"Predicted: {predicted}")

--------------------------------------------------


import numpy as np
import pandas as pd

def svm_fit(X, y, lr=0.001, lambda_param=0.01, n_iters=1000):
    """Train a linear SVM with per-sample sub-gradient updates.

    Args:
        X: 2-D array with one sample per row.
        y: 1-D array of labels. The smallest label (first entry of
            ``np.unique(y)``) is mapped to -1 and every other label to +1.
        lr: learning rate.
        lambda_param: L2 regularisation strength.
        n_iters: number of passes over the samples.

    Returns:
        ``(w, b, unique_classes)``: weight vector, bias and the sorted
        unique labels.
    """
    _, n_features = X.shape

    unique_classes = np.unique(y)
    signs = np.where(y == unique_classes[0], -1, 1)

    w = np.zeros(n_features)
    b = 0

    # Gradient Descent
    for _ in range(n_iters):
        for x_i, sign in zip(X, signs):
            reg_grad = 2 * lambda_param * w
            if sign * (np.dot(x_i, w) - b) >= 1:
                # Margin satisfied: only the regulariser contributes.
                w -= lr * reg_grad
                continue
            # Margin violated: add the hinge-loss term and move the bias.
            w -= lr * (reg_grad - np.dot(x_i, sign))
            b -= lr * sign

    return w, b, unique_classes


def svm_predict(X, w, b, classes):
    """Map samples to class labels using the sign of ``X.w - b``.

    A non-negative score selects ``classes[1]``; a negative score selects
    ``classes[0]``.
    """
    decision = np.dot(X, w) - b
    on_positive_side = decision >= 0
    return classes[np.where(on_positive_side, 1, 0)]


# Toy dataset: two features per student and a binary result label.
data = {
    'Hours': [1, 2, 3, 7, 8, 9, 2, 8],
    'Attendance': [30, 40, 50, 80, 90, 95, 20, 70],
    'Result': [0, 0, 0, 1, 1, 1, 0, 1]
}
df = pd.DataFrame(data)

y = df['Result'].values
X = df[['Hours', 'Attendance']].values

weights, bias, classes = svm_fit(X, y, lr=0.001, n_iters=5000)

# Two unseen students to classify.
test_data = np.array([[2, 35], [6, 85]])
predictions = svm_predict(test_data, weights, bias, classes)

print("Predictions: ")
for i, pred in enumerate(predictions):
    if pred == 1:
        status = "Pass"
    else:
        status = "Fail"
    print(
        f"Student {i+1} (Hours: {test_data[i][0]}, "
        f"Attendance: {test_data[i][1]}%): {status}"
    )


--------------------------------------------------


import numpy as np
import matplotlib.pyplot as plt

# Generate linear data
X = np.random.rand(200) * 5
W_act = np.random.randint(1, 5)
B_act = np.random.randint(5)
Y = W_act * X + B_act + np.random.randn(200) * 2

# Linear Regression
# Closed-form least squares for one feature: slope = cov(X, Y) / var(X).
x_mean = np.mean(X)
y_mean = np.mean(Y)

W = np.sum((X - x_mean) * (Y - y_mean)) / np.sum((X - x_mean) ** 2)
B = y_mean - W * x_mean

# Visualization
plt.figure(figsize=(10, 5))
plt.scatter(X, Y, label="Data Points", alpha=0.6)

X_line = np.linspace(0, 5, 10)
Z_line = W * X_line + B
# The line needs a label of its own, otherwise plt.legend() lists only the
# scatter points.
plt.plot(X_line, Z_line, color='orange', linewidth=3, label="Fitted Line")

plt.title("Simple Linear Regression")
plt.legend()
plt.show()
    


--------------------------------------------------

import numpy as np
import pandas as pd

# Dummy data
data = {
    "YearsExperience": [1.1, 1.3, 1.5, 2.0, 2.2, 2.9, 3.0, 3.2, 3.2, 3.7],
    "Certifications": [1, 2, 1, 3, 2, 4, 3, 2, 5, 4],
    "Salary": [39.34, 46.20, 37.73, 43.52, 39.89, 56.64, 60.15, 54.44, 56.44, 57.18],
}
df = pd.DataFrame(data)

X = df[["YearsExperience", "Certifications"]].values
y = df["Salary"].values

# Ordered 80/20 split: the first rows train, the remaining rows test.
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Prepend a column of ones so the first weight acts as the intercept.
X_b = np.c_[np.ones((len(X_train), 1)), X_train]

# Solve the least-squares problem directly instead of forming and inverting
# X^T X, which is numerically fragile and fails on a singular matrix.
weights = np.linalg.lstsq(X_b, y_train, rcond=None)[0]

intercept = weights[0]
coefficients = weights[1:]


def predict(input_data, intercept, coefficients):
    """Return ``input_data . coefficients + intercept`` for each row."""
    return input_data.dot(coefficients) + intercept


predictions = predict(X_test, intercept, coefficients)

mse = np.mean((predictions - y_test) ** 2)

print(f"Actual values: {y_test}")
print(f"Predicted values: {predictions}")
print(f"Mean Squared Error: {mse:.4f}")


--------------------------------------------------


import numpy as np
from cluster_tools import generate_data, plot_clusters

def kmeans(X, k, max_iters=100, seed=0):
    """Cluster the rows of ``X`` into ``k`` groups with Lloyd's algorithm.

    Args:
        X: 2-D array with one sample per row.
        k: number of clusters; must not exceed the number of rows.
        max_iters: upper bound on the number of update rounds.
        seed: seed for choosing the initial centroids (default keeps the
            previous fixed initialisation).

    Returns:
        ``(centroids, labels)`` where ``labels[i]`` is the index of the
        returned centroid closest to ``X[i]``.
    """
    # Initialize centroids randomly
    rng = np.random.default_rng(seed)
    initial_indices = rng.choice(X.shape[0], k, replace=False)
    centroids = X[initial_indices]

    for i in range(max_iters):
        distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
        labels = np.argmin(distances, axis=1)

        clusters = [X[labels == j] for j in range(k)]
        # An empty cluster keeps its previous centroid.
        new_centroids = np.array([
            members.mean(axis=0) if len(members) > 0 else centroids[j]
            for j, members in enumerate(clusters)
        ])

        if np.allclose(centroids, new_centroids):
            print(f"Algorithm converged at iteration {i}")
            break
        centroids = new_centroids

    # Assign against the centroids that are actually returned. Without this
    # the labels lag one update behind when max_iters is exhausted, and do
    # not exist at all when max_iters is 0.
    distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
    labels = np.argmin(distances, axis=1)

    return centroids, labels


# Cluster a generated dataset with k-means and plot the outcome.
K = 4
data = generate_data(centers=K, n_samples=400)

result = kmeans(data, k=K)
final_centroids, final_labels = result
plot_clusters(data, final_labels, final_centroids, k=K)

--------------------------------------------------

import numpy as np
from cluster_tools import generate_data, plot_clusters

def k_medioids(X, k, max_iters=100, seed=42):
    """Cluster the rows of ``X`` around ``k`` medoids (actual data points).

    Args:
        X: 2-D array with one sample per row.
        k: number of clusters; must not exceed the number of rows.
        max_iters: upper bound on the number of update rounds.
        seed: seed for choosing the initial medoids (default keeps the
            previous fixed initialisation).

    Returns:
        ``(medoids, labels)`` where ``labels[i]`` is the index of the
        returned medoid closest to ``X[i]``.
    """
    rng = np.random.default_rng(seed)
    medoid_indices = rng.choice(X.shape[0], k, replace=False)
    # Index-array selection yields a copy, so X itself is never modified.
    medoids = X[medoid_indices]

    for i in range(max_iters):
        distances = np.linalg.norm(X[:, np.newaxis] - medoids, axis=2)
        labels = np.argmin(distances, axis=1)

        # Snapshot of this round's starting medoids. Comparing against the
        # initial indices (which are never updated) would only detect
        # convergence when the very first medoids happen to be final.
        old_medoids = medoids.copy()

        for j in range(k):
            cluster_points = X[labels == j]
            if len(cluster_points) > 0:
                dist_matrix = np.linalg.norm(
                    cluster_points[:, np.newaxis] - cluster_points, axis=2
                )
                # The medoid is the member with the smallest total distance
                # to the rest of its cluster.
                best_point_idx = np.argmin(dist_matrix.sum(axis=1))
                medoids[j] = cluster_points[best_point_idx]

        if np.allclose(medoids, old_medoids):
            print(f"K-Medoids converged at iteration {i}")
            break

    # Assign against the medoids that are actually returned, so the labels
    # are valid even when max_iters is exhausted or is 0.
    distances = np.linalg.norm(X[:, np.newaxis] - medoids, axis=2)
    labels = np.argmin(distances, axis=1)

    return medoids, labels

# Cluster a generated dataset with k-medoids and plot the outcome.
K = 4
data = generate_data(centers=K, n_samples=400)

result = k_medioids(data, k=K)
final_centroids, final_labels = result
plot_clusters(data, final_labels, final_centroids, k=K)


--------------------------------------------------



import numpy as np
from cluster_tools import generate_data, plot_clusters

def agglomerative(X, k):
    """Merge points bottom-up until ``k`` clusters remain.

    Every point starts as its own cluster. On each round the two clusters
    with the smallest minimum point-to-point distance are merged
    (single linkage).

    Args:
        X: 2-D array with one sample per row.
        k: number of clusters to stop at.

    Returns:
        ``(centroids, labels)``: the mean of each final cluster and the
        cluster index of every sample.
    """
    n_samples = X.shape[0]

    pairwise = np.linalg.norm(X[:, np.newaxis] - X, axis=2)
    # A point must never be considered its own nearest neighbour.
    np.fill_diagonal(pairwise, np.inf)
    groups = [[idx] for idx in range(n_samples)]

    while len(groups) > k:
        best_gap = np.inf
        best_pair = (0, 0)

        for a, members_a in enumerate(groups):
            for b in range(a + 1, len(groups)):
                gap = np.min(pairwise[np.ix_(members_a, groups[b])])
                # Strict comparison: the first pair found wins a tie.
                if gap < best_gap:
                    best_gap, best_pair = gap, (a, b)

        keep, absorb = best_pair
        groups[keep].extend(groups[absorb])
        del groups[absorb]

    centroids = np.zeros((k, X.shape[1]))
    labels = np.zeros(n_samples, dtype=int)

    for cluster_id, members in enumerate(groups):
        centroids[cluster_id] = np.mean(X[members], axis=0)
        labels[members] = cluster_id

    return centroids, labels

# Cluster a generated dataset bottom-up and plot the outcome.
K = 4
data = generate_data(centers=K, n_samples=150)

result = agglomerative(data, k=K)
final_centroids, final_labels = result
plot_clusters(data, final_labels, final_centroids, k=K)

--------------------------------------------------


import numpy as np
from cluster_tools import generate_data, plot_clusters

def dbscan(X, eps=0.5, min_samples=5):
    """Density-based clustering of the rows of ``X``.

    Args:
        X: 2-D array with one sample per row.
        eps: neighbourhood radius (a point counts as its own neighbour).
        min_samples: minimum neighbourhood size for a point to seed or
            extend a cluster.

    Returns:
        ``(centroids, labels)``: ``labels[i]`` is the cluster id of sample
        ``i`` or -1 when it was not assigned to any cluster; ``centroids``
        holds the mean of each cluster in ascending label order.
    """
    n_samples = X.shape[0]
    # -1 marks both "not yet visited" and "noise".
    labels = np.full(n_samples, -1)
    cluster_id = 0

    dist_matrix = np.linalg.norm(X[:, np.newaxis] - X, axis=2)

    for i in range(n_samples):
        if labels[i] != -1:
            continue

        neighbors = np.where(dist_matrix[i] <= eps)[0]

        # Not dense enough to start a cluster; it may still be claimed
        # later as a border point of another cluster.
        if len(neighbors) < min_samples:
            continue

        labels[i] = cluster_id

        # Work list that grows while the cluster is being expanded.
        seeds = list(neighbors)

        idx = 0
        while idx < len(seeds):
            current_point = seeds[idx]
            idx += 1

            # Points already owned by a cluster keep their label.
            if labels[current_point] == -1:
                labels[current_point] = cluster_id

            new_neighbors = np.where(dist_matrix[current_point] <= eps)[0]
            if len(new_neighbors) >= min_samples:
                for neighbor in new_neighbors:
                    if labels[neighbor] == -1:
                        labels[neighbor] = cluster_id
                        seeds.append(neighbor)

        cluster_id += 1

    unique_labels = [l for l in np.unique(labels) if l != -1]
    num_clusters = len(unique_labels)

    centroids = np.zeros((num_clusters, X.shape[1]))
    for idx, l in enumerate(unique_labels):
        centroids[idx] = np.mean(X[labels == l], axis=0)

    return centroids, labels

# Cluster a generated dataset by density and plot the outcome.
data = generate_data(centers=5, n_samples=200)

result = dbscan(data, eps=1, min_samples=5)
final_centroids, final_labels = result
n_found = len(final_centroids)
plot_clusters(data, final_labels, final_centroids, k=n_found)


--------------------------------------------------


import numpy as np

def sigmoid(x):
    """Return the logistic function ``1 / (1 + e^-x)`` of ``x``."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def d_sigmoid(x):
    """Return the derivative of ``sigmoid`` evaluated at ``x``."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def forward(w01, w12, b1, b2, x):
    """Run input ``x`` through the two-layer network and return its output.

    Args:
        w01, b1: weights and bias of the hidden layer.
        w12, b2: weights and bias of the output layer.
        x: input vector.

    Returns:
        The first element of the output layer's sigmoid activation.
    """
    hidden = sigmoid(w01.dot(x) + b1)
    output = sigmoid(w12.dot(hidden) + b2)
    return output[0]


def train(w01, w12, b1, b2, x, d):
    """Perform one backpropagation step on a single sample.

    The step size is read from the module-level ``alpha``. ``w01``, ``w12``
    and ``b1`` are updated with in-place operators; the updated values are
    also returned.

    Args:
        w01, b1: weights and bias of the hidden layer.
        w12, b2: weights and bias of the output layer.
        x: input vector.
        d: desired output for ``x``.

    Returns:
        ``(w01, w12, b1, b2)`` after the update.
    """
    # Forward pass, keeping the pre-activations for the derivatives.
    hidden_in = w01.dot(x) + b1
    hidden_out = sigmoid(hidden_in)
    output_in = (w12.dot(hidden_out) + b2)[0]
    error = d - sigmoid(output_in)

    # Both deltas are computed before any weight is touched, so the hidden
    # delta uses the pre-update output weights.
    output_delta = error * d_sigmoid(output_in)
    hidden_delta = (output_delta * w12.flatten()) * d_sigmoid(hidden_in)

    b2 += alpha * output_delta
    w12 += alpha * output_delta * hidden_out
    b1 += alpha * hidden_delta
    w01 += alpha * np.outer(hidden_delta, x)

    return w01, w12, b1, b2


# Step size read by train() and the number of single-sample updates.
alpha = 0.1
N_ITER = 1_000

# Weights start uniformly in [-1, 1); biases start at zero.
w01 = np.random.rand(2, 2) * 2 - 1
w12 = np.random.rand(1, 2) * 2 - 1
b1 = np.zeros(2)
b2 = 0

for _ in range(N_ITER):
    # Draw a random pair of bits; the target is their XOR.
    x = np.random.randint(0, 2, (2,))
    t = int(x[0] ^ x[1])
    updated = train(w01, w12, b1, b2, x, t)
    w01, w12, b1, b2 = updated

print("Classification:")
for x in [(0,0), (0,1), (1,0), (1,1)]:
    inputs = np.array(x)
    y = forward(w01, w12, b1, b2, inputs)
    print(f"{x} -> {round(y)}")

--------------------------------------------------