# CNN External

"""
Font Classification using CNN - Detailed Parameter Explanations
==============================================================

This script implements a Convolutional Neural Network (CNN) to classify
alphabetic characters (A-Z) from font images with comprehensive parameter explanations.
"""

# =============================================================================
# 1. IMPORT REQUIRED LIBRARIES
# =============================================================================

import tensorflow as tf
import numpy as np
from tensorflow import keras
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.preprocessing import image

# Library Explanations:
# - tensorflow: Main deep learning framework
# - numpy: For numerical operations and array manipulations
# - keras: High-level neural network API (part of TensorFlow)
# - Sequential: Model type that allows stacking layers linearly
# - Conv2D: 2D convolution layer for feature extraction
# - MaxPooling2D: Pooling layer for dimension reduction
# - Dense: Fully connected layer for classification
# - Dropout: Regularization layer to prevent overfitting
# - Flatten: Converts 2D feature maps to 1D vector
# - image: Utilities for image loading and preprocessing

# =============================================================================
# 2. LOAD AND PREPARE DATASET
# =============================================================================

# Build the training and validation datasets in one call. Keras scans the
# directory and turns every subdirectory (A/, B/, C/, ...) into one class;
# because subset="both" the call returns a (training, validation) pair.
train_ds, val_ds = keras.utils.image_dataset_from_directory(
    # directory: Root folder of the image dataset.
    #   WHEN TO CHANGE: Point this at your own dataset location.
    directory="E:/Maroon/College/S9/DL/Fonts",

    # validation_split: Fraction of the data held out for validation (0.2 = 20%).
    #   RANGE: 0.0 to 1.0; typically 0.1-0.15 for large datasets and
    #   0.2-0.3 for smaller ones.
    validation_split=0.2,

    # subset: Which split(s) to return: "training", "validation" or "both".
    #   WHEN TO CHANGE: Name a single split if only one dataset is needed.
    subset="both",

    # seed: Fixes the shuffling so the train/validation split is the same on
    #   every run. Use another value to get a different split.
    seed=1337,

    # color_mode: Number of channels the images are loaded with:
    #   "grayscale" (1 channel), "rgb" (3 channels) or "rgba" (4 channels).
    #   WHEN TO CHANGE: Use "rgb" for colored images.
    color_mode="grayscale",

    # image_size: (height, width) every image is resized to. It determines the
    #   model's input shape and the compute cost.
    #   WHEN TO CHANGE: Larger (64x64, 224x224) for more detail, smaller for speed.
    image_size=(28, 28),
)

# =============================================================================
# 3. DATA NORMALIZATION
# =============================================================================

def _scale_to_unit_range(images, labels):
    """Rescale one batch of images from [0, 255] to [0, 1]; labels pass through."""
    return tf.cast(images, tf.float32) / 255.0, labels


# The same transformation is applied to both splits so that training and
# validation data are preprocessed identically.
train_ds = train_ds.map(_scale_to_unit_range)
val_ds = val_ds.map(_scale_to_unit_range)

# Parameter Explanations:
#
# .map() function:
# - PURPOSE: Applies a transformation to every element (here: every batch) of the dataset
# - _scale_to_unit_range(images, labels): Receives the image batch and its labels,
#   returns the rescaled images together with the unchanged labels
#
# tf.cast(images, tf.float32):
# - PURPOSE: Guarantees the pixel data is float32 before the division
# - NOTE: With its default bilinear resize, image_dataset_from_directory already
#   yields float32 images, so the cast acts as a safeguard if the pipeline changes
# - WHEN TO CHANGE: Use tf.float16 for memory savings, tf.float64 for precision
#
# / 255.0:
# - PURPOSE: Normalizes pixel values from [0,255] to [0,1]
# - WHY CRITICAL: Small, consistently scaled inputs keep training stable and
#   help the model converge faster
# - ALTERNATIVES: /127.5 - 1 for [-1,1] range, custom normalization for specific needs

# =============================================================================
# 4. BUILD CNN MODEL ARCHITECTURE
# =============================================================================

# Shape trace for one 28x28 grayscale image through the network:
#   (28, 28, 1) -> Conv2D(32) -> (28, 28, 32) -> MaxPooling2D -> (14, 14, 32)
#   -> Conv2D(64) -> (14, 14, 64) -> MaxPooling2D -> (7, 7, 64)
#   -> Flatten -> (3136,) -> Dense(1024) -> Dropout -> Dense(26) -> 26 class probabilities
model = Sequential([
    Conv2D(32,                        # filters: Number of feature detectors (kernels)
                                      # PURPOSE: How many different features to detect
                                      # WHEN TO INCREASE: More complex datasets (64, 128, 256)
                                      # WHEN TO DECREASE: Simpler datasets or less computation (16, 8)

           (5, 5),                    # kernel_size: Size of convolution window (height, width)
                                      # PURPOSE: Defines the receptive field size
                                      # OPTIONS: (3,3) for fine details, (5,5) for broader features, (7,7) for large patterns
                                      # WHEN TO CHANGE: Smaller for detailed features, larger for global patterns

           activation="relu",         # activation: Non-linear activation function
                                      # PURPOSE: Introduces non-linearity for complex pattern learning
                                      # OPTIONS: "relu" (most common), "tanh", "sigmoid"; a leaky ReLU helps with
                                      #          dying ReLU (how to select it depends on the Keras version)

           padding="same",            # padding: Controls output size after convolution
                                      # PURPOSE: Determines spatial dimension preservation
                                      # OPTIONS: "same" (keeps size), "valid" (reduces size)
                                      # WHEN TO USE: "same" to preserve dimensions, "valid" to reduce them

           input_shape=(28, 28, 1)),  # input_shape: Input tensor dimensions (height, width, channels)
                                      # PURPOSE: Defines the expected input format
                                      # WHEN TO CHANGE: Must match the dataset's image_size and color_mode

    MaxPooling2D(padding="same"),     # MaxPooling2D: Downsampling layer
                                      # pool_size: Default (2,2) - halves height and width
                                      # PURPOSE: Reduces spatial dimensions while retaining the strongest responses
                                      # WHEN TO CHANGE: (3,3) for more aggressive downsampling;
                                      #                 (1,1) performs no downsampling at all
                                      # padding: Same options as Conv2D

    Conv2D(64,                        # filters: Increased to 64 for more complex feature detection
                                      # PURPOSE: Detects combinations of basic features from previous layer
           (5, 5),
           activation="relu",
           padding="same"),

    MaxPooling2D(padding="same"),     # Second pooling layer for further dimension reduction

    Flatten(),                        # Flatten: Converts the 2D feature maps to a 1D vector
                                      # PURPOSE: Prepares data for dense layers
                                      # NO PARAMETERS NEEDED: Reshaping is handled automatically

    Dense(1024,                       # units: Number of neurons in dense layer
                                      # PURPOSE: Learning capacity of the layer
                                      # WHEN TO INCREASE: More complex patterns (2048, 4096)
                                      # WHEN TO DECREASE: Simpler datasets or prevent overfitting (512, 256)

          activation="relu"),         # activation: Activation function for dense layer
                                      # PURPOSE: Non-linearity for complex pattern learning
                                      # TYPICAL: "relu" for hidden layers

    Dropout(0.2),                     # rate: Fraction of inputs randomly set to zero during training
                                      # PURPOSE: Prevents overfitting; inactive at inference time
                                      # RANGE: 0.0 up to (but excluding) 1.0; typically 0.2 to 0.5
                                      # WHEN TO INCREASE: If overfitting (0.3, 0.4, 0.5)
                                      # WHEN TO DECREASE: If underfitting (0.1, 0.15)

    Dense(26,                         # units: 26 for output classes (A-Z letters)
                                      # PURPOSE: One neuron per class for classification
                                      # WHEN TO CHANGE: Match number of classes in your dataset

          activation="softmax")       # activation: Output layer activation
                                      # PURPOSE: Turns the 26 scores into one probability distribution (sums to 1)
                                      # WHY "softmax": Each image shows exactly one letter, i.e. the classes are
                                      #                mutually exclusive, which is what the
                                      #                sparse_categorical_crossentropy loss assumes
                                      # ALTERNATIVE: "sigmoid" only for multi-label problems (with binary_crossentropy)
])

# =============================================================================
# 5. COMPILE AND TRAIN THE MODEL
# =============================================================================

# Configure how the model is trained: what error is minimized, which
# algorithm updates the weights, and what is reported while training.
model.compile(
    # loss: Function that quantifies the prediction error.
    #   WHEN TO USE: "sparse_categorical_crossentropy" fits multi-class
    #   problems whose labels are integers (0, 1, 2, ...).
    #   ALTERNATIVES: "categorical_crossentropy" for one-hot labels,
    #   "binary_crossentropy" for binary classification.
    loss="sparse_categorical_crossentropy",

    # optimizer: Algorithm that updates the weights from the loss gradients.
    #   OPTIONS: "adam" (adaptive), "sgd" (basic), "rmsprop" (RNN-friendly).
    #   WHEN TO CHANGE: "sgd" with momentum for fine-tuning.
    #   CUSTOM: keras.optimizers.Adam(learning_rate=0.001) for a custom
    #   learning rate.
    optimizer="adam",

    # metrics: Extra values reported per epoch; they do not affect training.
    #   WHEN TO ADD: Precision/recall-style metrics help with imbalanced
    #   datasets (check that the chosen metric supports integer labels).
    metrics=["accuracy"],
)

# Run the training loop; validation metrics are computed after every epoch.
model.fit(
    # x: Training data used to update the model weights.
    #   FORMAT: tf.data.Dataset yielding (images, labels) batches.
    x=train_ds,

    # validation_data: Evaluated at the end of each epoch without touching
    #   the weights.
    #   WHY IMPORTANT: Reveals overfitting and shows how well the model
    #   generalizes.
    #   WHEN TO SKIP: Only for very small datasets (not recommended).
    validation_data=val_ds,

    # epochs: Number of complete passes over the training dataset.
    #   WHEN TO INCREASE: Loss is still decreasing (30, 50, 100).
    #   WHEN TO DECREASE: Overfitting sets in early (10, 15).
    #   MONITORING: Watch the validation loss to spot overfitting.
    epochs=20,
)

# Additional useful parameters (not used but available):
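# batch_size: Number of samples per weight update. Do NOT pass it to fit() when x is a
#             tf.data.Dataset (as here) - the dataset already yields batches. Set it where
#             the dataset is built, e.g. image_dataset_from_directory(..., batch_size=64)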
# callbacks: [EarlyStopping, ModelCheckpoint, ReduceLROnPlateau] for automation
# verbose: 0 (silent), 1 (progress bar), 2 (one line per epoch)

# =============================================================================
# 6. LOAD TEST IMAGE FOR PREDICTION
# =============================================================================

# Load the single image to classify, preprocessed the same way as the
# training images (same size, same channel count, same resize method).
img = image.load_img(
    "E:/Maroon/College/S9/DL/Fonts/test1.png",  # path: File path to test image
                                                # PURPOSE: Location of image to classify
                                                # WHEN TO CHANGE: Test different images
                                                # FORMATS: PNG, JPG, JPEG, BMP, GIF supported

    target_size=(28, 28),             # target_size: Resize dimensions (height, width)
                                      # PURPOSE: Match model's expected input size
                                      # MUST MATCH: Model's input_shape requirements
                                      # WHEN TO CHANGE: Only if model input size changes

    color_mode="grayscale",           # color_mode: Color channel specification
                                      # PURPOSE: Match model's expected channels
                                      # OPTIONS: "grayscale" (1), "rgb" (3), "rgba" (4)
                                      # MUST MATCH: Model's input channel requirements

    interpolation="bilinear"          # interpolation: Resampling method used for the resize
                                      # PURPOSE: Match the training pipeline, which resizes with the
                                      #          image_dataset_from_directory default ("bilinear")
                                      # WHY NEEDED: load_img defaults to "nearest", which would feed the
                                      #             model differently resampled pixels than it was trained on
                                      # WHEN TO CHANGE: Only together with the dataset's interpolation setting
)

# Step 1: PIL image -> numpy array of shape (28, 28, 1), scaled to [0, 1].
#         The /255.0 is CRITICAL: it must match the training preprocessing.
scaled_pixels = image.img_to_array(img) / 255.0

# Step 2: Prepend a batch axis, (28, 28, 1) -> (1, 28, 28, 1).
#         WHY NEEDED: The model expects batched input even for a single image.
#         np.newaxis in the first position is the indexing equivalent of
#         np.expand_dims(..., axis=0); placing it last would add a trailing axis.
img_array = scaled_pixels[np.newaxis, ...]

# =============================================================================
# 7. MAKE PREDICTION
# =============================================================================

# Run the forward pass on the prepared image batch.
#   x: Input data for prediction
#      EXPECTED SHAPE: (batch_size, height, width, channels) - here (1, 28, 28, 1)
#      PREPROCESSING: Must be scaled exactly like the training images ([0, 1] floats)
#   RESULT: One row of 26 class scores per input image
pred = model.predict(x=img_array)

# Additional predict() parameters (not used but available):
# batch_size: Control memory usage for large datasets (32, 64, 128)
# verbose: 0 (silent), 1 (progress bar), 2 (one line per batch)
# steps: Number of batches (for generator inputs)

# pred has shape (1, 26): 1 sample, 26 class scores.
# argmax over axis=1 (the class axis) yields the index of the highest score
# for every sample; axis=0 would instead compare across samples.
best_index_per_sample = pred.argmax(axis=1)

# The result above has shape (1,), so take element 0 to obtain the single
# predicted class index as a scalar.
pred_class = best_index_per_sample[0]

# =============================================================================
# 8. DISPLAY PREDICTION RESULT
# =============================================================================

# Index -> letter lookup table: position 0 is "A", position 25 is "Z".
class_values = [chr(ord("A") + offset) for offset in range(26)]
# ord("A"): Character code of "A" (65); adding 0..25 walks through "Z" (90)
# chr(): Converts a character code back to its character
# WHEN TO CHANGE: Start from ord("a") for lowercase a-z
# ALTERNATIVES:
#   - import string; class_values = list(string.ascii_uppercase)
#   - Manual list: ['A', 'B', 'C', ..., 'Z']
# NOTE(review): assumes the dataset's class folders are exactly A..Z so that
#   label index i corresponds to the i-th letter - confirm against the dataset

# Translate the predicted class index into its letter and report it.
predicted_letter = class_values[pred_class]
print("Predicted values:", predicted_letter)
# print() parameters (left at their defaults here):
# sep: Separator placed between the arguments (default ' ')
# end: String written after the last argument (default '\n')
# file: Output destination (default stdout)

# =============================================================================
# SUMMARY OF KEY PARAMETER TUNING GUIDELINES
# =============================================================================

"""
COMMON PARAMETER ADJUSTMENTS:

1. OVERFITTING (High training accuracy, low validation accuracy):
   - Increase dropout rate (0.3, 0.4, 0.5)
   - Reduce model complexity (fewer filters, smaller dense layers)
   - Add more data augmentation
   - Reduce epochs

2. UNDERFITTING (Low training and validation accuracy):
   - Increase model complexity (more filters, larger dense layers)
   - Decrease dropout rate (0.1, 0.15)
   - Increase epochs
   - Adjust learning rate

3. SLOW TRAINING:
   - Increase batch_size (64, 128)
   - Use smaller image_size (16x16 instead of 28x28)
   - Reduce model complexity

4. MEMORY ISSUES:
   - Decrease batch_size (16, 8)
   - Use tf.float16 instead of tf.float32
   - Reduce image_size
   - Reduce model complexity

5. POOR ACCURACY:
   - Try different optimizers (sgd, rmsprop)
   - Adjust learning rate
   - Use data augmentation
   - Ensure proper data normalization
"""