Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Font Classification using CNN - Detailed Parameter Explanations
- ==============================================================
- This script implements a Convolutional Neural Network (CNN) to classify
- alphabetic characters (A-Z) from font images with comprehensive parameter explanations.
- """
- # =============================================================================
- # 1. IMPORT REQUIRED LIBRARIES
- # =============================================================================
- import tensorflow as tf
- import numpy as np
- from tensorflow import keras
- from keras import Sequential
- from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
- from keras.preprocessing import image
- # Library Explanations:
- # - tensorflow: Main deep learning framework
- # - numpy: For numerical operations and array manipulations
- # - keras: High-level neural network API (part of TensorFlow)
- # - Sequential: Model type that allows stacking layers linearly
- # - Conv2D: 2D convolution layer for feature extraction
- # - MaxPooling2D: Pooling layer for dimension reduction
- # - Dense: Fully connected layer for classification
- # - Dropout: Regularization layer to prevent overfitting
- # - Flatten: Converts 2D feature maps to 1D vector
- # - image: Utilities for image loading and preprocessing
# =============================================================================
# 2. LOAD AND PREPARE DATASET
# =============================================================================
# Build the training and validation datasets in one call. Each subdirectory of
# the dataset folder (A/, B/, C/, ...) is treated as one class.
train_ds, val_ds = keras.utils.image_dataset_from_directory(
    # directory: root folder of the dataset. Update this to wherever your
    # copy of the data lives.
    directory="E:/Maroon/College/S9/DL/Fonts",
    # validation_split: fraction of the data reserved for validation
    # (0.2 = 20%). Valid range is 0.0-1.0; 0.1-0.15 is common for large
    # datasets, 0.2-0.3 for smaller ones.
    validation_split=0.2,
    # subset: which split(s) to return -- "training", "validation" or "both".
    # "both" yields the (training, validation) pair unpacked on the left.
    subset="both",
    # seed: random seed so the same train/validation split is produced on
    # every run. Use a different value for a different split.
    seed=1337,
    # color_mode: number of channels to load -- "grayscale" (1), "rgb" (3)
    # or "rgba" (4). Must agree with the model's input channels.
    color_mode="grayscale",
    # image_size: (height, width) every image is resized to. Larger sizes
    # (64x64, 224x224) keep more detail but cost more compute; this also
    # fixes the model's input shape.
    image_size=(28, 28),
)
# =============================================================================
# 3. DATA NORMALIZATION
# =============================================================================
def _to_unit_range(images, labels):
    """Cast a batch of images to float32 and divide by 255; labels pass through.

    Dividing by 255.0 maps 8-bit pixel values from [0, 255] to [0, 1].
    An alternative scaling is ``x / 127.5 - 1`` for a [-1, 1] range.
    """
    return tf.cast(images, tf.float32) / 255.0, labels


# Dataset.map() applies the function to every (images, labels) element of the
# dataset. Both splits get the same scaling, and any image fed to the model at
# prediction time must be scaled the same way.
train_ds = train_ds.map(_to_unit_range)
val_ds = val_ds.map(_to_unit_range)
# =============================================================================
# 4. BUILD CNN MODEL ARCHITECTURE
# =============================================================================
# Two convolution + pooling stages extract features; a dense head classifies
# them into the 26 letter classes.
model = Sequential([
    # Stage 1 -- Conv2D(filters, kernel_size, ...):
    #   filters=32: number of feature detectors. Raise (64, 128, 256) for more
    #     complex data, lower (16, 8) for simpler data or less computation.
    #   kernel_size=(5, 5): size of the convolution window. (3, 3) captures
    #     finer detail, (7, 7) larger patterns.
    #   activation="relu": non-linearity; alternatives include "tanh",
    #     "sigmoid" and "leaky_relu".
    #   padding="same": keeps the spatial size; "valid" would shrink it.
    #   input_shape=(28, 28, 1): (height, width, channels). Must match the
    #     image_size and color_mode used when loading the data.
    Conv2D(
        32,
        (5, 5),
        activation="relu",
        padding="same",
        input_shape=(28, 28, 1),
    ),
    # Downsample with the default (2, 2) pooling window: 28x28 -> 14x14.
    MaxPooling2D(padding="same"),
    # Stage 2 -- 64 filters combine the basic features found by stage 1.
    Conv2D(64, (5, 5), activation="relu", padding="same"),
    # Second downsampling step: 14x14 -> 7x7.
    MaxPooling2D(padding="same"),
    # Flatten the 2D feature maps into a 1D vector for the dense layers.
    Flatten(),
    # Hidden dense layer. units=1024 sets its capacity: increase (2048, 4096)
    # for more complex patterns, decrease (512, 256) for simpler data or to
    # reduce overfitting.
    Dense(1024, activation="relu"),
    # Dropout randomly disables this fraction of units during training to
    # reduce overfitting. Typical values are 0.2-0.5; raise if overfitting,
    # lower (0.1, 0.15) if underfitting.
    Dropout(0.2),
    # Output layer: one unit per class (26 letters A-Z); change the unit count
    # to match the number of classes in your dataset.
    # FIX: the activation was "sigmoid", which scores each class independently
    # and is meant for multi-label problems. Each image here has exactly one
    # letter and the model is trained with sparse_categorical_crossentropy, so
    # "softmax" is used to make the 26 outputs a single probability
    # distribution.
    Dense(26, activation="softmax"),
])
# =============================================================================
# 5. COMPILE AND TRAIN THE MODEL
# =============================================================================
# Configure how the model learns before training starts.
model.compile(
    # loss: the error the optimizer minimizes. "sparse_categorical_crossentropy"
    # is for multi-class problems whose labels are integers (0, 1, 2, ...).
    # Use "categorical_crossentropy" for one-hot labels and
    # "binary_crossentropy" for binary classification.
    loss="sparse_categorical_crossentropy",
    # optimizer: the weight-update algorithm. "adam" is adaptive; "sgd" and
    # "rmsprop" are alternatives. For a custom learning rate pass an optimizer
    # object, e.g. keras.optimizers.Adam(learning_rate=0.001).
    optimizer="adam",
    # metrics: extra values reported during training and validation; they do
    # not affect the weight updates.
    metrics=["accuracy"],
)
# Train the model on the training split and evaluate on the validation split
# after every epoch.
model.fit(
    # x: training data, a tf.data.Dataset yielding (images, labels) batches.
    x=train_ds,
    # validation_data: evaluated during training without updating weights;
    # used to spot overfitting and monitor generalization.
    validation_data=val_ds,
    # epochs: number of complete passes over the training data. Increase
    # (30, 50, 100) while the loss is still falling; decrease (10, 15) if the
    # validation loss shows early overfitting.
    epochs=20,
)
# Other fit() parameters that are available but not used here:
#   callbacks: e.g. EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
#   verbose:   0 (silent), 1 (progress bar), 2 (one line per epoch)
# The batch size comes from the dataset itself, so it is not passed to fit().
# =============================================================================
# 6. LOAD TEST IMAGE FOR PREDICTION
# =============================================================================
# Load the image to classify with the same size and channel layout the model
# was trained on.
img = image.load_img(
    # path: file to classify; change this to test a different image.
    "E:/Maroon/College/S9/DL/Fonts/test1.png",
    # color_mode: must match the model's input channels ("grayscale" = 1).
    color_mode="grayscale",
    # target_size: (height, width); must match the model's input_shape.
    target_size=(28, 28),
)

# PIL image -> numpy array of shape (28, 28, 1), scaled to [0, 1] exactly as
# the training data was.
img_array = image.img_to_array(img) / 255.0
# The model expects a batch, even for one image: (28, 28, 1) -> (1, 28, 28, 1).
img_array = np.expand_dims(img_array, axis=0)
# =============================================================================
# 7. MAKE PREDICTION
# =============================================================================
# Run the preprocessed image through the model. The input has shape
# (batch_size, height, width, channels); the result has shape (1, 26): one row
# for the single image, one score per class.
pred = model.predict(img_array)
# Other predict() parameters that are available but not used here:
#   batch_size: limits memory use on large inputs (32, 64, 128)
#   verbose:    0 (silent), 1 (progress bar)
#   steps:      number of batches to draw (for generator inputs)

# argmax over axis=1 picks the highest-scoring class for each row; [0] takes
# the result for the only image in the batch.
pred_class = pred.argmax(axis=1)[0]
# =============================================================================
# 8. DISPLAY PREDICTION RESULT
# =============================================================================
# Map class index -> letter: index 0 is "A", index 25 is "Z".
# NOTE(review): this assumes the class folders are named A..Z so that class
# index i corresponds to the i-th letter -- confirm against the dataset.
# For lowercase letters use ord("a") / ord("z") instead.
class_values = [chr(code) for code in range(ord("A"), ord("Z") + 1)]

# Show the letter for the predicted class index.
print("Predicted values:", class_values[pred_class])
- # =============================================================================
- # SUMMARY OF KEY PARAMETER TUNING GUIDELINES
- # =============================================================================
- """
- COMMON PARAMETER ADJUSTMENTS:
- 1. OVERFITTING (High training accuracy, low validation accuracy):
- - Increase dropout rate (0.3, 0.4, 0.5)
- - Reduce model complexity (fewer filters, smaller dense layers)
- - Add data augmentation (this script currently applies none)
- - Reduce epochs
- 2. UNDERFITTING (Low training and validation accuracy):
- - Increase model complexity (more filters, larger dense layers)
- - Decrease dropout rate (0.1, 0.15)
- - Increase epochs
- - Adjust learning rate
- 3. SLOW TRAINING:
- - Increase batch_size (64, 128) in image_dataset_from_directory (the dataset, not fit(), sets the batch size)
- - Use smaller image_size (16x16 instead of 28x28)
- - Reduce model complexity
- 4. MEMORY ISSUES:
- - Decrease batch_size (16, 8) in image_dataset_from_directory
- - Use tf.float16 instead of tf.float32
- - Reduce image_size
- - Reduce model complexity
- 5. POOR ACCURACY:
- - Try different optimizers (sgd, rmsprop)
- - Adjust learning rate
- - Use data augmentation
- - Ensure proper data normalization
- """
Advertisement
Add Comment
Please, Sign In to add comment