  1. """
  2. Font Classification using CNN - Detailed Parameter Explanations
  3. ==============================================================
  4.  
  5. This script implements a Convolutional Neural Network (CNN) to classify
  6. alphabetic characters (A-Z) from font images with comprehensive parameter explanations.
  7. """
  8.  
  9. # =============================================================================
  10. # 1. IMPORT REQUIRED LIBRARIES
  11. # =============================================================================
  12.  
  13. import tensorflow as tf
  14. import numpy as np
  15. from tensorflow import keras
  16. from keras import Sequential
  17. from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
  18. from keras.preprocessing import image
  19.  
# Library Explanations:
# - tensorflow: Main deep learning framework
# - numpy: For numerical operations and array manipulations
# - keras: High-level neural network API (part of TensorFlow)
# - Sequential: Model type that stacks layers linearly
# - Conv2D: 2D convolution layer for feature extraction
# - MaxPooling2D: Pooling layer for dimension reduction
# - Dense: Fully connected layer for classification
# - Dropout: Regularization layer to prevent overfitting
# - Flatten: Converts 2D feature maps to a 1D vector
# - Single-image loading is done later with keras.utils.load_img and
#   keras.utils.img_to_array (the older keras.preprocessing.image module
#   is deprecated in recent Keras releases)

# =============================================================================
# 2. LOAD AND PREPARE DATASET
# =============================================================================

train_ds, val_ds = keras.utils.image_dataset_from_directory(
    "E:/Maroon/College/S9/DL/Fonts",  # directory: Path to dataset folder
                                      # PURPOSE: Location of image dataset with subdirectories as classes
                                      # WHEN TO CHANGE: Update path to your actual dataset location
                                      # STRUCTURE: Each subdirectory (A/, B/, C/, etc.) becomes a class

    color_mode="grayscale",           # color_mode: Image color format
                                      # PURPOSE: Determines number of color channels
                                      # OPTIONS: "grayscale" (1 channel), "rgb" (3 channels), "rgba" (4 channels)
                                      # WHEN TO CHANGE: Use "rgb" for colored images, "grayscale" for B&W

    subset="both",                    # subset: Which data splits to return
                                      # PURPOSE: Specifies training/validation split output
                                      # OPTIONS: "training", "validation", "both"
                                      # WHEN TO CHANGE: Use a specific subset if you want only one split

    image_size=(28, 28),              # image_size: Target image dimensions (height, width)
                                      # PURPOSE: Resizes all images to this size
                                      # IMPACT: Affects model input shape and computational requirements
                                      # WHEN TO CHANGE: Larger (64x64, 224x224) for more detail, smaller for speed

    validation_split=0.2,             # validation_split: Fraction for validation set
                                      # PURPOSE: Percentage of data reserved for validation (20% here)
                                      # RANGE: 0.0 to 1.0 (typically 0.1 to 0.3)
                                      # WHEN TO CHANGE: 0.1-0.15 for large datasets, 0.2-0.3 for smaller datasets

    seed=1337                         # seed: Random seed for reproducible splits
                                      # PURPOSE: Ensures the same train/val split across runs
                                      # NOTE: A seed is required when validation_split is used with shuffling
)
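
# A quick sanity check (a sketch): image_dataset_from_directory infers class
# names from the subdirectory names in alphabetical order, so index 0 should
# map to 'A'. Read class_names before the .map() below, since plain tf.data
# transformations drop this attribute.
print(train_ds.class_names)       # e.g. ['A', 'B', 'C', ..., 'Z']
print(len(train_ds.class_names))  # expected: 26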

# =============================================================================
# 3. DATA NORMALIZATION
# =============================================================================

train_ds = train_ds.map(lambda x, y: (tf.cast(x, tf.float32)/255.0, y))
val_ds = val_ds.map(lambda x, y: (tf.cast(x, tf.float32)/255.0, y))

# Parameter Explanations:
#
# .map() function:
# - PURPOSE: Applies a transformation to each batch in the dataset
# - lambda x, y: Anonymous function where x=images, y=labels
#
# tf.cast(x, tf.float32):
# - PURPOSE: Converts image data from uint8 to float32
# - WHEN TO CHANGE: Use tf.float16 for memory savings, tf.float64 for precision
# - WHY NEEDED: Neural networks train on floating-point inputs
#
# /255.0:
# - PURPOSE: Normalizes pixel values from [0,255] to [0,1]
# - WHY CRITICAL: Keeps inputs small and well-scaled, which speeds convergence
# - ALTERNATIVES: /127.5 - 1 for a [-1,1] range, custom normalization as needed

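# An equivalent alternative (a sketch): do the scaling inside the model with a
# Rescaling layer, so raw [0,255] inputs are normalized automatically at both
# training and inference time, and the .map() calls above become unnecessary.
#
#     model = Sequential([
#         keras.layers.Rescaling(1.0/255, input_shape=(28, 28, 1)),
#         ...
#     ])
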
# =============================================================================
# 4. BUILD CNN MODEL ARCHITECTURE
# =============================================================================

model = Sequential([
    Conv2D(32,                        # filters: Number of feature detectors (kernels)
                                      # PURPOSE: How many different features to detect
                                      # WHEN TO INCREASE: More complex datasets (64, 128, 256)
                                      # WHEN TO DECREASE: Simpler datasets or less computation (16, 8)

           (5, 5),                    # kernel_size: Size of convolution window (height, width)
                                      # PURPOSE: Defines the receptive field size
                                      # OPTIONS: (3,3) for fine details, (5,5) for broader features, (7,7) for large patterns
                                      # WHEN TO CHANGE: Smaller for detailed features, larger for global patterns

           activation="relu",         # activation: Non-linear activation function
                                      # PURPOSE: Introduces non-linearity for complex pattern learning
                                      # OPTIONS: "relu" (most common), "tanh", "sigmoid", "leaky_relu"
                                      # WHEN TO CHANGE: "leaky_relu" for dying ReLUs, "tanh" for [-1,1] outputs

           padding="same",            # padding: Controls output size after convolution
                                      # PURPOSE: Determines spatial dimension preservation
                                      # OPTIONS: "same" (keeps size), "valid" (reduces size)
                                      # WHEN TO USE: "same" to preserve dimensions, "valid" to reduce them

           input_shape=(28, 28, 1)),  # input_shape: Input tensor dimensions (height, width, channels)
                                      # PURPOSE: Defines the expected input format
                                      # WHEN TO CHANGE: Must match your image dimensions and channels

    MaxPooling2D(padding="same"),     # MaxPooling2D: Downsampling layer
                                      # pool_size: Default (2,2) - size of pooling window
                                      # PURPOSE: Reduces spatial dimensions while retaining strong features
                                      # WHEN TO CHANGE: (3,3) for aggressive downsampling, (1,1) for minimal
                                      # padding: Same options as Conv2D

    Conv2D(64,                        # filters: Increased to 64 for more complex feature detection
                                      # PURPOSE: Detects combinations of basic features from the previous layer
           (5, 5),
           activation="relu",
           padding="same"),

    MaxPooling2D(padding="same"),     # Second pooling layer for further dimension reduction

    Flatten(),                        # Flatten: Converts 2D feature maps to a 1D vector
                                      # PURPOSE: Prepares data for dense layers
                                      # NO PARAMETERS: Automatically handles reshaping

    Dense(1024,                       # units: Number of neurons in dense layer
                                      # PURPOSE: Learning capacity of the layer
                                      # WHEN TO INCREASE: More complex patterns (2048, 4096)
                                      # WHEN TO DECREASE: Simpler datasets or to prevent overfitting (512, 256)

          activation="relu"),         # activation: Activation function for dense layer
                                      # PURPOSE: Non-linearity for complex pattern learning
                                      # TYPICAL: "relu" for hidden layers

    Dropout(0.2),                     # rate: Fraction of neurons to randomly disable
                                      # PURPOSE: Prevents overfitting during training
                                      # RANGE: 0.0 to 0.8 (typically 0.2 to 0.5)
                                      # WHEN TO INCREASE: If overfitting (0.3, 0.4, 0.5)
                                      # WHEN TO DECREASE: If underfitting (0.1, 0.15)

    Dense(26,                         # units: 26 output classes (A-Z letters)
                                      # PURPOSE: One neuron per class for classification
                                      # WHEN TO CHANGE: Match the number of classes in your dataset

          activation="softmax")       # activation: Output layer activation
                                      # PURPOSE: Produces a probability distribution over the 26 classes
                                      # OPTIONS: "softmax" for single-label multi-class (this task),
                                      #          "sigmoid" only for multi-label problems
                                      # NOTE: sparse_categorical_crossentropy (used below) expects
                                      #       softmax probabilities that sum to 1
])
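
# Optional: print a layer-by-layer summary to verify the shapes implied above
# (28x28 -> 14x14 -> 7x7 after the two "same"-padded pooling layers, so
# Flatten yields 7*7*64 = 3136 features feeding the 1024-unit dense layer).
model.summary()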

# =============================================================================
# 5. COMPILE AND TRAIN THE MODEL
# =============================================================================

model.compile(
    optimizer="adam",                 # optimizer: Weight update algorithm
                                      # PURPOSE: How the model learns from errors
                                      # OPTIONS: "adam" (adaptive), "sgd" (basic), "rmsprop" (RNN-friendly)
                                      # WHEN TO CHANGE: "sgd" with momentum for fine-tuning
                                      # CUSTOM: keras.optimizers.Adam(learning_rate=0.001) for a custom LR
                                      #         (see the sketch after this call)

    loss="sparse_categorical_crossentropy",  # loss: Error measurement function
                                             # PURPOSE: Quantifies prediction errors
                                             # WHEN TO USE: Multi-class with integer labels (0,1,2,...),
                                             #              the default label format from image_dataset_from_directory
                                             # ALTERNATIVES: "categorical_crossentropy" for one-hot labels,
                                             #               "binary_crossentropy" for binary classification

    metrics=["accuracy"]              # metrics: Additional monitoring metrics
                                      # PURPOSE: Track performance during training
                                      # OPTIONS: "accuracy"; keras.metrics.Precision()/Recall() exist but
                                      #          target binary/multi-label outputs, so multi-class tasks
                                      #          usually monitor accuracy or compute per-class metrics separately
)
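
# The "custom" variant mentioned above, as a sketch: identical behavior except
# for an explicit learning rate (0.001 is also Adam's default).
# model.compile(
#     optimizer=keras.optimizers.Adam(learning_rate=0.001),
#     loss="sparse_categorical_crossentropy",
#     metrics=["accuracy"],
# )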

model.fit(
    train_ds,                         # x: Training data
                                      # PURPOSE: Data used to update model weights
                                      # FORMAT: tf.data.Dataset yielding (images, labels) batches

    epochs=20,                        # epochs: Number of complete training passes
                                      # PURPOSE: How many times to see the entire dataset
                                      # WHEN TO INCREASE: If loss is still decreasing (30, 50, 100)
                                      # WHEN TO DECREASE: If overfitting early (10, 15)
                                      # MONITORING: Watch validation loss to avoid overfitting

    validation_data=val_ds            # validation_data: Data for performance monitoring
                                      # PURPOSE: Evaluates the model during training without affecting weights
                                      # WHY IMPORTANT: Detects overfitting, monitors generalization
                                      # WHEN TO SKIP: Only for very small datasets (not recommended)
)

# Additional useful parameters (not used but available):
# batch_size: Number of samples per weight update (here set by the dataset, default 32)
# callbacks: [EarlyStopping, ModelCheckpoint, ReduceLROnPlateau] for automation
#            (see the sketch below)
# verbose: 0 (silent), 1 (progress bar), 2 (one line per epoch)
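
# A sketch of the callbacks mentioned above (the checkpoint filename is
# illustrative): EarlyStopping halts training once val_loss stops improving,
# and ModelCheckpoint keeps the best weights seen so far.
# callbacks = [
#     keras.callbacks.EarlyStopping(monitor="val_loss", patience=3,
#                                   restore_best_weights=True),
#     keras.callbacks.ModelCheckpoint("best_font_cnn.keras", save_best_only=True),
# ]
# model.fit(train_ds, epochs=20, validation_data=val_ds, callbacks=callbacks)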

# =============================================================================
# 6. LOAD TEST IMAGE FOR PREDICTION
# =============================================================================

img = keras.utils.load_img(
    "E:/Maroon/College/S9/DL/Fonts/test1.png",  # path: File path to test image
                                                # PURPOSE: Location of image to classify
                                                # WHEN TO CHANGE: Test different images
                                                # FORMATS: PNG, JPG, JPEG, BMP, GIF supported

    target_size=(28, 28),             # target_size: Resize dimensions (height, width)
                                      # PURPOSE: Match the model's expected input size
                                      # MUST MATCH: Model's input_shape requirements
                                      # WHEN TO CHANGE: Only if the model input size changes

    color_mode="grayscale"            # color_mode: Color channel specification
                                      # PURPOSE: Match the model's expected channels
                                      # OPTIONS: "grayscale" (1), "rgb" (3), "rgba" (4)
                                      # MUST MATCH: Model's input channel requirements
)

img_array = np.expand_dims(
    keras.utils.img_to_array(img)/255.0,  # array: Preprocessed image array
                                          # img_to_array(): PIL image -> numpy array (28,28,1)
                                          # /255.0: Normalize to [0,1] (CRITICAL: match training preprocessing)

    axis=0                            # axis: Dimension to expand
                                      # PURPOSE: Add a batch dimension for model input
                                      # RESULT: (28,28,1) -> (1,28,28,1)
                                      # WHY NEEDED: The model expects batched input even for a single image
                                      # NOTE: axis=0 is required here; the batch dimension must come first
)
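
# Sanity check (illustrative): the batched array must match the model's
# expected input shape before prediction.
assert img_array.shape == (1, 28, 28, 1), img_array.shape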

# =============================================================================
# 7. MAKE PREDICTION
# =============================================================================

pred = model.predict(
    img_array                         # x: Input data for prediction
                                      # PURPOSE: Preprocessed image ready for classification
                                      # EXPECTED SHAPE: (batch_size, height, width, channels)
                                      # DATA TYPE: Floating point, normalized like the training data
)

# Additional predict() parameters (not used but available):
# batch_size: Control memory usage for large inputs (32, 64, 128)
# verbose: 0 (silent), 1 (progress bar), 2 (one line per batch)
# steps: Number of batches (for generator inputs)
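
# Optional (a sketch): inspect the top-3 most probable classes instead of just
# the argmax, using plain numpy.
# top3 = np.argsort(pred[0])[::-1][:3]
# for i in top3:
#     print(chr(65 + i), round(float(pred[0][i]), 4))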

pred_class = np.argmax(
    pred,                             # a: Prediction array with probability scores
                                      # PURPOSE: Array of class probabilities
                                      # SHAPE: (1, 26) - 1 sample, 26 class probabilities

    axis=1                            # axis: Dimension for finding the maximum
                                      # PURPOSE: Find the class with the highest probability
                                      # OPTIONS: axis=0 (across samples), axis=1 (across classes)
                                      # RESULT: Index of the most confident prediction
)[0]                                  # [0]: Extract scalar from array
                                      # PURPOSE: Get a single prediction from the batch
                                      # WHY NEEDED: argmax returns shape (1,); we want a scalar

# =============================================================================
# 8. DISPLAY PREDICTION RESULT
# =============================================================================

class_values = [chr(i) for i in range(65, 91)]  # Create A-Z mapping
# chr(): Converts an ASCII code to a character
# range(65, 91): ASCII codes for A(65) through Z(90)
# WHEN TO CHANGE: range(97, 123) for lowercase a-z
# ALTERNATIVES:
#   - import string; class_values = list(string.ascii_uppercase)
#   - Manual list: ['A', 'B', 'C', ..., 'Z']

print("Predicted letter:", class_values[pred_class])
# print() parameters (not used but available):
# sep: Separator between arguments (default ' ')
# end: String at the end (default '\n')
# file: Output destination (default stdout)

# =============================================================================
# SUMMARY OF KEY PARAMETER TUNING GUIDELINES
# =============================================================================

"""
COMMON PARAMETER ADJUSTMENTS:

1. OVERFITTING (High training accuracy, low validation accuracy):
   - Increase dropout rate (0.3, 0.4, 0.5)
   - Reduce model complexity (fewer filters, smaller dense layers)
   - Add data augmentation (see the sketch after this summary)
   - Reduce epochs

2. UNDERFITTING (Low training and validation accuracy):
   - Increase model complexity (more filters, larger dense layers)
   - Decrease dropout rate (0.1, 0.15)
   - Increase epochs
   - Adjust the learning rate

3. SLOW TRAINING:
   - Increase batch_size (64, 128)
   - Use a smaller image_size (16x16 instead of 28x28)
   - Reduce model complexity

4. MEMORY ISSUES:
   - Decrease batch_size (16, 8)
   - Use tf.float16 instead of tf.float32
   - Reduce image_size
   - Reduce model complexity

5. POOR ACCURACY:
   - Try different optimizers (sgd, rmsprop)
   - Adjust the learning rate
   - Use data augmentation
   - Ensure proper data normalization
"""