# 1.58bit_LLM

def absmean_quantization_activation(A):
    """Quantize activations by max-abs scaling followed by ``round_clip``.

    Despite the "absmean" in the name, the scale used here is the largest
    absolute value found in ``A`` (not the mean): every element is divided
    by that maximum, which maps the data into ``[-1, 1]``, and each scaled
    element is then passed through ``round_clip(x, -1, 1)``.

    Args:
        A: Array-like of numeric activations, of any shape.

    Returns:
        A NumPy array with the same shape as ``A`` holding
        ``round_clip(x, -1, 1)`` for each scaled element ``x``.  An empty
        input yields an empty array of the same shape.
        NOTE(review): the element dtype of a non-empty result is whatever
        ``round_clip`` returns for the first element (``np.vectorize``
        infers it) -- confirm against ``round_clip``'s definition.
    """
    # Accept lists/tuples as well as ndarrays; a plain Python list would
    # otherwise fail at the multiplication by the float scaling factor.
    A = np.asanyarray(A)

    # np.max (and np.vectorize without otypes) raise on zero-size input;
    # there is nothing to quantize, so hand back an empty array instead.
    if A.size == 0:
        return A.copy()

    # Largest magnitude in the activation array.
    max_val = np.max(np.abs(A))

    # Guard against division by zero: an all-zero input is left unscaled.
    scaling_factor = 1 / max_val if max_val != 0 else 1

    # Scale the activation matrix to the range [-1, 1].
    scaled_A = A * scaling_factor

    # Apply round_clip element-wise.  round_clip is defined outside this
    # function; presumably it rounds and then clamps to [-1, 1] -- verify.
    quantized_A = np.vectorize(lambda x: round_clip(x, -1, 1))(scaled_A)

    return quantized_A