diff --git a/keras/src/layers/attention/additive_attention.py b/keras/src/layers/attention/additive_attention.py index 787dd50e71a9..b56185f85bc2 100644 --- a/keras/src/layers/attention/additive_attention.py +++ b/keras/src/layers/attention/additive_attention.py @@ -10,7 +10,7 @@ class AdditiveAttention(Attention): Inputs are a list with 2 or 3 elements: 1. A `query` tensor of shape `(batch_size, Tq, dim)`. 2. A `value` tensor of shape `(batch_size, Tv, dim)`. - 3. A optional `key` tensor of shape `(batch_size, Tv, dim)`. If none + 3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none supplied, `value` will be used as `key`. The calculation follows the steps: @@ -33,8 +33,8 @@ class AdditiveAttention(Attention): - `query`: Query tensor of shape `(batch_size, Tq, dim)`. - `value`: Value tensor of shape `(batch_size, Tv, dim)`. - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If - not given, will use `value` for both `key` and `value`, which is - the most common case. + not given, will use the `value` for both `key` and `value`, + which is the most common case. mask: List of the following tensors: - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. If given, the output will be zero at the positions where