We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 2880e1a + a5f4e39, commit bacc0b9 (Copy full SHA for bacc0b9)
1 file changed
src/modalities/models/gpt2/gpt2_model.py
@@ -475,7 +475,7 @@ def __init__(
475
# so if the model wants to increase the distance between logits
476
# it needs to scale q or k OR adjust the angle between them
477
# qk norm forces the model to mostly adjust the angle between q and k which stabilizes training
478
- if attention_config.attention_config is not None:
+ if attention_config.qk_norm_config is not None:
479
self.q_norm = attention_config.qk_norm_config.norm_type.value(
480
**dict(attention_config.qk_norm_config.config)
481
)
0 commit comments