We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2880e1a, commit a5f4e39 (copy full SHA for a5f4e39)
1 file changed
src/modalities/models/gpt2/gpt2_model.py
@@ -475,7 +475,7 @@ def __init__(
475
# so if the model wants to increase the distance between logits
476
# it needs to scale q or k OR adjust the angle between them
477
# qk norm forces the model to mostly adjust the angle between q and k which stabilizes training
478
- if attention_config.attention_config is not None:
+ if attention_config.qk_norm_config is not None:
479
self.q_norm = attention_config.qk_norm_config.norm_type.value(
480
**dict(attention_config.qk_norm_config.config)
481
)
0 commit comments