Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions seacrowd/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
pairs_features_score,
pairs_multi_features,
qa_features,
chat_features,
image_features,
image_multi_features,
imqa_features,
Expand Down Expand Up @@ -105,6 +106,7 @@ class Tasks(Enum):
# Multi Text Generation
DIALOGUE_SYSTEM = "DS"
E2E_TASK_ORIENTED_DIALOGUE = "TOD"
MULTI_TURN_CONVERSATION = "MTC"

# Self Supervised & Unsupervised Text
PROMPTING = "PRT"
Expand Down Expand Up @@ -246,6 +248,7 @@ class Licenses(Enum):
Tasks.TOKEN_LEVEL_LANGUAGE_IDENTIFICATION: "SEQ_LABEL",
Tasks.COMMONSENSE_REASONING: "QA",
Tasks.QUESTION_ANSWERING: "QA",
Tasks.MULTI_TURN_CONVERSATION: "CHAT",
Tasks.CONCEPT_ALIGNMENT_CLASSIFICATION: "PAIRS",
Tasks.NEXT_SENTENCE_PREDICTION: "PAIRS",
Tasks.TEXT_RETRIEVAL: "PAIRS",
Expand Down Expand Up @@ -313,6 +316,7 @@ class Licenses(Enum):
"KB": kb_features,
"TREE": tree_features,
"QA": qa_features,
"CHAT": chat_features,
"T2T": text2text_features,
"TEXT": text_features(),
"TEXT_MULTI": text_multi_features(),
Expand Down
4 changes: 3 additions & 1 deletion seacrowd/utils/schemas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .pairs import features_with_continuous_label as pairs_features_score
from .pairs_multilabel import features as pairs_multi_features
from .qa import features as qa_features
from .chat import features as chat_features
from .image import features as image_features
from .image import multi_features as image_multi_features
from .imqa import features as imqa_features
Expand All @@ -28,6 +29,7 @@
"pairs_features_score",
"pairs_multi_features",
"qa_features",
"chat_features",
"image_features",
"image_multi_features",
"imqa_features",
Expand All @@ -42,4 +44,4 @@
"text2text_features",
"video_features",
"tod_features",
]
]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: add a trailing newline at EoF

24 changes: 24 additions & 0 deletions seacrowd/utils/schemas/chat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
Conversational Chat Schema
"""
import datasets

features = datasets.Features(
{
"id": datasets.Value("string"),
"input": datasets.Sequence({
"role": datasets.ClassLabel(names=["system", "user", "assistant"]),
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The role should be implemented as a string instead of a ClassLabel, for two reasons:

  1. There might be some cases where a single-sequence dialogue can have multiple roles
  2. The actual data (when the examples are generated) will contain an integer instead of a string

"content": datasets.Value("string"),
}),
"output": datasets.Value("string"),

# the schema of 'meta' isn't specified either, to allow some flexibility
"meta": {}

# notes on how to use the 'meta' field
# you can choose one of two options:
# 1. define it as an empty dict if you don't think it's usable in `_generate_examples`, or
# 2. define meta as a dict that maps each intended meta column name to its `datasets` feature type
# in the `_info` Dataloader method, then populate it with the values in the `_generate_examples` Dataloader method
}
)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: add a trailing newline at EoF