Datasets
emozilla/dolma-v1_7-cc_en_head
384 downloads
['size_categories:100M<n<1B''format:parquet''modality:text'
YashJain/GitAI
384 downloads
['license:mit''size_categories:10K<n<100K''format:json'
BAAI/IndustryCorpus2_real_estate_construction
384 downloads
['size_categories:10M<n<100M''format:parquet''modality:tabular'
yifanzhang114/MME-RealWorld-Lmms-eval
384 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
PULSE-ECG/ECGInstruct
384 downloads
['license:apache-2.0''size_categories:1M<n<10M''format:json'
ieuniversity/group_1_submission
384 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
NbAiLab/NCC
384 downloads
['task_categories:text-generation''task_ids:language-modeling''annotations_creators:no-annotation'
kreasof-ai/bigc-bem-eng
384 downloads
['task_categories:text-to-speech''language:af''size_categories:10K<n<100K'
qr12138/reddit_dataset_170
384 downloads
['task_categories:text-classification''task_categories:token-classification''task_categories:question-answering'
aisi-whitebox/sec_qa_v2_prompted_sandbagging_llama_31_8b_instruct
384 downloads
['language:en''license:apache-2.0''size_categories:n<1K'