Skip to content

Class: Dataset

A single component of related observations and/or information that can be read, manipulated, transformed, and otherwise interpreted.

URI: dcat:Distribution

classDiagram class Dataset click Dataset href "../Dataset/" Information <|-- Dataset click Information href "../Information/" Dataset <|-- DataSubset click DataSubset href "../DataSubset/" Dataset : acquisition_methods Dataset --> "*" InstanceAcquisition : acquisition_methods click InstanceAcquisition href "../InstanceAcquisition/" Dataset : addressing_gaps Dataset --> "*" AddressingGap : addressing_gaps click AddressingGap href "../AddressingGap/" Dataset : annotation_analyses Dataset --> "*" AnnotationAnalysis : annotation_analyses click AnnotationAnalysis href "../AnnotationAnalysis/" Dataset : anomalies Dataset --> "*" DataAnomaly : anomalies click DataAnomaly href "../DataAnomaly/" Dataset : at_risk_populations Dataset --> "0..1" AtRiskPopulations : at_risk_populations click AtRiskPopulations href "../AtRiskPopulations/" Dataset : citation Dataset : cleaning_strategies Dataset --> "*" CleaningStrategy : cleaning_strategies click CleaningStrategy href "../CleaningStrategy/" Dataset : collection_consents Dataset --> "*" CollectionConsent : collection_consents click CollectionConsent href "../CollectionConsent/" Dataset : collection_mechanisms Dataset --> "*" CollectionMechanism : collection_mechanisms click CollectionMechanism href "../CollectionMechanism/" Dataset : collection_notifications Dataset --> "*" CollectionNotification : collection_notifications click CollectionNotification href "../CollectionNotification/" Dataset : collection_timeframes Dataset --> "*" CollectionTimeframe : collection_timeframes click CollectionTimeframe href "../CollectionTimeframe/" Dataset : compression Dataset --> "0..1" CompressionEnum : compression click CompressionEnum href "../CompressionEnum/" Dataset : confidential_elements Dataset --> "*" Confidentiality : confidential_elements click Confidentiality href "../Confidentiality/" Dataset : conforms_to Dataset : conforms_to_class Dataset : conforms_to_schema Dataset : consent_revocations Dataset --> "*" ConsentRevocation : 
consent_revocations click ConsentRevocation href "../ConsentRevocation/" Dataset : content_warnings Dataset --> "*" ContentWarning : content_warnings click ContentWarning href "../ContentWarning/" Dataset : created_by Dataset : created_on Dataset : creators Dataset --> "*" Creator : creators click Creator href "../Creator/" Dataset : data_collectors Dataset --> "*" DataCollector : data_collectors click DataCollector href "../DataCollector/" Dataset : data_protection_impacts Dataset --> "*" DataProtectionImpact : data_protection_impacts click DataProtectionImpact href "../DataProtectionImpact/" Dataset : description Dataset : direct_collection Dataset --> "*" DirectCollection : direct_collection click DirectCollection href "../DirectCollection/" Dataset : discouraged_uses Dataset --> "*" DiscouragedUse : discouraged_uses click DiscouragedUse href "../DiscouragedUse/" Dataset : distribution_dates Dataset --> "*" DistributionDate : distribution_dates click DistributionDate href "../DistributionDate/" Dataset : distribution_formats Dataset --> "*" DistributionFormat : distribution_formats click DistributionFormat href "../DistributionFormat/" Dataset : doi Dataset : download_url Dataset : errata Dataset --> "*" Erratum : errata click Erratum href "../Erratum/" Dataset : ethical_reviews Dataset --> "*" EthicalReview : ethical_reviews click EthicalReview href "../EthicalReview/" Dataset : existing_uses Dataset --> "*" ExistingUse : existing_uses click ExistingUse href "../ExistingUse/" Dataset : extension_mechanism Dataset --> "0..1" ExtensionMechanism : extension_mechanism click ExtensionMechanism href "../ExtensionMechanism/" Dataset : external_resources Dataset --> "*" ExternalResource : external_resources click ExternalResource href "../ExternalResource/" Dataset : file_collections Dataset --> "*" FileCollection : file_collections click FileCollection href "../FileCollection/" Dataset : funders Dataset --> "*" FundingMechanism : funders click FundingMechanism href 
"../FundingMechanism/" Dataset : future_use_impacts Dataset --> "*" FutureUseImpact : future_use_impacts click FutureUseImpact href "../FutureUseImpact/" Dataset : human_subject_research Dataset --> "0..1" HumanSubjectResearch : human_subject_research click HumanSubjectResearch href "../HumanSubjectResearch/" Dataset : id Dataset : imputation_protocols Dataset --> "*" ImputationProtocol : imputation_protocols click ImputationProtocol href "../ImputationProtocol/" Dataset : informed_consent Dataset --> "*" InformedConsent : informed_consent click InformedConsent href "../InformedConsent/" Dataset : instances Dataset --> "*" Instance : instances click Instance href "../Instance/" Dataset : intended_uses Dataset --> "*" IntendedUse : intended_uses click IntendedUse href "../IntendedUse/" Dataset : ip_restrictions Dataset --> "0..1" IPRestrictions : ip_restrictions click IPRestrictions href "../IPRestrictions/" Dataset : is_deidentified Dataset --> "0..1" Deidentification : is_deidentified click Deidentification href "../Deidentification/" Dataset : is_tabular Dataset : issued Dataset : keywords Dataset : known_biases Dataset --> "*" DatasetBias : known_biases click DatasetBias href "../DatasetBias/" Dataset : known_limitations Dataset --> "*" DatasetLimitation : known_limitations click DatasetLimitation href "../DatasetLimitation/" Dataset : labeling_strategies Dataset --> "*" LabelingStrategy : labeling_strategies click LabelingStrategy href "../LabelingStrategy/" Dataset : language Dataset : last_updated_on Dataset : license Dataset : license_and_use_terms Dataset --> "0..1" LicenseAndUseTerms : license_and_use_terms click LicenseAndUseTerms href "../LicenseAndUseTerms/" Dataset : machine_annotation_tools Dataset --> "*" MachineAnnotationTools : machine_annotation_tools click MachineAnnotationTools href "../MachineAnnotationTools/" Dataset : maintainers Dataset --> "*" Maintainer : maintainers click Maintainer href "../Maintainer/" Dataset : 
missing_data_documentation Dataset --> "*" MissingDataDocumentation : missing_data_documentation click MissingDataDocumentation href "../MissingDataDocumentation/" Dataset : modified_by Dataset : name Dataset : other_tasks Dataset --> "*" OtherTask : other_tasks click OtherTask href "../OtherTask/" Dataset : page Dataset : parent_datasets Dataset --> "*" Dataset : parent_datasets click Dataset href "../Dataset/" Dataset : participant_compensation Dataset --> "*" HumanSubjectCompensation : participant_compensation click HumanSubjectCompensation href "../HumanSubjectCompensation/" Dataset : participant_privacy Dataset --> "*" ParticipantPrivacy : participant_privacy click ParticipantPrivacy href "../ParticipantPrivacy/" Dataset : preprocessing_strategies Dataset --> "*" PreprocessingStrategy : preprocessing_strategies click PreprocessingStrategy href "../PreprocessingStrategy/" Dataset : prohibited_uses Dataset --> "*" ProhibitedUse : prohibited_uses click ProhibitedUse href "../ProhibitedUse/" Dataset : publisher Dataset : purposes Dataset --> "*" Purpose : purposes click Purpose href "../Purpose/" Dataset : raw_data_sources Dataset --> "*" RawDataSource : raw_data_sources click RawDataSource href "../RawDataSource/" Dataset : raw_sources Dataset --> "*" RawData : raw_sources click RawData href "../RawData/" Dataset : regulatory_restrictions Dataset --> "0..1" ExportControlRegulatoryRestrictions : regulatory_restrictions click ExportControlRegulatoryRestrictions href "../ExportControlRegulatoryRestrictions/" Dataset : related_datasets Dataset --> "*" DatasetRelationship : related_datasets click DatasetRelationship href "../DatasetRelationship/" Dataset : relationships Dataset --> "*" Relationships : relationships click Relationships href "../Relationships/" Dataset : resources Dataset --> "*" Dataset : resources click Dataset href "../Dataset/" Dataset : retention_limit Dataset --> "0..1" RetentionLimits : retention_limit click RetentionLimits href 
"../RetentionLimits/" Dataset : sampling_strategies Dataset --> "*" SamplingStrategy : sampling_strategies click SamplingStrategy href "../SamplingStrategy/" Dataset : sensitive_elements Dataset --> "*" SensitiveElement : sensitive_elements click SensitiveElement href "../SensitiveElement/" Dataset : splits Dataset --> "*" Splits : splits click Splits href "../Splits/" Dataset : status Dataset : subpopulations Dataset --> "*" Subpopulation : subpopulations click Subpopulation href "../Subpopulation/" Dataset : subsets Dataset --> "*" DataSubset : subsets click DataSubset href "../DataSubset/" Dataset : tasks Dataset --> "*" Task : tasks click Task href "../Task/" Dataset : third_party_sharing Dataset --> "*" ThirdPartySharing : third_party_sharing click ThirdPartySharing href "../ThirdPartySharing/" Dataset : title Dataset : total_file_count Dataset : total_size_bytes Dataset : updates Dataset --> "0..1" UpdatePlan : updates click UpdatePlan href "../UpdatePlan/" Dataset : use_repository Dataset --> "*" UseRepository : use_repository click UseRepository href "../UseRepository/" Dataset : variables Dataset --> "*" VariableMetadata : variables click VariableMetadata href "../VariableMetadata/" Dataset : version Dataset : version_access Dataset --> "0..1" VersionAccess : version_access click VersionAccess href "../VersionAccess/" Dataset : was_derived_from

Inheritance

Slots

Name Cardinality and Range Description Inheritance
external_resources *
ExternalResource
External resources referenced at the dataset level (e.g., related publications, repositories, documentation) direct
resources *
Dataset
Sub-resources or component datasets that are part of this dataset direct
file_collections *
FileCollection
Collection of file groups within this dataset direct
total_file_count 0..1
Integer
Total number of files across all file collections in this dataset direct
total_size_bytes 0..1
Integer
Total size of all files in bytes across all file collections direct
purposes *
Purpose
Purposes for which the dataset was created direct
tasks *
Task
Tasks the dataset is intended to support direct
addressing_gaps *
AddressingGap
Research or practical gaps this dataset addresses direct
creators *
Creator
Individuals or organizations who created the dataset direct
funders *
FundingMechanism
Funding mechanisms that supported dataset creation direct
subsets *
DataSubset
Subsets or splits of this dataset direct
instances *
Instance
Individual data instances or records in the dataset direct
anomalies *
DataAnomaly
Known data quality issues, errors, or irregularities in the dataset direct
known_biases *
DatasetBias
List of known biases present in the dataset that may affect fairness, representativeness, or model performance direct
known_limitations *
DatasetLimitation
List of known limitations of the dataset that may affect its use or interpretation direct
confidential_elements *
Confidentiality
Confidential or restricted information within the dataset that requires access controls direct
content_warnings *
ContentWarning
Content warnings for potentially harmful, offensive, or disturbing material in the dataset direct
subpopulations *
Subpopulation
Subpopulations represented within the dataset direct
sensitive_elements *
SensitiveElement
Sensitive data elements requiring special handling or access controls direct
relationships *
Relationships
Explicit relationships between individual instances in the dataset direct
splits *
Splits
Recommended data splits for this dataset direct
acquisition_methods *
InstanceAcquisition
Methods used to acquire or obtain dataset instances direct
collection_mechanisms *
CollectionMechanism
Mechanisms, instruments, or tools used for data collection direct
sampling_strategies *
SamplingStrategy
Strategies used to select data instances from a larger population direct
data_collectors *
DataCollector
Individuals or organizations responsible for collecting the data direct
collection_timeframes *
CollectionTimeframe
Time periods during which data was collected direct
direct_collection *
DirectCollection
Whether data was collected directly from individuals or via third parties direct
collection_notifications *
CollectionNotification
Notifications provided to individuals about data collection direct
collection_consents *
CollectionConsent
Consent obtained from individuals for data collection and use direct
consent_revocations *
ConsentRevocation
Mechanisms for individuals to revoke previously given consent direct
missing_data_documentation *
MissingDataDocumentation
One or more records documenting missing data patterns and handling strategies direct
raw_data_sources *
RawDataSource
List of raw data sources before preprocessing direct
ethical_reviews *
EthicalReview
Ethical reviews and institutional oversight for the dataset direct
data_protection_impacts *
DataProtectionImpact
Data protection impact assessments (DPIAs) conducted for the dataset direct
human_subject_research 0..1
HumanSubjectResearch
Information about whether dataset involves human subjects research, including... direct
informed_consent *
InformedConsent
One or more records detailing informed consent procedures, including consent ... direct
at_risk_populations 0..1
AtRiskPopulations
Information about protections for at-risk populations (e.g., ...) direct
participant_privacy *
ParticipantPrivacy
One or more records describing privacy protections and anonymization procedur... direct
participant_compensation *
HumanSubjectCompensation
One or more records describing compensation or incentives provided to human r... direct
preprocessing_strategies *
PreprocessingStrategy
Preprocessing steps applied to the raw data direct
cleaning_strategies *
CleaningStrategy
Data cleaning and quality control procedures applied to the dataset direct
labeling_strategies *
LabelingStrategy
Labeling or annotation methodologies applied to the data direct
raw_sources *
RawData
Raw, unprocessed source data before any preprocessing was applied direct
imputation_protocols *
ImputationProtocol
Data imputation protocols applied to handle missing values direct
annotation_analyses *
AnnotationAnalysis
One or more analyses of annotation quality and inter-annotator agreement direct
machine_annotation_tools *
MachineAnnotationTools
List of automated annotation tools used in dataset creation direct
existing_uses *
ExistingUse
Known existing uses of the dataset at the time of publication direct
use_repository *
UseRepository
Repositories or registries tracking how the dataset has been used direct
other_tasks *
OtherTask
Additional tasks the dataset may support beyond its original intent direct
future_use_impacts *
FutureUseImpact
Anticipated impacts of future uses, including risks and benefits direct
discouraged_uses *
DiscouragedUse
Uses that are not recommended for this dataset due to limitations, risks, or ... direct
intended_uses *
IntendedUse
List of explicit intended and recommended uses for this dataset direct
prohibited_uses *
ProhibitedUse
List of explicitly prohibited or forbidden uses for this dataset direct
distribution_formats *
DistributionFormat
Formats in which the dataset is distributed or made available direct
distribution_dates *
DistributionDate
Dates when the dataset was or will be distributed or released direct
third_party_sharing *
ThirdPartySharing
Third-party distribution policies for the dataset direct
license_and_use_terms 0..1
LicenseAndUseTerms
License and usage terms governing dataset access and use direct
ip_restrictions 0..1
IPRestrictions
Intellectual property restrictions on dataset use or redistribution direct
regulatory_restrictions 0..1
ExportControlRegulatoryRestrictions
Regulatory and export control restrictions applicable to the dataset direct
maintainers *
Maintainer
Individuals or organizations responsible for maintaining the dataset direct
errata *
Erratum
Known errors or corrections to the dataset since publication direct
updates 0..1
UpdatePlan
Plans for future updates or versioning of the dataset direct
retention_limit 0..1
RetentionLimits
Data retention policies and limits for the dataset direct
version_access 0..1
VersionAccess
Information about access to different versions of the dataset direct
extension_mechanism 0..1
ExtensionMechanism
Mechanisms for extending or contributing to the dataset direct
variables *
VariableMetadata
List of metadata records describing individual variables, fields, or columns ... direct
is_deidentified 0..1
Deidentification
De-identification status and procedures applied to the dataset direct
is_tabular 0..1
Boolean
Whether the dataset is in tabular format (rows and columns) direct
citation 0..1
String
Recommended citation for this dataset in DataCite or BibTeX format direct
parent_datasets *
Dataset
One or more parent datasets that this dataset is part of or derived from direct
related_datasets *
DatasetRelationship
List of related datasets with typed relationships (e.g., ...) direct
compression 0..1
CompressionEnum
Compression format used, if any (e.g., ...) Information
conforms_to 0..1
String
An established standard, specification, or schema to which the resource confo... Information
conforms_to_class 0..1
String
The specific class or type within a schema to which the resource conforms Information
conforms_to_schema 0..1
String
The schema or data model to which the resource conforms Information
created_by 0..1
String
The person or organization primarily responsible for creating the resource Information
created_on 0..1
Datetime
The date and time when the resource was created Information
doi 0..1
String
Digital Object Identifier (DOI) in format 10... Information
download_url 0..1
Uri
URL from which the data can be downloaded Information
issued 0..1
Datetime
Date of formal issuance or publication of the resource Information
keywords *
String
Keywords or tags describing the resource for discovery and classification Information
language 0..1
String
Language in which the information is expressed Information
last_updated_on 0..1
Datetime
The date and time when the resource was most recently modified or updated Information
license 0..1
String
The legal license under which the resource is made available (e.g., ...) Information
modified_by 0..1
String
A person or organization that contributed to modifying or updating the resour... Information
page 0..1
String
A landing page or web page providing access to or information about the resou... Information
publisher 0..1
Uriorcurie
The organization or entity responsible for making the resource available Information
status 0..1
String
The status of the resource (e.g., ...) Information
title 0..1
String
The official title of the element Information
version 0..1
String
The version identifier of the resource (e.g., ...) Information
was_derived_from 0..1
String
A resource from which this resource was derived, in whole or in part Information
id 1
Uriorcurie
A unique identifier for a thing NamedThing
name 0..1
String
A human-readable name for a thing NamedThing
description 0..1
String
A human-readable description for a thing NamedThing

Usages

used by used in type used
DatasetCollection resources range Dataset
Dataset resources range Dataset
Dataset parent_datasets range Dataset
DataSubset resources range Dataset
DataSubset parent_datasets range Dataset
FileCollection resources range Dataset

Aliases

  • data resource
  • data file
  • data package

Identifier and Mapping Information

Schema Source

  • from schema: https://w3id.org/bridge2ai/data-sheets-schema

Mappings

Mapping Type Mapped Value
self dcat:Distribution
native data_sheets_schema:Dataset
exact schema:DataDownload

LinkML Source

Direct

name: Dataset
description: A single component of related observations and/or information that can
  be read, manipulated, transformed, and otherwise interpreted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
aliases:
- data resource
- data file
- data package
exact_mappings:
- schema:DataDownload
is_a: Information
slots:
- external_resources
- resources
slot_usage:
  external_resources:
    name: external_resources
    description: External resources referenced at the dataset level (e.g., related
      publications, repositories, documentation). For file-level external resources,
      use FileCollection.external_resources.
    range: ExternalResource
    multivalued: true
    inlined_as_list: true
  resources:
    name: resources
    description: 'Sub-resources or component datasets that are part of this dataset.
      Note: For file collections, use the file_collections attribute instead.'
    range: Dataset
    multivalued: true
    inlined_as_list: true
attributes:
  file_collections:
    name: file_collections
    description: Collection of file groups within this dataset. Each entry represents
      a logical grouping of files with shared characteristics (e.g., all training
      data, all image files, all raw data files). Maps to nested RO-Crate Dataset
      entities via schema:hasPart in RO-Crate converters.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - dcat:distribution
    rank: 1000
    slot_uri: d4d:fileCollections
    domain_of:
    - Dataset
    range: FileCollection
    multivalued: true
    inlined: true
    inlined_as_list: true
  total_file_count:
    name: total_file_count
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: '156'
    description: Total number of files across all file collections in this dataset.
      Can be aggregated from file_collections[].file_count.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:totalFileCount
    domain_of:
    - Dataset
    range: integer
  total_size_bytes:
    name: total_size_bytes
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 10737418240 (10 GiB = 10 × 1024³ bytes)
    description: Total size of all files in bytes across all file collections. Can
      be aggregated from file_collections[].total_bytes.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcat:byteSize
    domain_of:
    - Dataset
    range: integer
  purposes:
    name: purposes
    description: Purposes for which the dataset was created. List of Purpose objects
      from the Motivation module, each describing a specific creation goal or intended
      application.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:purposes
    domain_of:
    - Dataset
    range: Purpose
    multivalued: true
    inlined: true
    inlined_as_list: true
  tasks:
    name: tasks
    description: Tasks the dataset is intended to support. List of Task objects from
      the Motivation module describing specific machine learning, research, or analytical
      tasks.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:tasks
    domain_of:
    - Dataset
    range: Task
    multivalued: true
    inlined: true
    inlined_as_list: true
  addressing_gaps:
    name: addressing_gaps
    description: Research or practical gaps this dataset addresses. List of AddressingGap
      objects from the Motivation module, each describing a gap in existing datasets
      or knowledge that this dataset fills.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:addressingGaps
    domain_of:
    - Dataset
    range: AddressingGap
    multivalued: true
    inlined: true
    inlined_as_list: true
  creators:
    name: creators
    description: Individuals or organizations who created the dataset. List of Creator
      objects describing authorship, roles, and affiliations of dataset creators.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:creator
    domain_of:
    - Dataset
    range: Creator
    multivalued: true
    inlined: true
    inlined_as_list: true
  funders:
    name: funders
    description: Funding mechanisms that supported dataset creation. List of FundingMechanism
      objects describing grants, contracts, or other funding sources including grantors
      and grant identifiers.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:funder
    domain_of:
    - Dataset
    range: FundingMechanism
    multivalued: true
    inlined: true
    inlined_as_list: true
  subsets:
    name: subsets
    description: Subsets or splits of this dataset. List of DataSubset objects from
      the Composition module, each representing a logical partition such as training,
      validation, or test splits, or demographic subgroups.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:dataSubset
    domain_of:
    - Dataset
    range: DataSubset
    multivalued: true
    inlined: true
    inlined_as_list: true
  instances:
    name: instances
    description: Individual data instances or records in the dataset. List of Instance
      objects from the Composition module describing what each data point represents,
      its type, and associated label information.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:instances
    domain_of:
    - Dataset
    range: Instance
    multivalued: true
    inlined: true
    inlined_as_list: true
  anomalies:
    name: anomalies
    description: Known data quality issues, errors, or irregularities in the dataset.
      List of DataAnomaly objects from the Composition module, each documenting a
      specific anomaly and its potential impact.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:anomalies
    domain_of:
    - Dataset
    range: DataAnomaly
    multivalued: true
    inlined: true
    inlined_as_list: true
  known_biases:
    name: known_biases
    description: List of known biases present in the dataset that may affect fairness,
      representativeness, or model performance. Uses BiasTypeEnum for standardized
      bias categorization mapped to the AI Ontology (AIO).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:known_biases
    domain_of:
    - Dataset
    range: DatasetBias
    multivalued: true
    inlined: true
    inlined_as_list: true
  known_limitations:
    name: known_limitations
    description: List of known limitations of the dataset that may affect its use
      or interpretation. Distinct from biases (systematic errors) and anomalies (data
      quality issues).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:known_limitations
    domain_of:
    - Dataset
    range: DatasetLimitation
    multivalued: true
    inlined: true
    inlined_as_list: true
  confidential_elements:
    name: confidential_elements
    description: Confidential or restricted information within the dataset that requires
      access controls. List of Confidentiality objects describing what is confidential
      and why it cannot be released.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:confidentialElements
    domain_of:
    - Dataset
    range: Confidentiality
    multivalued: true
    inlined: true
    inlined_as_list: true
  content_warnings:
    name: content_warnings
    description: Content warnings for potentially harmful, offensive, or disturbing
      material in the dataset. List of ContentWarning objects alerting users to sensitive
      content categories.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:contentWarnings
    domain_of:
    - Dataset
    range: ContentWarning
    multivalued: true
    inlined: true
    inlined_as_list: true
  subpopulations:
    name: subpopulations
    description: Subpopulations represented within the dataset. List of Subpopulation
      objects from the Composition module describing demographic or other groups,
      their representation, and any imbalances.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:subpopulations
    domain_of:
    - Dataset
    range: Subpopulation
    multivalued: true
    inlined: true
    inlined_as_list: true
  sensitive_elements:
    name: sensitive_elements
    description: Sensitive data elements requiring special handling or access controls.
      List of SensitiveElement objects identifying sensitive attributes such as personal
      identifiers, protected health information, or legally sensitive content.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:sensitiveElements
    domain_of:
    - Dataset
    range: SensitiveElement
    multivalued: true
    inlined: true
    inlined_as_list: true
  relationships:
    name: relationships
    description: Explicit relationships between individual instances in the dataset.
      List of Relationships objects from the Composition module describing how instances
      relate (e.g., graph edges, ratings, social network links).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:relationships
    domain_of:
    - Dataset
    range: Relationships
    multivalued: true
    inlined: true
    inlined_as_list: true
  splits:
    name: splits
    description: Recommended data splits for this dataset. List of Splits objects
      from the Composition module describing train/validation/test partitions and
      the rationale for each split strategy.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:splits
    domain_of:
    - Dataset
    range: Splits
    multivalued: true
    inlined: true
    inlined_as_list: true
  acquisition_methods:
    name: acquisition_methods
    description: Methods used to acquire or obtain dataset instances. List of InstanceAcquisition
      objects from the Collection module describing how data was sourced, whether
      directly observed or derived.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:acquisitionMethods
    domain_of:
    - Dataset
    range: InstanceAcquisition
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_mechanisms:
    name: collection_mechanisms
    description: Mechanisms, instruments, or tools used for data collection. List
      of CollectionMechanism objects from the Collection module describing sensors,
      surveys, APIs, or other collection instruments.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:collectionMechanisms
    domain_of:
    - Dataset
    range: CollectionMechanism
    multivalued: true
    inlined: true
    inlined_as_list: true
  sampling_strategies:
    name: sampling_strategies
    description: Strategies used to select data instances from a larger population.
      List of SamplingStrategy objects from the Collection module describing sampling
      methodology, inclusion criteria, and limitations.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:samplingStrategies
    domain_of:
    - Dataset
    - Instance
    range: SamplingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  data_collectors:
    name: data_collectors
    description: Individuals or organizations responsible for collecting the data.
      List of DataCollector objects from the Collection module describing who performed
      data collection and their roles.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:dataCollectors
    domain_of:
    - Dataset
    range: DataCollector
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_timeframes:
    name: collection_timeframes
    description: Time periods during which data was collected. List of CollectionTimeframe
      objects from the Collection module describing collection start and end dates,
      and any gaps in the collection period.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:collectionTimeframes
    domain_of:
    - Dataset
    range: CollectionTimeframe
    multivalued: true
    inlined: true
    inlined_as_list: true
  direct_collection:
    name: direct_collection
    description: Whether data was collected directly from individuals or via third
      parties. List of DirectCollection objects from the Collection module describing
      direct vs. indirect collection methods and sources.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:directCollection
    domain_of:
    - Dataset
    range: DirectCollection
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_notifications:
    name: collection_notifications
    description: Notifications provided to individuals about data collection. List
      of CollectionNotification objects from the Ethics module describing how and
      when individuals were informed about the data collection.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:collectionNotifications
    domain_of:
    - Dataset
    range: CollectionNotification
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_consents:
    name: collection_consents
    description: Consent obtained from individuals for data collection and use. List
      of CollectionConsent objects from the Ethics module describing how consent was
      requested, provided, and documented.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:collectionConsents
    domain_of:
    - Dataset
    range: CollectionConsent
    multivalued: true
    inlined: true
    inlined_as_list: true
  consent_revocations:
    name: consent_revocations
    description: Mechanisms for individuals to revoke previously given consent. List
      of ConsentRevocation objects from the Ethics module describing how revocation
      works and what happens to data after revocation.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:consentRevocations
    domain_of:
    - Dataset
    range: ConsentRevocation
    multivalued: true
    inlined: true
    inlined_as_list: true
  missing_data_documentation:
    name: missing_data_documentation
    description: One or more records documenting missing data patterns and handling
      strategies.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:missingDataDocumentation
    domain_of:
    - Dataset
    range: MissingDataDocumentation
    multivalued: true
    inlined: true
    inlined_as_list: true
  raw_data_sources:
    name: raw_data_sources
    description: List of raw data sources before preprocessing. Each RawDataSource
      object describes where the original data came from and how it can be accessed.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:rawDataSources
    domain_of:
    - Dataset
    range: RawDataSource
    multivalued: true
    inlined: true
    inlined_as_list: true
  ethical_reviews:
    name: ethical_reviews
    description: Ethical reviews and institutional oversight for the dataset. List
      of EthicalReview objects from the Ethics module describing IRB approvals, ethics
      committee reviews, and compliance certifications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:ethicalReviews
    domain_of:
    - Dataset
    range: EthicalReview
    multivalued: true
    inlined: true
    inlined_as_list: true
  data_protection_impacts:
    name: data_protection_impacts
    description: Data protection impact assessments (DPIAs) conducted for the dataset.
      List of DataProtectionImpact objects from the Ethics module documenting privacy
      risk assessments and mitigation measures.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:dataProtectionImpacts
    domain_of:
    - Dataset
    range: DataProtectionImpact
    multivalued: true
    inlined: true
    inlined_as_list: true
  human_subject_research:
    name: human_subject_research
    description: Information about whether the dataset involves human subjects research,
      including IRB approval, ethics review, and regulatory compliance.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:humanSubjectResearch
    domain_of:
    - Dataset
    range: HumanSubjectResearch
    inlined: true
  informed_consent:
    name: informed_consent
    description: One or more records detailing informed consent procedures, including
      consent type, documentation, and withdrawal mechanisms.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:informedConsent
    domain_of:
    - Dataset
    range: InformedConsent
    multivalued: true
    inlined: true
    inlined_as_list: true
  at_risk_populations:
    name: at_risk_populations
    description: Information about protections for at-risk populations (e.g., minors,
      pregnant women, prisoners) including special safeguards and assent procedures.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:atRiskPopulations
    domain_of:
    - Dataset
    range: AtRiskPopulations
    inlined: true
  participant_privacy:
    name: participant_privacy
    description: One or more records describing privacy protections and anonymization
      procedures for human research participants.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:participantPrivacy
    domain_of:
    - Dataset
    range: ParticipantPrivacy
    multivalued: true
    inlined: true
    inlined_as_list: true
  participant_compensation:
    name: participant_compensation
    description: One or more records describing compensation or incentives provided
      to human research participants.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:participantCompensation
    domain_of:
    - Dataset
    range: HumanSubjectCompensation
    multivalued: true
    inlined: true
    inlined_as_list: true
  preprocessing_strategies:
    name: preprocessing_strategies
    description: Preprocessing steps applied to the raw data. List of PreprocessingStrategy
      objects from the Preprocessing module describing normalization, transformation,
      and other preparation steps.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:preprocessingStrategies
    domain_of:
    - Dataset
    range: PreprocessingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  cleaning_strategies:
    name: cleaning_strategies
    description: Data cleaning and quality control procedures applied to the dataset.
      List of CleaningStrategy objects from the Preprocessing module describing outlier
      removal, deduplication, and error correction steps.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:cleaningStrategies
    domain_of:
    - Dataset
    range: CleaningStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  labeling_strategies:
    name: labeling_strategies
    description: Labeling or annotation methodologies applied to the data. List of
      LabelingStrategy objects from the Preprocessing module describing annotation
      procedures, annotator qualifications, and quality controls.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:labelingStrategies
    domain_of:
    - Dataset
    range: LabelingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  raw_sources:
    name: raw_sources
    description: Raw, unprocessed source data before any preprocessing was applied.
      List of RawData objects from the Preprocessing module describing original data
      sources and their formats.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:rawSources
    domain_of:
    - Dataset
    range: RawData
    multivalued: true
    inlined: true
    inlined_as_list: true
  imputation_protocols:
    name: imputation_protocols
    description: Data imputation protocols applied to handle missing values. List
      of ImputationProtocol objects from the Preprocessing module describing the imputation
      technique, affected variables, and rationale.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:imputation_protocols
    domain_of:
    - Dataset
    range: ImputationProtocol
    multivalued: true
    inlined: true
    inlined_as_list: true
  annotation_analyses:
    name: annotation_analyses
    description: One or more analyses of annotation quality and inter-annotator agreement.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:annotation_analyses
    domain_of:
    - Dataset
    range: AnnotationAnalysis
    multivalued: true
    inlined: true
    inlined_as_list: true
  machine_annotation_tools:
    name: machine_annotation_tools
    description: List of automated annotation tools used in dataset creation.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: MachineAnnotationTools
    multivalued: true
    inlined: true
    inlined_as_list: true
  existing_uses:
    name: existing_uses
    description: Known existing uses of the dataset at the time of publication. List
      of ExistingUse objects from the Uses module describing research, commercial,
      or other applications of the dataset.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:existingUses
    domain_of:
    - Dataset
    range: ExistingUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  use_repository:
    name: use_repository
    description: Repositories or registries tracking how the dataset has been used.
      List of UseRepository objects from the Uses module pointing to papers with code,
      citation indices, or other use-tracking resources.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:useRepository
    domain_of:
    - Dataset
    range: UseRepository
    multivalued: true
    inlined: true
    inlined_as_list: true
  other_tasks:
    name: other_tasks
    description: Additional tasks the dataset may support beyond its original intent.
      List of OtherTask objects from the Uses module describing potential applications
      not originally planned by the dataset creators.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:otherTasks
    domain_of:
    - Dataset
    range: OtherTask
    multivalued: true
    inlined: true
    inlined_as_list: true
  future_use_impacts:
    name: future_use_impacts
    description: Anticipated impacts of future uses, including risks and benefits.
      List of FutureUseImpact objects from the Uses module describing foreseeable
      consequences of using this dataset in new applications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:futureUseImpacts
    domain_of:
    - Dataset
    range: FutureUseImpact
    multivalued: true
    inlined: true
    inlined_as_list: true
  discouraged_uses:
    name: discouraged_uses
    description: Uses that are not recommended for this dataset due to limitations,
      risks, or ethical concerns. List of DiscouragedUse objects from the Uses module
      explaining why certain applications should be avoided.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:discouragedUses
    domain_of:
    - Dataset
    range: DiscouragedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  intended_uses:
    name: intended_uses
    description: List of explicit intended and recommended uses for this dataset.
      Complements future_use_impacts by focusing on positive applications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:intendedUses
    domain_of:
    - Dataset
    range: IntendedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  prohibited_uses:
    name: prohibited_uses
    description: List of explicitly prohibited or forbidden uses for this dataset.
      Stronger than discouraged_uses - these are not permitted.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:prohibitedUses
    domain_of:
    - Dataset
    range: ProhibitedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  distribution_formats:
    name: distribution_formats
    description: Formats in which the dataset is distributed or made available. List
      of DistributionFormat objects from the Distribution module describing file formats,
      compression, and access methods.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:distributionFormats
    domain_of:
    - Dataset
    range: DistributionFormat
    multivalued: true
    inlined: true
    inlined_as_list: true
  distribution_dates:
    name: distribution_dates
    description: Dates when the dataset was or will be distributed or released. List
      of DistributionDate objects from the Distribution module describing initial
      release dates, version release dates, and planned future releases.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:distributionDates
    domain_of:
    - Dataset
    range: DistributionDate
    multivalued: true
    inlined: true
    inlined_as_list: true
  third_party_sharing:
    name: third_party_sharing
    description: Third-party distribution policies for the dataset. List of ThirdPartySharing
      objects from the Distribution module describing whether and how the dataset
      is shared with entities outside the creating organization.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:thirdPartySharing
    domain_of:
    - Dataset
    range: ThirdPartySharing
    multivalued: true
    inlined: true
    inlined_as_list: true
  license_and_use_terms:
    name: license_and_use_terms
    description: License and usage terms governing dataset access and use. LicenseAndUseTerms
      object from the Data Governance module describing the applicable license, permitted
      uses, and any restrictions.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:license
    domain_of:
    - Dataset
    range: LicenseAndUseTerms
    inlined: true
  ip_restrictions:
    name: ip_restrictions
    description: Intellectual property restrictions on dataset use or redistribution.
      IPRestrictions object from the Data Governance module describing copyright,
      trademark, or other IP considerations.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:ipRestrictions
    domain_of:
    - Dataset
    range: IPRestrictions
    inlined: true
  regulatory_restrictions:
    name: regulatory_restrictions
    description: Regulatory and export control restrictions applicable to the dataset.
      ExportControlRegulatoryRestrictions object from the Data Governance module describing
      compliance requirements such as ITAR, EAR, or GDPR.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:regulatoryRestrictions
    domain_of:
    - Dataset
    - ExportControlRegulatoryRestrictions
    range: ExportControlRegulatoryRestrictions
    inlined: true
  maintainers:
    name: maintainers
    description: Individuals or organizations responsible for maintaining the dataset.
      List of Maintainer objects from the Maintenance module describing maintenance
      contacts, roles, and support channels.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:maintainers
    domain_of:
    - Dataset
    range: Maintainer
    multivalued: true
    inlined: true
    inlined_as_list: true
  errata:
    name: errata
    description: Known errors or corrections to the dataset since publication. List
      of Erratum objects from the Maintenance module describing discovered errors,
      affected records, and correction procedures.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:errata
    domain_of:
    - Dataset
    range: Erratum
    multivalued: true
    inlined: true
    inlined_as_list: true
  updates:
    name: updates
    description: Plans for future updates or versioning of the dataset. UpdatePlan
      object from the Maintenance module describing update frequency, versioning policy,
      and planned enhancements.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:updates
    domain_of:
    - Dataset
    range: UpdatePlan
    inlined: true
  retention_limit:
    name: retention_limit
    description: Data retention policies and limits for the dataset. RetentionLimits
      object from the Maintenance module describing how long the dataset will be available
      and any deletion schedules.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:retentionLimit
    domain_of:
    - Dataset
    range: RetentionLimits
    inlined: true
  version_access:
    name: version_access
    description: Information about access to different versions of the dataset. VersionAccess
      object from the Maintenance module describing where older versions can be found
      and how version history is maintained.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:versionAccess
    domain_of:
    - Dataset
    range: VersionAccess
    inlined: true
  extension_mechanism:
    name: extension_mechanism
    description: Mechanisms for extending or contributing to the dataset. ExtensionMechanism
      object from the Maintenance module describing how others can propose additions,
      corrections, or expansions.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:extensionMechanism
    domain_of:
    - Dataset
    range: ExtensionMechanism
    inlined: true
  variables:
    name: variables
    description: List of metadata records describing individual variables, fields,
      or columns in the dataset.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:variableMeasured
    rank: 1000
    slot_uri: schema:variableMeasured
    domain_of:
    - Dataset
    range: VariableMetadata
    multivalued: true
    inlined: true
    inlined_as_list: true
  is_deidentified:
    name: is_deidentified
    description: De-identification status and procedures applied to the dataset. Deidentification
      object describing whether the dataset contains personal data, what de-identification
      methods were applied, and any residual re-identification risks.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:isDeidentified
    domain_of:
    - Dataset
    range: Deidentification
    inlined: true
  is_tabular:
    name: is_tabular
    description: Whether the dataset is in tabular format (rows and columns). True
      if the data is structured as a table (e.g., CSV, TSV, relational database);
      false for unstructured formats such as images or free text.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:isTabular
    domain_of:
    - Dataset
    range: boolean
  citation:
    name: citation
    description: Recommended citation for this dataset in DataCite or BibTeX format.
      Provides a standard way to cite the dataset in publications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:citation
    rank: 1000
    slot_uri: schema:citation
    domain_of:
    - Dataset
    range: string
  parent_datasets:
    name: parent_datasets
    description: One or more parent datasets that this dataset is part of or derived
      from. Enables hierarchical dataset composition (hasPart/isPartOf relationships).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:isPartOf
    rank: 1000
    slot_uri: schema:isPartOf
    domain_of:
    - Dataset
    range: Dataset
    multivalued: true
    inlined: true
    inlined_as_list: true
  related_datasets:
    name: related_datasets
    description: List of related datasets with typed relationships (e.g., supplements,
      derives from, is version of). Use the DatasetRelationship class to specify relationship
      types.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:isRelatedTo
    domain_of:
    - Dataset
    range: DatasetRelationship
    multivalued: true
    inlined: true
    inlined_as_list: true
class_uri: dcat:Distribution

Induced

name: Dataset
description: A single component of related observations and/or information that can
  be read, manipulated, transformed, and otherwise interpreted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
aliases:
- data resource
- data file
- data package
exact_mappings:
- schema:DataDownload
is_a: Information
slot_usage:
  external_resources:
    name: external_resources
    description: External resources referenced at the dataset level (e.g., related
      publications, repositories, documentation). For file-level external resources,
      use FileCollection.external_resources.
    range: ExternalResource
    multivalued: true
    inlined_as_list: true
  resources:
    name: resources
    description: 'Sub-resources or component datasets that are part of this dataset.
      Note: For file collections, use the file_collections attribute instead.'
    range: Dataset
    multivalued: true
    inlined_as_list: true
attributes:
  file_collections:
    name: file_collections
    description: Collection of file groups within this dataset. Each entry represents
      a logical grouping of files with shared characteristics (e.g., all training
      data, all image files, all raw data files). Maps to nested RO-Crate Dataset
      entities via schema:hasPart in RO-Crate converters.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - dcat:distribution
    rank: 1000
    slot_uri: d4d:fileCollections
    alias: file_collections
    owner: Dataset
    domain_of:
    - Dataset
    range: FileCollection
    multivalued: true
    inlined: true
    inlined_as_list: true
  total_file_count:
    name: total_file_count
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: '156'
    description: Total number of files across all file collections in this dataset.
      Can be aggregated from file_collections[].file_count.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:totalFileCount
    alias: total_file_count
    owner: Dataset
    domain_of:
    - Dataset
    range: integer
  total_size_bytes:
    name: total_size_bytes
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 10737418240 (10 GiB = 10 × 1024³ bytes)
    description: Total size of all files in bytes across all file collections. Can
      be aggregated from file_collections[].total_bytes.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcat:byteSize
    alias: total_size_bytes
    owner: Dataset
    domain_of:
    - Dataset
    range: integer
  purposes:
    name: purposes
    description: Purposes for which the dataset was created. List of Purpose objects
      from the Motivation module, each describing a specific creation goal or intended
      application.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:purposes
    alias: purposes
    owner: Dataset
    domain_of:
    - Dataset
    range: Purpose
    multivalued: true
    inlined: true
    inlined_as_list: true
  tasks:
    name: tasks
    description: Tasks the dataset is intended to support. List of Task objects from
      the Motivation module describing specific machine learning, research, or analytical
      tasks.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:tasks
    alias: tasks
    owner: Dataset
    domain_of:
    - Dataset
    range: Task
    multivalued: true
    inlined: true
    inlined_as_list: true
  addressing_gaps:
    name: addressing_gaps
    description: Research or practical gaps this dataset addresses. List of AddressingGap
      objects from the Motivation module, each describing a gap in existing datasets
      or knowledge that this dataset fills.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:addressingGaps
    alias: addressing_gaps
    owner: Dataset
    domain_of:
    - Dataset
    range: AddressingGap
    multivalued: true
    inlined: true
    inlined_as_list: true
  creators:
    name: creators
    description: Individuals or organizations who created the dataset. List of Creator
      objects describing authorship, roles, and affiliations of dataset creators.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:creator
    alias: creators
    owner: Dataset
    domain_of:
    - Dataset
    range: Creator
    multivalued: true
    inlined: true
    inlined_as_list: true
  funders:
    name: funders
    description: Funding mechanisms that supported dataset creation. List of FundingMechanism
      objects describing grants, contracts, or other funding sources including grantors
      and grant identifiers.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:funder
    alias: funders
    owner: Dataset
    domain_of:
    - Dataset
    range: FundingMechanism
    multivalued: true
    inlined: true
    inlined_as_list: true
  subsets:
    name: subsets
    description: Subsets or splits of this dataset. List of DataSubset objects from
      the Composition module, each representing a logical partition such as training,
      validation, or test splits, or demographic subgroups.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:dataSubset
    alias: subsets
    owner: Dataset
    domain_of:
    - Dataset
    range: DataSubset
    multivalued: true
    inlined: true
    inlined_as_list: true
  instances:
    name: instances
    description: Individual data instances or records in the dataset. List of Instance
      objects from the Composition module describing what each data point represents,
      its type, and associated label information.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:instances
    alias: instances
    owner: Dataset
    domain_of:
    - Dataset
    range: Instance
    multivalued: true
    inlined: true
    inlined_as_list: true
  anomalies:
    name: anomalies
    description: Known data quality issues, errors, or irregularities in the dataset.
      List of DataAnomaly objects from the Composition module, each documenting a
      specific anomaly and its potential impact.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:anomalies
    alias: anomalies
    owner: Dataset
    domain_of:
    - Dataset
    range: DataAnomaly
    multivalued: true
    inlined: true
    inlined_as_list: true
  known_biases:
    name: known_biases
    description: List of known biases present in the dataset that may affect fairness,
      representativeness, or model performance. Uses BiasTypeEnum for standardized
      bias categorization mapped to the AI Ontology (AIO).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:known_biases
    alias: known_biases
    owner: Dataset
    domain_of:
    - Dataset
    range: DatasetBias
    multivalued: true
    inlined: true
    inlined_as_list: true
  known_limitations:
    name: known_limitations
    description: List of known limitations of the dataset that may affect its use
      or interpretation. Distinct from biases (systematic errors) and anomalies (data
      quality issues).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:known_limitations
    alias: known_limitations
    owner: Dataset
    domain_of:
    - Dataset
    range: DatasetLimitation
    multivalued: true
    inlined: true
    inlined_as_list: true
  confidential_elements:
    name: confidential_elements
    description: Confidential or restricted information within the dataset that requires
      access controls. List of Confidentiality objects describing what is confidential
      and why it cannot be released.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:confidentialElements
    alias: confidential_elements
    owner: Dataset
    domain_of:
    - Dataset
    range: Confidentiality
    multivalued: true
    inlined: true
    inlined_as_list: true
  content_warnings:
    name: content_warnings
    description: Content warnings for potentially harmful, offensive, or disturbing
      material in the dataset. List of ContentWarning objects alerting users to sensitive
      content categories.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:contentWarnings
    alias: content_warnings
    owner: Dataset
    domain_of:
    - Dataset
    range: ContentWarning
    multivalued: true
    inlined: true
    inlined_as_list: true
  subpopulations:
    name: subpopulations
    description: Subpopulations represented within the dataset. List of Subpopulation
      objects from the Composition module describing demographic or other groups,
      their representation, and any imbalances.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:subpopulations
    alias: subpopulations
    owner: Dataset
    domain_of:
    - Dataset
    range: Subpopulation
    multivalued: true
    inlined: true
    inlined_as_list: true
  sensitive_elements:
    name: sensitive_elements
    description: Sensitive data elements requiring special handling or access controls.
      List of SensitiveElement objects identifying sensitive attributes such as personal
      identifiers, protected health information, or legally sensitive content.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:sensitiveElements
    alias: sensitive_elements
    owner: Dataset
    domain_of:
    - Dataset
    range: SensitiveElement
    multivalued: true
    inlined: true
    inlined_as_list: true
  relationships:
    name: relationships
    description: Explicit relationships between individual instances in the dataset.
      List of Relationships objects from the Composition module describing how instances
      relate (e.g., graph edges, ratings, social network links).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:relationships
    alias: relationships
    owner: Dataset
    domain_of:
    - Dataset
    range: Relationships
    multivalued: true
    inlined: true
    inlined_as_list: true
  splits:
    name: splits
    description: Recommended data splits for this dataset. List of Splits objects
      from the Composition module describing train/validation/test partitions and
      the rationale for each split strategy.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:splits
    alias: splits
    owner: Dataset
    domain_of:
    - Dataset
    range: Splits
    multivalued: true
    inlined: true
    inlined_as_list: true
  acquisition_methods:
    name: acquisition_methods
    description: Methods used to acquire or obtain dataset instances. List of InstanceAcquisition
      objects from the Collection module describing how data was sourced, whether
      directly observed or derived.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:acquisitionMethods
    alias: acquisition_methods
    owner: Dataset
    domain_of:
    - Dataset
    range: InstanceAcquisition
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_mechanisms:
    name: collection_mechanisms
    description: Mechanisms, instruments, or tools used for data collection. List
      of CollectionMechanism objects from the Collection module describing sensors,
      surveys, APIs, or other collection instruments.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:collectionMechanisms
    alias: collection_mechanisms
    owner: Dataset
    domain_of:
    - Dataset
    range: CollectionMechanism
    multivalued: true
    inlined: true
    inlined_as_list: true
  sampling_strategies:
    name: sampling_strategies
    description: Strategies used to select data instances from a larger population.
      List of SamplingStrategy objects from the Collection module describing sampling
      methodology, inclusion criteria, and limitations.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:samplingStrategies
    alias: sampling_strategies
    owner: Dataset
    domain_of:
    - Dataset
    - Instance
    range: SamplingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  data_collectors:
    name: data_collectors
    description: Individuals or organizations responsible for collecting the data.
      List of DataCollector objects from the Collection module describing who performed
      data collection and their roles.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:dataCollectors
    alias: data_collectors
    owner: Dataset
    domain_of:
    - Dataset
    range: DataCollector
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_timeframes:
    name: collection_timeframes
    description: Time periods during which data was collected. List of CollectionTimeframe
      objects from the Collection module describing collection start and end dates,
      and any gaps in the collection period.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:collectionTimeframes
    alias: collection_timeframes
    owner: Dataset
    domain_of:
    - Dataset
    range: CollectionTimeframe
    multivalued: true
    inlined: true
    inlined_as_list: true
  direct_collection:
    name: direct_collection
    description: Whether data was collected directly from individuals or via third
      parties. List of DirectCollection objects from the Collection module describing
      direct vs. indirect collection methods and sources.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:directCollection
    alias: direct_collection
    owner: Dataset
    domain_of:
    - Dataset
    range: DirectCollection
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_notifications:
    name: collection_notifications
    description: Notifications provided to individuals about data collection. List
      of CollectionNotification objects from the Ethics module describing how and
      when individuals were informed about the data collection.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:collectionNotifications
    alias: collection_notifications
    owner: Dataset
    domain_of:
    - Dataset
    range: CollectionNotification
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_consents:
    name: collection_consents
    description: Consent obtained from individuals for data collection and use. List
      of CollectionConsent objects from the Ethics module describing how consent was
      requested, provided, and documented.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:collectionConsents
    alias: collection_consents
    owner: Dataset
    domain_of:
    - Dataset
    range: CollectionConsent
    multivalued: true
    inlined: true
    inlined_as_list: true
  consent_revocations:
    name: consent_revocations
    description: Mechanisms for individuals to revoke previously given consent. List
      of ConsentRevocation objects from the Ethics module describing how revocation
      works and what happens to data after revocation.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:consentRevocations
    alias: consent_revocations
    owner: Dataset
    domain_of:
    - Dataset
    range: ConsentRevocation
    multivalued: true
    inlined: true
    inlined_as_list: true
  missing_data_documentation:
    name: missing_data_documentation
    description: One or more records documenting missing data patterns and handling
      strategies.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:missingDataDocumentation
    alias: missing_data_documentation
    owner: Dataset
    domain_of:
    - Dataset
    range: MissingDataDocumentation
    multivalued: true
    inlined: true
    inlined_as_list: true
  raw_data_sources:
    name: raw_data_sources
    description: List of raw data sources before preprocessing. Each RawDataSource
      object describes where the original data came from and how it can be accessed.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:rawDataSources
    alias: raw_data_sources
    owner: Dataset
    domain_of:
    - Dataset
    range: RawDataSource
    multivalued: true
    inlined: true
    inlined_as_list: true
  ethical_reviews:
    name: ethical_reviews
    description: Ethical reviews and institutional oversight for the dataset. List
      of EthicalReview objects from the Ethics module describing IRB approvals, ethics
      committee reviews, and compliance certifications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:ethicalReviews
    alias: ethical_reviews
    owner: Dataset
    domain_of:
    - Dataset
    range: EthicalReview
    multivalued: true
    inlined: true
    inlined_as_list: true
  data_protection_impacts:
    name: data_protection_impacts
    description: Data protection impact assessments (DPIAs) conducted for the dataset.
      List of DataProtectionImpact objects from the Ethics module documenting privacy
      risk assessments and mitigation measures.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:dataProtectionImpacts
    alias: data_protection_impacts
    owner: Dataset
    domain_of:
    - Dataset
    range: DataProtectionImpact
    multivalued: true
    inlined: true
    inlined_as_list: true
  human_subject_research:
    name: human_subject_research
    description: Information about whether dataset involves human subjects research,
      including IRB approval, ethics review, and regulatory compliance.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:humanSubjectResearch
    alias: human_subject_research
    owner: Dataset
    domain_of:
    - Dataset
    range: HumanSubjectResearch
    inlined: true
  informed_consent:
    name: informed_consent
    description: One or more records detailing informed consent procedures, including
      consent type, documentation, and withdrawal mechanisms.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:informedConsent
    alias: informed_consent
    owner: Dataset
    domain_of:
    - Dataset
    range: InformedConsent
    multivalued: true
    inlined: true
    inlined_as_list: true
  at_risk_populations:
    name: at_risk_populations
    description: Information about protections for at-risk populations (e.g., minors,
      pregnant women, prisoners) including special safeguards and assent procedures.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:atRiskPopulations
    alias: at_risk_populations
    owner: Dataset
    domain_of:
    - Dataset
    range: AtRiskPopulations
    inlined: true
  participant_privacy:
    name: participant_privacy
    description: One or more records describing privacy protections and anonymization
      procedures for human research participants.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:participantPrivacy
    alias: participant_privacy
    owner: Dataset
    domain_of:
    - Dataset
    range: ParticipantPrivacy
    multivalued: true
    inlined: true
    inlined_as_list: true
  participant_compensation:
    name: participant_compensation
    description: One or more records describing compensation or incentives provided
      to human research participants.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:participantCompensation
    alias: participant_compensation
    owner: Dataset
    domain_of:
    - Dataset
    range: HumanSubjectCompensation
    multivalued: true
    inlined: true
    inlined_as_list: true
  preprocessing_strategies:
    name: preprocessing_strategies
    description: Preprocessing steps applied to the raw data. List of PreprocessingStrategy
      objects from the Preprocessing module describing normalization, transformation,
      and other preparation steps.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:preprocessingStrategies
    alias: preprocessing_strategies
    owner: Dataset
    domain_of:
    - Dataset
    range: PreprocessingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  cleaning_strategies:
    name: cleaning_strategies
    description: Data cleaning and quality control procedures applied to the dataset.
      List of CleaningStrategy objects from the Preprocessing module describing outlier
      removal, deduplication, and error correction steps.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:cleaningStrategies
    alias: cleaning_strategies
    owner: Dataset
    domain_of:
    - Dataset
    range: CleaningStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  labeling_strategies:
    name: labeling_strategies
    description: Labeling or annotation methodologies applied to the data. List of
      LabelingStrategy objects from the Preprocessing module describing annotation
      procedures, annotator qualifications, and quality controls.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:labelingStrategies
    alias: labeling_strategies
    owner: Dataset
    domain_of:
    - Dataset
    range: LabelingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  raw_sources:
    name: raw_sources
    description: Raw, unprocessed source data before any preprocessing was applied.
      List of RawData objects from the Preprocessing module describing original data
      sources and their formats.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:rawSources
    alias: raw_sources
    owner: Dataset
    domain_of:
    - Dataset
    range: RawData
    multivalued: true
    inlined: true
    inlined_as_list: true
  imputation_protocols:
    name: imputation_protocols
    description: Data imputation protocols applied to handle missing values. List
      of ImputationProtocol objects from the Preprocessing module describing the imputation
      technique, affected variables, and rationale.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    # camelCase local name, following the convention of the other d4d: slot URIs
    # (e.g. d4d:cleaningStrategies, d4d:labelingStrategies).
    slot_uri: d4d:imputationProtocols
    alias: imputation_protocols
    owner: Dataset
    domain_of:
    - Dataset
    range: ImputationProtocol
    multivalued: true
    inlined: true
    inlined_as_list: true
  annotation_analyses:
    name: annotation_analyses
    description: One or more analyses of annotation quality and inter-annotator agreement.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    # camelCase local name, following the convention of the other d4d: slot URIs
    # (e.g. d4d:cleaningStrategies, d4d:labelingStrategies).
    slot_uri: d4d:annotationAnalyses
    alias: annotation_analyses
    owner: Dataset
    domain_of:
    - Dataset
    range: AnnotationAnalysis
    multivalued: true
    inlined: true
    inlined_as_list: true
  machine_annotation_tools:
    name: machine_annotation_tools
    description: List of automated annotation tools used in dataset creation.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    # Explicit slot URI so this slot gets a d4d: camelCase IRI like its sibling
    # slots instead of falling back to the schema's default prefix and slot name.
    slot_uri: d4d:machineAnnotationTools
    alias: machine_annotation_tools
    owner: Dataset
    domain_of:
    - Dataset
    range: MachineAnnotationTools
    multivalued: true
    inlined: true
    inlined_as_list: true
  existing_uses:
    name: existing_uses
    description: Known existing uses of the dataset at the time of publication. List
      of ExistingUse objects from the Uses module describing research, commercial,
      or other applications of the dataset.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:existingUses
    alias: existing_uses
    owner: Dataset
    domain_of:
    - Dataset
    range: ExistingUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  use_repository:
    name: use_repository
    description: Repositories or registries tracking how the dataset has been used.
      List of UseRepository objects from the Uses module pointing to papers with code,
      citation indices, or other use-tracking resources.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:useRepository
    alias: use_repository
    owner: Dataset
    domain_of:
    - Dataset
    range: UseRepository
    multivalued: true
    inlined: true
    inlined_as_list: true
  other_tasks:
    name: other_tasks
    description: Additional tasks the dataset may support beyond its original intent.
      List of OtherTask objects from the Uses module describing potential applications
      not originally planned by the dataset creators.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:otherTasks
    alias: other_tasks
    owner: Dataset
    domain_of:
    - Dataset
    range: OtherTask
    multivalued: true
    inlined: true
    inlined_as_list: true
  future_use_impacts:
    name: future_use_impacts
    description: Anticipated impacts of future uses, including risks and benefits.
      List of FutureUseImpact objects from the Uses module describing foreseeable
      consequences of using this dataset in new applications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:futureUseImpacts
    alias: future_use_impacts
    owner: Dataset
    domain_of:
    - Dataset
    range: FutureUseImpact
    multivalued: true
    inlined: true
    inlined_as_list: true
  discouraged_uses:
    name: discouraged_uses
    description: Uses that are not recommended for this dataset due to limitations,
      risks, or ethical concerns. List of DiscouragedUse objects from the Uses module
      explaining why certain applications should be avoided.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:discouragedUses
    alias: discouraged_uses
    owner: Dataset
    domain_of:
    - Dataset
    range: DiscouragedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  intended_uses:
    name: intended_uses
    description: List of explicit intended and recommended uses for this dataset.
      Complements future_use_impacts by focusing on positive applications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:intendedUses
    alias: intended_uses
    owner: Dataset
    domain_of:
    - Dataset
    range: IntendedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  prohibited_uses:
    name: prohibited_uses
    description: List of explicitly prohibited or forbidden uses for this dataset.
      Stronger than discouraged_uses - these are not permitted.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:prohibitedUses
    alias: prohibited_uses
    owner: Dataset
    domain_of:
    - Dataset
    range: ProhibitedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  distribution_formats:
    name: distribution_formats
    description: Formats in which the dataset is distributed or made available. List
      of DistributionFormat objects from the Distribution module describing file formats,
      compression, and access methods.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:distributionFormats
    alias: distribution_formats
    owner: Dataset
    domain_of:
    - Dataset
    range: DistributionFormat
    multivalued: true
    inlined: true
    inlined_as_list: true
  distribution_dates:
    name: distribution_dates
    description: Dates when the dataset was or will be distributed or released. List
      of DistributionDate objects from the Distribution module describing initial
      release dates, version release dates, and planned future releases.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:distributionDates
    alias: distribution_dates
    owner: Dataset
    domain_of:
    - Dataset
    range: DistributionDate
    multivalued: true
    inlined: true
    inlined_as_list: true
  third_party_sharing:
    name: third_party_sharing
    description: Third-party distribution policies for the dataset. List of ThirdPartySharing
      objects from the Distribution module describing whether and how the dataset
      is shared with entities outside the creating organization.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:thirdPartySharing
    alias: third_party_sharing
    owner: Dataset
    domain_of:
    - Dataset
    range: ThirdPartySharing
    multivalued: true
    inlined: true
    inlined_as_list: true
  license_and_use_terms:
    name: license_and_use_terms
    description: License and usage terms governing dataset access and use. LicenseAndUseTerms
      object from the Data Governance module describing the applicable license, permitted
      uses, and any restrictions.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:license
    alias: license_and_use_terms
    owner: Dataset
    domain_of:
    - Dataset
    range: LicenseAndUseTerms
    inlined: true
  ip_restrictions:
    name: ip_restrictions
    description: Intellectual property restrictions on dataset use or redistribution.
      IPRestrictions object from the Data Governance module describing copyright,
      trademark, or other IP considerations.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:ipRestrictions
    alias: ip_restrictions
    owner: Dataset
    domain_of:
    - Dataset
    range: IPRestrictions
    inlined: true
  regulatory_restrictions:
    name: regulatory_restrictions
    description: Regulatory and export control restrictions applicable to the dataset.
      ExportControlRegulatoryRestrictions object from the Data Governance module describing
      compliance requirements such as ITAR, EAR, or GDPR.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:regulatoryRestrictions
    alias: regulatory_restrictions
    owner: Dataset
    domain_of:
    - Dataset
    - ExportControlRegulatoryRestrictions
    range: ExportControlRegulatoryRestrictions
    inlined: true
  maintainers:
    name: maintainers
    description: Individuals or organizations responsible for maintaining the dataset.
      List of Maintainer objects from the Maintenance module describing maintenance
      contacts, roles, and support channels.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:maintainers
    alias: maintainers
    owner: Dataset
    domain_of:
    - Dataset
    range: Maintainer
    multivalued: true
    inlined: true
    inlined_as_list: true
  errata:
    name: errata
    description: Known errors or corrections to the dataset since publication. List
      of Erratum objects from the Maintenance module describing discovered errors,
      affected records, and correction procedures.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:errata
    alias: errata
    owner: Dataset
    domain_of:
    - Dataset
    range: Erratum
    multivalued: true
    inlined: true
    inlined_as_list: true
  updates:
    name: updates
    description: Plans for future updates or versioning of the dataset. UpdatePlan
      object from the Maintenance module describing update frequency, versioning policy,
      and planned enhancements.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:updates
    alias: updates
    owner: Dataset
    domain_of:
    - Dataset
    range: UpdatePlan
    inlined: true
  retention_limit:
    name: retention_limit
    description: Data retention policies and limits for the dataset. RetentionLimits
      object from the Maintenance module describing how long the dataset will be available
      and any deletion schedules.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:retentionLimit
    alias: retention_limit
    owner: Dataset
    domain_of:
    - Dataset
    range: RetentionLimits
    inlined: true
  version_access:
    name: version_access
    description: Information about access to different versions of the dataset. VersionAccess
      object from the Maintenance module describing where older versions can be found
      and how version history is maintained.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:versionAccess
    alias: version_access
    owner: Dataset
    domain_of:
    - Dataset
    range: VersionAccess
    inlined: true
  extension_mechanism:
    name: extension_mechanism
    description: Mechanisms for extending or contributing to the dataset. ExtensionMechanism
      object from the Maintenance module describing how others can propose additions,
      corrections, or expansions.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:extensionMechanism
    alias: extension_mechanism
    owner: Dataset
    domain_of:
    - Dataset
    range: ExtensionMechanism
    inlined: true
  variables:
    name: variables
    description: List of metadata records describing individual variables, fields,
      or columns in the dataset.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:variableMeasured
    rank: 1000
    slot_uri: schema:variableMeasured
    alias: variables
    owner: Dataset
    domain_of:
    - Dataset
    range: VariableMetadata
    multivalued: true
    inlined: true
    inlined_as_list: true
  is_deidentified:
    name: is_deidentified
    description: De-identification status and procedures applied to the dataset. Deidentification
      object describing whether the dataset contains personal data, what de-identification
      methods were applied, and any residual re-identification risks.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:isDeidentified
    alias: is_deidentified
    owner: Dataset
    domain_of:
    - Dataset
    range: Deidentification
    inlined: true
  is_tabular:
    name: is_tabular
    description: Whether the dataset is in tabular format (rows and columns). True
      if the data is structured as a table (e.g., CSV, TSV, relational database);
      false for unstructured formats such as images or free text.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: d4d:isTabular
    alias: is_tabular
    owner: Dataset
    domain_of:
    - Dataset
    range: boolean
  citation:
    name: citation
    description: Recommended citation for this dataset in DataCite or BibTeX format.
      Provides a standard way to cite the dataset in publications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:citation
    rank: 1000
    slot_uri: schema:citation
    alias: citation
    owner: Dataset
    domain_of:
    - Dataset
    range: string
  parent_datasets:
    name: parent_datasets
    description: One or more parent datasets that this dataset is part of or derived
      from. Enables hierarchical dataset composition (hasPart/isPartOf relationships).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:isPartOf
    rank: 1000
    slot_uri: schema:isPartOf
    alias: parent_datasets
    owner: Dataset
    domain_of:
    - Dataset
    range: Dataset
    multivalued: true
    inlined: true
    inlined_as_list: true
  related_datasets:
    name: related_datasets
    description: List of related datasets with typed relationships (e.g., supplements,
      derives from, is version of). Use DatasetRelationship class to specify relationship
      types.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:isRelatedTo
    alias: related_datasets
    owner: Dataset
    domain_of:
    - Dataset
    range: DatasetRelationship
    multivalued: true
    inlined: true
    inlined_as_list: true
  external_resources:
    name: external_resources
    description: External resources referenced at the dataset level (e.g., related
      publications, repositories, documentation). For file-level external resources,
      use FileCollection.external_resources.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:references
    alias: external_resources
    owner: Dataset
    domain_of:
    - Dataset
    - ExternalResource
    - FileCollection
    range: ExternalResource
    multivalued: true
    # Stated explicitly alongside inlined_as_list so this slot is declared the
    # same way as the other object-valued list slots (e.g. related_datasets).
    inlined: true
    inlined_as_list: true
  resources:
    name: resources
    description: 'Sub-resources or component datasets that are part of this dataset.
      Note: For file collections, use the file_collections attribute instead.'
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: schema:hasPart
    alias: resources
    owner: Dataset
    domain_of:
    - DatasetCollection
    - Dataset
    - FileCollection
    range: Dataset
    multivalued: true
    # Stated explicitly alongside inlined_as_list so this slot is declared the
    # same way as parent_datasets, which has the same Dataset range.
    inlined: true
    inlined_as_list: true
  compression:
    name: compression
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: zip
    description: Compression format used, if any (e.g., gzip, bzip2, zip).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcat:compressFormat
    alias: compression
    owner: Dataset
    domain_of:
    - Information
    - File
    - FileCollection
    range: CompressionEnum
  conforms_to:
    name: conforms_to
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: https://www.w3.org/TR/vocab-dcat-3/
    description: An established standard, specification, or schema to which the resource
      conforms.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:conformsTo
    alias: conforms_to
    owner: Dataset
    domain_of:
    - Information
    range: string
  conforms_to_class:
    name: conforms_to_class
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: Dataset
    description: The specific class or type within a schema to which the resource
      conforms.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    broad_mappings:
    - dcterms:conformsTo
    rank: 1000
    slot_uri: d4d:conformsToClass
    alias: conforms_to_class
    owner: Dataset
    domain_of:
    - Information
    range: string
  conforms_to_schema:
    name: conforms_to_schema
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: https://w3id.org/bridge2ai/data-sheets-schema
    description: The schema or data model to which the resource conforms.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    broad_mappings:
    - dcterms:conformsTo
    rank: 1000
    slot_uri: d4d:conformsToSchema
    alias: conforms_to_schema
    owner: Dataset
    domain_of:
    - Information
    range: string
  created_by:
    name: created_by
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: orcid:0000-0002-1234-5678
    description: The person or organization primarily responsible for creating the
      resource.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:creator
    alias: created_by
    owner: Dataset
    domain_of:
    - Information
    range: string
  created_on:
    name: created_on
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: '2023-07-18T00:00:00'
    description: The date and time when the resource was created.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:created
    alias: created_on
    owner: Dataset
    domain_of:
    - Information
    range: datetime
  doi:
    name: doi
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 10.5281/zenodo.10642459
    description: Digital Object Identifier (DOI) in format 10.xxxx/xxxxx providing
      persistent identification (e.g., '10.1038/s41586-020-2649-2', '10.5281/zenodo.1234567').
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:identifier
    broad_mappings:
    - dcterms:identifier
    rank: 1000
    slot_uri: d4d:doiIdentifier
    alias: doi
    owner: Dataset
    domain_of:
    - Information
    range: string
    pattern: 10\.\d{4,}\/.+
  download_url:
    name: download_url
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: https://fairhub.io/datasets/2/download
    description: URL from which the data can be downloaded. This is not the same as
      the landing page, which is a page that describes the dataset. Rather, this URL
      points directly to the data itself.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:url
    rank: 1000
    slot_uri: dcat:downloadURL
    alias: download_url
    owner: Dataset
    domain_of:
    - Information
    range: uri
  issued:
    name: issued
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: '2024-11-15T00:00:00'
    description: Date of formal issuance or publication of the resource.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:issued
    alias: issued
    owner: Dataset
    domain_of:
    - Information
    range: datetime
  # Slot `keywords` as induced on Dataset (declared on Information).
  # Multivalued list of free-text tags, mapped to dcat:keyword.
  keywords:
    name: keywords
    alias: keywords
    owner: Dataset
    domain_of: [Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: dcat:keyword
    range: string
    multivalued: true
    rank: 1000
    description: 'Keywords or tags describing the resource for discovery and classification.'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        # NOTE(review): the example is one comma-separated string although the
        # slot is multivalued -- confirm how the docs tooling renders it.
        value: 'diabetes, retinal imaging, multimodal, clinical data'
  # Slot `language` as induced on Dataset (declared on Information).
  # Free-text language value (example: "en"), mapped to dcterms:language.
  language:
    name: language
    alias: language
    owner: Dataset
    domain_of: [Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: dcterms:language
    exact_mappings: ["schema:inLanguage"]
    range: string
    rank: 1000
    description: 'Language in which the information is expressed.'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 'en'
  # Slot `last_updated_on` as induced on Dataset (declared on Information).
  # Most recent modification timestamp, mapped to dcterms:modified.
  last_updated_on:
    name: last_updated_on
    alias: last_updated_on
    owner: Dataset
    domain_of: [Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: dcterms:modified
    range: datetime
    rank: 1000
    description: >-
      The date and time when the resource was most recently modified or
      updated.
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        # Quoted so the example stays a string rather than a YAML timestamp.
        value: "2024-11-15T00:00:00"
  # Slot `license` as induced on Dataset; shared by Software and Information.
  # Free-text license identifier, mapped to dcterms:license.
  license:
    name: license
    alias: license
    owner: Dataset
    domain_of: [Software, Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: dcterms:license
    range: string
    rank: 1000
    description: >-
      The legal license under which the resource is made available (e.g.,
      "MIT", "CC-BY-4.0").
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 'CC-BY-NC-4.0'
  # Slot `modified_by` as induced on Dataset (declared on Information).
  # Contributor who modified the resource, mapped to dcterms:contributor.
  # The range is a plain string; the example happens to be an ORCID CURIE.
  modified_by:
    name: modified_by
    alias: modified_by
    owner: Dataset
    domain_of: [Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: dcterms:contributor
    range: string
    rank: 1000
    description: >-
      A person or organization that contributed to modifying or updating
      the resource.
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 'orcid:0000-0002-9876-5432'
  # Slot `page` as induced on Dataset (declared on Information).
  # Landing page for the resource, mapped to dcat:landingPage.
  page:
    name: page
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: https://fairhub.io/datasets/2
    description: A landing page or web page providing access to or information about
      the resource.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcat:landingPage
    alias: page
    owner: Dataset
    domain_of:
    - Information
    # A landing page is a web resource, so it is typed as `uri` -- the same
    # range the sibling `download_url` slot uses -- instead of free text.
    range: uri
  # Slot `publisher` as induced on Dataset (declared on Information).
  # Identifier (URI or CURIE) of the publishing organization, mapped to
  # dcterms:publisher.
  publisher:
    name: publisher
    alias: publisher
    owner: Dataset
    domain_of: [Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: dcterms:publisher
    range: uriorcurie
    rank: 1000
    description: 'The organization or entity responsible for making the resource available.'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        # NOTE(review): the trailing "# ..." hint is inside the quoted string,
        # so it is part of the example value, not a YAML comment -- presumably
        # intended for the docs generator; confirm.
        value: "ror:04t3en479  # use a ROR ID, DOI, or URL — not a plain name"
  # Slot `status` as induced on Dataset (declared on Information).
  # Free-text lifecycle status, mapped to d4d:publicationStatus.
  status:
    name: status
    alias: status
    owner: Dataset
    domain_of: [Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: d4d:publicationStatus
    range: string
    rank: 1000
    description: 'The status of the resource (e.g., draft, published, deprecated).'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 'published'
  # Slot `title` as induced on Dataset (declared on Information).
  # Official title string, mapped to dcterms:title.
  title:
    name: title
    alias: title
    owner: Dataset
    domain_of: [Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: dcterms:title
    range: string
    rank: 1000
    description: 'The official title of the element.'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        # Quoted because the value contains ": ".
        value: "AI-READI: Salutogenesis Study of Type 2 Diabetes"
  # Slot `version` as induced on Dataset; shared by Software and Information.
  # Free-text version identifier, mapped to schema:version.
  version:
    name: version
    alias: version
    owner: Dataset
    domain_of: [Software, Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: schema:version
    range: string
    rank: 1000
    description: 'The version identifier of the resource (e.g., "1.0", "2.3.1").'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: '2.0.0'
  # Slot `was_derived_from` as induced on Dataset (declared on Information).
  # Reference to a source resource, mapped to prov:wasDerivedFrom.
  # The range is a plain string; the example is a URL.
  was_derived_from:
    name: was_derived_from
    alias: was_derived_from
    owner: Dataset
    domain_of: [Information]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    slot_uri: prov:wasDerivedFrom
    exact_mappings: ["dcterms:source"]
    range: string
    rank: 1000
    description: 'A resource from which this resource was derived, in whole or in part.'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: "https://fairhub.io/datasets/2/versions/1"
  # Slot `id` as induced on Dataset; declared in the base schema and shared by
  # NamedThing and DatasetProperty. Required identifier slot (URI or CURIE),
  # mapped to schema:identifier.
  id:
    name: id
    alias: id
    owner: Dataset
    domain_of: [NamedThing, DatasetProperty]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
    slot_uri: schema:identifier
    range: uriorcurie
    identifier: true
    required: true
    rank: 1000
    description: 'A unique identifier for a thing.'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: "https://example.org/dataset/my-dataset-001"
  # Slot `name` as induced on Dataset; declared in the base schema and shared
  # by NamedThing and DatasetProperty. Human-readable label, mapped to
  # schema:name.
  name:
    name: name
    alias: name
    owner: Dataset
    domain_of: [NamedThing, DatasetProperty]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
    slot_uri: schema:name
    range: string
    rank: 1000
    description: 'A human-readable name for a thing.'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 'AI-READI Dataset'
  # Slot `description` as induced on Dataset; declared in the base schema and
  # shared by NamedThing, DatasetProperty and DatasetRelationship.
  # Human-readable free text, mapped to schema:description.
  description:
    name: description
    alias: description
    owner: Dataset
    domain_of: [NamedThing, DatasetProperty, DatasetRelationship]
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
    slot_uri: schema:description
    range: string
    rank: 1000
    description: 'A human-readable description for a thing.'
    annotations:
      d4d:docExample:
        tag: d4d:docExample
        value: 'A multimodal dataset of 4,000 participants with Type 2 Diabetes.'
class_uri: dcat:Distribution