Class: Dataset
A single component of related observations and/or information that can be read, manipulated, transformed, and otherwise interpreted.
URI: dcat:Distribution
classDiagram
class Dataset
click Dataset href "../Dataset/"
Information <|-- Dataset
click Information href "../Information/"
Dataset <|-- DataSubset
click DataSubset href "../DataSubset/"
Dataset : acquisition_methods
Dataset --> "*" InstanceAcquisition : acquisition_methods
click InstanceAcquisition href "../InstanceAcquisition/"
Dataset : addressing_gaps
Dataset --> "*" AddressingGap : addressing_gaps
click AddressingGap href "../AddressingGap/"
Dataset : annotation_analyses
Dataset --> "*" AnnotationAnalysis : annotation_analyses
click AnnotationAnalysis href "../AnnotationAnalysis/"
Dataset : anomalies
Dataset --> "*" DataAnomaly : anomalies
click DataAnomaly href "../DataAnomaly/"
Dataset : at_risk_populations
Dataset --> "0..1" AtRiskPopulations : at_risk_populations
click AtRiskPopulations href "../AtRiskPopulations/"
Dataset : citation
Dataset : cleaning_strategies
Dataset --> "*" CleaningStrategy : cleaning_strategies
click CleaningStrategy href "../CleaningStrategy/"
Dataset : collection_consents
Dataset --> "*" CollectionConsent : collection_consents
click CollectionConsent href "../CollectionConsent/"
Dataset : collection_mechanisms
Dataset --> "*" CollectionMechanism : collection_mechanisms
click CollectionMechanism href "../CollectionMechanism/"
Dataset : collection_notifications
Dataset --> "*" CollectionNotification : collection_notifications
click CollectionNotification href "../CollectionNotification/"
Dataset : collection_timeframes
Dataset --> "*" CollectionTimeframe : collection_timeframes
click CollectionTimeframe href "../CollectionTimeframe/"
Dataset : compression
Dataset --> "0..1" CompressionEnum : compression
click CompressionEnum href "../CompressionEnum/"
Dataset : confidential_elements
Dataset --> "*" Confidentiality : confidential_elements
click Confidentiality href "../Confidentiality/"
Dataset : conforms_to
Dataset : conforms_to_class
Dataset : conforms_to_schema
Dataset : consent_revocations
Dataset --> "*" ConsentRevocation : consent_revocations
click ConsentRevocation href "../ConsentRevocation/"
Dataset : content_warnings
Dataset --> "*" ContentWarning : content_warnings
click ContentWarning href "../ContentWarning/"
Dataset : created_by
Dataset : created_on
Dataset : creators
Dataset --> "*" Creator : creators
click Creator href "../Creator/"
Dataset : data_collectors
Dataset --> "*" DataCollector : data_collectors
click DataCollector href "../DataCollector/"
Dataset : data_protection_impacts
Dataset --> "*" DataProtectionImpact : data_protection_impacts
click DataProtectionImpact href "../DataProtectionImpact/"
Dataset : description
Dataset : direct_collection
Dataset --> "*" DirectCollection : direct_collection
click DirectCollection href "../DirectCollection/"
Dataset : discouraged_uses
Dataset --> "*" DiscouragedUse : discouraged_uses
click DiscouragedUse href "../DiscouragedUse/"
Dataset : distribution_dates
Dataset --> "*" DistributionDate : distribution_dates
click DistributionDate href "../DistributionDate/"
Dataset : distribution_formats
Dataset --> "*" DistributionFormat : distribution_formats
click DistributionFormat href "../DistributionFormat/"
Dataset : doi
Dataset : download_url
Dataset : errata
Dataset --> "*" Erratum : errata
click Erratum href "../Erratum/"
Dataset : ethical_reviews
Dataset --> "*" EthicalReview : ethical_reviews
click EthicalReview href "../EthicalReview/"
Dataset : existing_uses
Dataset --> "*" ExistingUse : existing_uses
click ExistingUse href "../ExistingUse/"
Dataset : extension_mechanism
Dataset --> "0..1" ExtensionMechanism : extension_mechanism
click ExtensionMechanism href "../ExtensionMechanism/"
Dataset : external_resources
Dataset --> "*" ExternalResource : external_resources
click ExternalResource href "../ExternalResource/"
Dataset : file_collections
Dataset --> "*" FileCollection : file_collections
click FileCollection href "../FileCollection/"
Dataset : funders
Dataset --> "*" FundingMechanism : funders
click FundingMechanism href "../FundingMechanism/"
Dataset : future_use_impacts
Dataset --> "*" FutureUseImpact : future_use_impacts
click FutureUseImpact href "../FutureUseImpact/"
Dataset : human_subject_research
Dataset --> "0..1" HumanSubjectResearch : human_subject_research
click HumanSubjectResearch href "../HumanSubjectResearch/"
Dataset : id
Dataset : imputation_protocols
Dataset --> "*" ImputationProtocol : imputation_protocols
click ImputationProtocol href "../ImputationProtocol/"
Dataset : informed_consent
Dataset --> "*" InformedConsent : informed_consent
click InformedConsent href "../InformedConsent/"
Dataset : instances
Dataset --> "*" Instance : instances
click Instance href "../Instance/"
Dataset : intended_uses
Dataset --> "*" IntendedUse : intended_uses
click IntendedUse href "../IntendedUse/"
Dataset : ip_restrictions
Dataset --> "0..1" IPRestrictions : ip_restrictions
click IPRestrictions href "../IPRestrictions/"
Dataset : is_deidentified
Dataset --> "0..1" Deidentification : is_deidentified
click Deidentification href "../Deidentification/"
Dataset : is_tabular
Dataset : issued
Dataset : keywords
Dataset : known_biases
Dataset --> "*" DatasetBias : known_biases
click DatasetBias href "../DatasetBias/"
Dataset : known_limitations
Dataset --> "*" DatasetLimitation : known_limitations
click DatasetLimitation href "../DatasetLimitation/"
Dataset : labeling_strategies
Dataset --> "*" LabelingStrategy : labeling_strategies
click LabelingStrategy href "../LabelingStrategy/"
Dataset : language
Dataset : last_updated_on
Dataset : license
Dataset : license_and_use_terms
Dataset --> "0..1" LicenseAndUseTerms : license_and_use_terms
click LicenseAndUseTerms href "../LicenseAndUseTerms/"
Dataset : machine_annotation_tools
Dataset --> "*" MachineAnnotationTools : machine_annotation_tools
click MachineAnnotationTools href "../MachineAnnotationTools/"
Dataset : maintainers
Dataset --> "*" Maintainer : maintainers
click Maintainer href "../Maintainer/"
Dataset : missing_data_documentation
Dataset --> "*" MissingDataDocumentation : missing_data_documentation
click MissingDataDocumentation href "../MissingDataDocumentation/"
Dataset : modified_by
Dataset : name
Dataset : other_tasks
Dataset --> "*" OtherTask : other_tasks
click OtherTask href "../OtherTask/"
Dataset : page
Dataset : parent_datasets
Dataset --> "*" Dataset : parent_datasets
click Dataset href "../Dataset/"
Dataset : participant_compensation
Dataset --> "*" HumanSubjectCompensation : participant_compensation
click HumanSubjectCompensation href "../HumanSubjectCompensation/"
Dataset : participant_privacy
Dataset --> "*" ParticipantPrivacy : participant_privacy
click ParticipantPrivacy href "../ParticipantPrivacy/"
Dataset : preprocessing_strategies
Dataset --> "*" PreprocessingStrategy : preprocessing_strategies
click PreprocessingStrategy href "../PreprocessingStrategy/"
Dataset : prohibited_uses
Dataset --> "*" ProhibitedUse : prohibited_uses
click ProhibitedUse href "../ProhibitedUse/"
Dataset : publisher
Dataset : purposes
Dataset --> "*" Purpose : purposes
click Purpose href "../Purpose/"
Dataset : raw_data_sources
Dataset --> "*" RawDataSource : raw_data_sources
click RawDataSource href "../RawDataSource/"
Dataset : raw_sources
Dataset --> "*" RawData : raw_sources
click RawData href "../RawData/"
Dataset : regulatory_restrictions
Dataset --> "0..1" ExportControlRegulatoryRestrictions : regulatory_restrictions
click ExportControlRegulatoryRestrictions href "../ExportControlRegulatoryRestrictions/"
Dataset : related_datasets
Dataset --> "*" DatasetRelationship : related_datasets
click DatasetRelationship href "../DatasetRelationship/"
Dataset : relationships
Dataset --> "*" Relationships : relationships
click Relationships href "../Relationships/"
Dataset : resources
Dataset --> "*" Dataset : resources
click Dataset href "../Dataset/"
Dataset : retention_limit
Dataset --> "0..1" RetentionLimits : retention_limit
click RetentionLimits href "../RetentionLimits/"
Dataset : sampling_strategies
Dataset --> "*" SamplingStrategy : sampling_strategies
click SamplingStrategy href "../SamplingStrategy/"
Dataset : sensitive_elements
Dataset --> "*" SensitiveElement : sensitive_elements
click SensitiveElement href "../SensitiveElement/"
Dataset : splits
Dataset --> "*" Splits : splits
click Splits href "../Splits/"
Dataset : status
Dataset : subpopulations
Dataset --> "*" Subpopulation : subpopulations
click Subpopulation href "../Subpopulation/"
Dataset : subsets
Dataset --> "*" DataSubset : subsets
click DataSubset href "../DataSubset/"
Dataset : tasks
Dataset --> "*" Task : tasks
click Task href "../Task/"
Dataset : third_party_sharing
Dataset --> "*" ThirdPartySharing : third_party_sharing
click ThirdPartySharing href "../ThirdPartySharing/"
Dataset : title
Dataset : total_file_count
Dataset : total_size_bytes
Dataset : updates
Dataset --> "0..1" UpdatePlan : updates
click UpdatePlan href "../UpdatePlan/"
Dataset : use_repository
Dataset --> "*" UseRepository : use_repository
click UseRepository href "../UseRepository/"
Dataset : variables
Dataset --> "*" VariableMetadata : variables
click VariableMetadata href "../VariableMetadata/"
Dataset : version
Dataset : version_access
Dataset --> "0..1" VersionAccess : version_access
click VersionAccess href "../VersionAccess/"
Dataset : was_derived_from
Inheritance
- NamedThing
  - Information
    - Dataset
      - DataSubset
Slots
| Name | Cardinality and Range | Description | Inheritance |
|---|---|---|---|
| external_resources | * ExternalResource |
External resources referenced at the dataset level (e.g., related publications, repositories, documentation) | direct |
| resources | * Dataset |
Sub-resources or component datasets that are part of this dataset | direct |
| file_collections | * FileCollection |
Collection of file groups within this dataset | direct |
| total_file_count | 0..1 Integer |
Total number of files across all file collections in this dataset | direct |
| total_size_bytes | 0..1 Integer |
Total size of all files in bytes across all file collections | direct |
| purposes | * Purpose |
Purposes for which the dataset was created | direct |
| tasks | * Task |
Tasks the dataset is intended to support | direct |
| addressing_gaps | * AddressingGap |
Research or practical gaps this dataset addresses | direct |
| creators | * Creator |
Individuals or organizations who created the dataset | direct |
| funders | * FundingMechanism |
Funding mechanisms that supported dataset creation | direct |
| subsets | * DataSubset |
Subsets or splits of this dataset | direct |
| instances | * Instance |
Individual data instances or records in the dataset | direct |
| anomalies | * DataAnomaly |
Known data quality issues, errors, or irregularities in the dataset | direct |
| known_biases | * DatasetBias |
List of known biases present in the dataset that may affect fairness, representativeness, or model performance | direct |
| known_limitations | * DatasetLimitation |
List of known limitations of the dataset that may affect its use or interpretation | direct |
| confidential_elements | * Confidentiality |
Confidential or restricted information within the dataset that requires access controls | direct |
| content_warnings | * ContentWarning |
Content warnings for potentially harmful, offensive, or disturbing material in the dataset | direct |
| subpopulations | * Subpopulation |
Subpopulations represented within the dataset | direct |
| sensitive_elements | * SensitiveElement |
Sensitive data elements requiring special handling or access controls | direct |
| relationships | * Relationships |
Explicit relationships between individual instances in the dataset | direct |
| splits | * Splits |
Recommended data splits for this dataset | direct |
| acquisition_methods | * InstanceAcquisition |
Methods used to acquire or obtain dataset instances | direct |
| collection_mechanisms | * CollectionMechanism |
Mechanisms, instruments, or tools used for data collection | direct |
| sampling_strategies | * SamplingStrategy |
Strategies used to select data instances from a larger population | direct |
| data_collectors | * DataCollector |
Individuals or organizations responsible for collecting the data | direct |
| collection_timeframes | * CollectionTimeframe |
Time periods during which data was collected | direct |
| direct_collection | * DirectCollection |
Whether data was collected directly from individuals or via third parties | direct |
| collection_notifications | * CollectionNotification |
Notifications provided to individuals about data collection | direct |
| collection_consents | * CollectionConsent |
Consent obtained from individuals for data collection and use | direct |
| consent_revocations | * ConsentRevocation |
Mechanisms for individuals to revoke previously given consent | direct |
| missing_data_documentation | * MissingDataDocumentation |
One or more records documenting missing data patterns and handling strategies | direct |
| raw_data_sources | * RawDataSource |
List of raw data sources before preprocessing | direct |
| ethical_reviews | * EthicalReview |
Ethical reviews and institutional oversight for the dataset | direct |
| data_protection_impacts | * DataProtectionImpact |
Data protection impact assessments (DPIAs) conducted for the dataset | direct |
| human_subject_research | 0..1 HumanSubjectResearch |
Information about whether dataset involves human subjects research, including... | direct |
| informed_consent | * InformedConsent |
One or more records detailing informed consent procedures, including consent ... | direct |
| at_risk_populations | 0..1 AtRiskPopulations |
Information about protections for at-risk populations | direct |
| participant_privacy | * ParticipantPrivacy |
One or more records describing privacy protections and anonymization procedur... | direct |
| participant_compensation | * HumanSubjectCompensation |
One or more records describing compensation or incentives provided to human r... | direct |
| preprocessing_strategies | * PreprocessingStrategy |
Preprocessing steps applied to the raw data | direct |
| cleaning_strategies | * CleaningStrategy |
Data cleaning and quality control procedures applied to the dataset | direct |
| labeling_strategies | * LabelingStrategy |
Labeling or annotation methodologies applied to the data | direct |
| raw_sources | * RawData |
Raw, unprocessed source data before any preprocessing was applied | direct |
| imputation_protocols | * ImputationProtocol |
Data imputation protocols applied to handle missing values | direct |
| annotation_analyses | * AnnotationAnalysis |
One or more analyses of annotation quality and inter-annotator agreement | direct |
| machine_annotation_tools | * MachineAnnotationTools |
List of automated annotation tools used in dataset creation | direct |
| existing_uses | * ExistingUse |
Known existing uses of the dataset at the time of publication | direct |
| use_repository | * UseRepository |
Repositories or registries tracking how the dataset has been used | direct |
| other_tasks | * OtherTask |
Additional tasks the dataset may support beyond its original intent | direct |
| future_use_impacts | * FutureUseImpact |
Anticipated impacts of future uses, including risks and benefits | direct |
| discouraged_uses | * DiscouragedUse |
Uses that are not recommended for this dataset due to limitations, risks, or ... | direct |
| intended_uses | * IntendedUse |
List of explicit intended and recommended uses for this dataset | direct |
| prohibited_uses | * ProhibitedUse |
List of explicitly prohibited or forbidden uses for this dataset | direct |
| distribution_formats | * DistributionFormat |
Formats in which the dataset is distributed or made available | direct |
| distribution_dates | * DistributionDate |
Dates when the dataset was or will be distributed or released | direct |
| third_party_sharing | * ThirdPartySharing |
Third-party distribution policies for the dataset | direct |
| license_and_use_terms | 0..1 LicenseAndUseTerms |
License and usage terms governing dataset access and use | direct |
| ip_restrictions | 0..1 IPRestrictions |
Intellectual property restrictions on dataset use or redistribution | direct |
| regulatory_restrictions | 0..1 ExportControlRegulatoryRestrictions |
Regulatory and export control restrictions applicable to the dataset | direct |
| maintainers | * Maintainer |
Individuals or organizations responsible for maintaining the dataset | direct |
| errata | * Erratum |
Known errors or corrections to the dataset since publication | direct |
| updates | 0..1 UpdatePlan |
Plans for future updates or versioning of the dataset | direct |
| retention_limit | 0..1 RetentionLimits |
Data retention policies and limits for the dataset | direct |
| version_access | 0..1 VersionAccess |
Information about access to different versions of the dataset | direct |
| extension_mechanism | 0..1 ExtensionMechanism |
Mechanisms for extending or contributing to the dataset | direct |
| variables | * VariableMetadata |
List of metadata records describing individual variables, fields, or columns ... | direct |
| is_deidentified | 0..1 Deidentification |
De-identification status and procedures applied to the dataset | direct |
| is_tabular | 0..1 Boolean |
Whether the dataset is in tabular format (rows and columns) | direct |
| citation | 0..1 String |
Recommended citation for this dataset in DataCite or BibTeX format | direct |
| parent_datasets | * Dataset |
One or more parent datasets that this dataset is part of or derived from | direct |
| related_datasets | * DatasetRelationship |
List of related datasets with typed relationships | direct |
| compression | 0..1 CompressionEnum |
Compression format used, if any | Information |
| conforms_to | 0..1 String |
An established standard, specification, or schema to which the resource confo... | Information |
| conforms_to_class | 0..1 String |
The specific class or type within a schema to which the resource conforms | Information |
| conforms_to_schema | 0..1 String |
The schema or data model to which the resource conforms | Information |
| created_by | 0..1 String |
The person or organization primarily responsible for creating the resource | Information |
| created_on | 0..1 Datetime |
The date and time when the resource was created | Information |
| doi | 0..1 String |
Digital Object Identifier (DOI) in format 10.NNNN/suffix | Information |
| download_url | 0..1 Uri |
URL from which the data can be downloaded | Information |
| issued | 0..1 Datetime |
Date of formal issuance or publication of the resource | Information |
| keywords | * String |
Keywords or tags describing the resource for discovery and classification | Information |
| language | 0..1 String |
Language in which the information is expressed | Information |
| last_updated_on | 0..1 Datetime |
The date and time when the resource was most recently modified or updated | Information |
| license | 0..1 String |
The legal license under which the resource is made available | Information |
| modified_by | 0..1 String |
A person or organization that contributed to modifying or updating the resour... | Information |
| page | 0..1 String |
A landing page or web page providing access to or information about the resou... | Information |
| publisher | 0..1 Uriorcurie |
The organization or entity responsible for making the resource available | Information |
| status | 0..1 String |
The status of the resource | Information |
| title | 0..1 String |
The official title of the element | Information |
| version | 0..1 String |
The version identifier of the resource | Information |
| was_derived_from | 0..1 String |
A resource from which this resource was derived, in whole or in part | Information |
| id | 1 Uriorcurie |
A unique identifier for a thing | NamedThing |
| name | 0..1 String |
A human-readable name for a thing | NamedThing |
| description | 0..1 String |
A human-readable description for a thing | NamedThing |
Usages
| used by | used in | type | used |
|---|---|---|---|
| DatasetCollection | resources | range | Dataset |
| Dataset | resources | range | Dataset |
| Dataset | parent_datasets | range | Dataset |
| DataSubset | resources | range | Dataset |
| DataSubset | parent_datasets | range | Dataset |
| FileCollection | resources | range | Dataset |
Aliases
- data resource
- data file
- data package
Identifier and Mapping Information
Schema Source
- from schema: https://w3id.org/bridge2ai/data-sheets-schema
Mappings
| Mapping Type | Mapped Value |
|---|---|
| self | dcat:Distribution |
| native | data_sheets_schema:Dataset |
| exact | schema:DataDownload |
LinkML Source
Direct
name: Dataset
description: A single component of related observations and/or information that can
be read, manipulated, transformed, and otherwise interpreted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
aliases:
- data resource
- data file
- data package
exact_mappings:
- schema:DataDownload
is_a: Information
slots:
- external_resources
- resources
slot_usage:
external_resources:
name: external_resources
description: External resources referenced at the dataset level (e.g., related
publications, repositories, documentation). For file-level external resources,
use FileCollection.external_resources.
range: ExternalResource
multivalued: true
inlined_as_list: true
resources:
name: resources
description: 'Sub-resources or component datasets that are part of this dataset.
Note: For file collections, use the file_collections attribute instead.'
range: Dataset
multivalued: true
inlined_as_list: true
attributes:
file_collections:
name: file_collections
description: Collection of file groups within this dataset. Each entry represents
a logical grouping of files with shared characteristics (e.g., all training
data, all image files, all raw data files). Maps to nested RO-Crate Dataset
entities via schema:hasPart in RO-Crate converters.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- dcat:distribution
rank: 1000
slot_uri: d4d:fileCollections
domain_of:
- Dataset
range: FileCollection
multivalued: true
inlined: true
inlined_as_list: true
total_file_count:
name: total_file_count
annotations:
d4d:docExample:
tag: d4d:docExample
value: '156'
description: Total number of files across all file collections in this dataset.
Can be aggregated from file_collections[].file_count.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:totalFileCount
domain_of:
- Dataset
range: integer
total_size_bytes:
name: total_size_bytes
annotations:
d4d:docExample:
tag: d4d:docExample
value: 10737418240 (10 GiB = 10 × 1024³ bytes)
description: Total size of all files in bytes across all file collections. Can
be aggregated from file_collections[].total_bytes.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:byteSize
domain_of:
- Dataset
range: integer
purposes:
name: purposes
description: Purposes for which the dataset was created. List of Purpose objects
from the Motivation module, each describing a specific creation goal or intended
application.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:purposes
domain_of:
- Dataset
range: Purpose
multivalued: true
inlined: true
inlined_as_list: true
tasks:
name: tasks
description: Tasks the dataset is intended to support. List of Task objects from
the Motivation module describing specific machine learning, research, or analytical
tasks.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:tasks
domain_of:
- Dataset
range: Task
multivalued: true
inlined: true
inlined_as_list: true
addressing_gaps:
name: addressing_gaps
description: Research or practical gaps this dataset addresses. List of AddressingGap
objects from the Motivation module, each describing a gap in existing datasets
or knowledge that this dataset fills.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:addressingGaps
domain_of:
- Dataset
range: AddressingGap
multivalued: true
inlined: true
inlined_as_list: true
creators:
name: creators
description: Individuals or organizations who created the dataset. List of Creator
objects describing authorship, roles, and affiliations of dataset creators.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:creator
domain_of:
- Dataset
range: Creator
multivalued: true
inlined: true
inlined_as_list: true
funders:
name: funders
description: Funding mechanisms that supported dataset creation. List of FundingMechanism
objects describing grants, contracts, or other funding sources including grantors
and grant identifiers.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:funder
domain_of:
- Dataset
range: FundingMechanism
multivalued: true
inlined: true
inlined_as_list: true
subsets:
name: subsets
description: Subsets or splits of this dataset. List of DataSubset objects from
the Composition module, each representing a logical partition such as training,
validation, or test splits, or demographic subgroups.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:dataSubset
domain_of:
- Dataset
range: DataSubset
multivalued: true
inlined: true
inlined_as_list: true
instances:
name: instances
description: Individual data instances or records in the dataset. List of Instance
objects from the Composition module describing what each data point represents,
its type, and associated label information.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:instances
domain_of:
- Dataset
range: Instance
multivalued: true
inlined: true
inlined_as_list: true
anomalies:
name: anomalies
description: Known data quality issues, errors, or irregularities in the dataset.
List of DataAnomaly objects from the Composition module, each documenting a
specific anomaly and its potential impact.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:anomalies
domain_of:
- Dataset
range: DataAnomaly
multivalued: true
inlined: true
inlined_as_list: true
known_biases:
name: known_biases
description: List of known biases present in the dataset that may affect fairness,
representativeness, or model performance. Uses BiasTypeEnum for standardized
bias categorization mapped to the AI Ontology (AIO).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:known_biases
domain_of:
- Dataset
range: DatasetBias
multivalued: true
inlined: true
inlined_as_list: true
known_limitations:
name: known_limitations
description: List of known limitations of the dataset that may affect its use
or interpretation. Distinct from biases (systematic errors) and anomalies (data
quality issues).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:known_limitations
domain_of:
- Dataset
range: DatasetLimitation
multivalued: true
inlined: true
inlined_as_list: true
confidential_elements:
name: confidential_elements
description: Confidential or restricted information within the dataset that requires
access controls. List of Confidentiality objects describing what is confidential
and why it cannot be released.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:confidentialElements
domain_of:
- Dataset
range: Confidentiality
multivalued: true
inlined: true
inlined_as_list: true
content_warnings:
name: content_warnings
description: Content warnings for potentially harmful, offensive, or disturbing
material in the dataset. List of ContentWarning objects alerting users to sensitive
content categories.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:contentWarnings
domain_of:
- Dataset
range: ContentWarning
multivalued: true
inlined: true
inlined_as_list: true
subpopulations:
name: subpopulations
description: Subpopulations represented within the dataset. List of Subpopulation
objects from the Composition module describing demographic or other groups,
their representation, and any imbalances.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:subpopulations
domain_of:
- Dataset
range: Subpopulation
multivalued: true
inlined: true
inlined_as_list: true
sensitive_elements:
name: sensitive_elements
description: Sensitive data elements requiring special handling or access controls.
List of SensitiveElement objects identifying sensitive attributes such as personal
identifiers, protected health information, or legally sensitive content.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:sensitiveElements
domain_of:
- Dataset
range: SensitiveElement
multivalued: true
inlined: true
inlined_as_list: true
relationships:
name: relationships
description: Explicit relationships between individual instances in the dataset.
List of Relationships objects from the Composition module describing how instances
relate (e.g., graph edges, ratings, social network links).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:relationships
domain_of:
- Dataset
range: Relationships
multivalued: true
inlined: true
inlined_as_list: true
splits:
name: splits
description: Recommended data splits for this dataset. List of Splits objects
from the Composition module describing train/validation/test partitions and
the rationale for each split strategy.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:splits
domain_of:
- Dataset
range: Splits
multivalued: true
inlined: true
inlined_as_list: true
acquisition_methods:
name: acquisition_methods
description: Methods used to acquire or obtain dataset instances. List of InstanceAcquisition
objects from the Collection module describing how data was sourced, whether
directly observed or derived.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:acquisitionMethods
domain_of:
- Dataset
range: InstanceAcquisition
multivalued: true
inlined: true
inlined_as_list: true
collection_mechanisms:
name: collection_mechanisms
description: Mechanisms, instruments, or tools used for data collection. List
of CollectionMechanism objects from the Collection module describing sensors,
surveys, APIs, or other collection instruments.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:collectionMechanisms
domain_of:
- Dataset
range: CollectionMechanism
multivalued: true
inlined: true
inlined_as_list: true
sampling_strategies:
name: sampling_strategies
description: Strategies used to select data instances from a larger population.
List of SamplingStrategy objects from the Collection module describing sampling
methodology, inclusion criteria, and limitations.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:samplingStrategies
domain_of:
- Dataset
- Instance
range: SamplingStrategy
multivalued: true
inlined: true
inlined_as_list: true
data_collectors:
name: data_collectors
description: Individuals or organizations responsible for collecting the data.
List of DataCollector objects from the Collection module describing who performed
data collection and their roles.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:dataCollectors
domain_of:
- Dataset
range: DataCollector
multivalued: true
inlined: true
inlined_as_list: true
collection_timeframes:
name: collection_timeframes
description: Time periods during which data was collected. List of CollectionTimeframe
objects from the Collection module describing collection start and end dates,
and any gaps in the collection period.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:collectionTimeframes
domain_of:
- Dataset
range: CollectionTimeframe
multivalued: true
inlined: true
inlined_as_list: true
direct_collection:
name: direct_collection
description: Whether data was collected directly from individuals or via third
parties. List of DirectCollection objects from the Collection module describing
direct vs. indirect collection methods and sources.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:directCollection
domain_of:
- Dataset
range: DirectCollection
multivalued: true
inlined: true
inlined_as_list: true
collection_notifications:
name: collection_notifications
description: Notifications provided to individuals about data collection. List
of CollectionNotification objects from the Ethics module describing how and
when individuals were informed about the data collection.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:collectionNotifications
domain_of:
- Dataset
range: CollectionNotification
multivalued: true
inlined: true
inlined_as_list: true
collection_consents:
name: collection_consents
description: Consent obtained from individuals for data collection and use. List
of CollectionConsent objects from the Ethics module describing how consent was
requested, provided, and documented.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:collectionConsents
domain_of:
- Dataset
range: CollectionConsent
multivalued: true
inlined: true
inlined_as_list: true
consent_revocations:
name: consent_revocations
description: Mechanisms for individuals to revoke previously given consent. List
of ConsentRevocation objects from the Ethics module describing how revocation
works and what happens to data after revocation.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:consentRevocations
domain_of:
- Dataset
range: ConsentRevocation
multivalued: true
inlined: true
inlined_as_list: true
missing_data_documentation:
name: missing_data_documentation
description: One or more records documenting missing data patterns and handling
strategies.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:missingDataDocumentation
domain_of:
- Dataset
range: MissingDataDocumentation
multivalued: true
inlined: true
inlined_as_list: true
raw_data_sources:
name: raw_data_sources
description: List of raw data sources before preprocessing. Each RawDataSource
object describes where the original data came from and how it can be accessed.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:rawDataSources
domain_of:
- Dataset
range: RawDataSource
multivalued: true
inlined: true
inlined_as_list: true
ethical_reviews:
name: ethical_reviews
description: Ethical reviews and institutional oversight for the dataset. List
of EthicalReview objects from the Ethics module describing IRB approvals, ethics
committee reviews, and compliance certifications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:ethicalReviews
domain_of:
- Dataset
range: EthicalReview
multivalued: true
inlined: true
inlined_as_list: true
data_protection_impacts:
name: data_protection_impacts
description: Data protection impact assessments (DPIAs) conducted for the dataset.
List of DataProtectionImpact objects from the Ethics module documenting privacy
risk assessments and mitigation measures.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:dataProtectionImpacts
domain_of:
- Dataset
range: DataProtectionImpact
multivalued: true
inlined: true
inlined_as_list: true
human_subject_research:
name: human_subject_research
description: Information about whether the dataset involves human subjects research,
including IRB approval, ethics review, and regulatory compliance.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:humanSubjectResearch
domain_of:
- Dataset
range: HumanSubjectResearch
inlined: true
informed_consent:
name: informed_consent
description: One or more records detailing informed consent procedures, including
consent type, documentation, and withdrawal mechanisms.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:informedConsent
domain_of:
- Dataset
range: InformedConsent
multivalued: true
inlined: true
inlined_as_list: true
at_risk_populations:
name: at_risk_populations
description: Information about protections for at-risk populations (e.g., minors,
pregnant women, prisoners) including special safeguards and assent procedures.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:atRiskPopulations
domain_of:
- Dataset
range: AtRiskPopulations
inlined: true
participant_privacy:
name: participant_privacy
description: One or more records describing privacy protections and anonymization
procedures for human research participants.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:participantPrivacy
domain_of:
- Dataset
range: ParticipantPrivacy
multivalued: true
inlined: true
inlined_as_list: true
participant_compensation:
name: participant_compensation
description: One or more records describing compensation or incentives provided
to human research participants.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:participantCompensation
domain_of:
- Dataset
range: HumanSubjectCompensation
multivalued: true
inlined: true
inlined_as_list: true
preprocessing_strategies:
name: preprocessing_strategies
description: Preprocessing steps applied to the raw data. List of PreprocessingStrategy
objects from the Preprocessing module describing normalization, transformation,
and other preparation steps.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:preprocessingStrategies
domain_of:
- Dataset
range: PreprocessingStrategy
multivalued: true
inlined: true
inlined_as_list: true
cleaning_strategies:
name: cleaning_strategies
description: Data cleaning and quality control procedures applied to the dataset.
List of CleaningStrategy objects from the Preprocessing module describing outlier
removal, deduplication, and error correction steps.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:cleaningStrategies
domain_of:
- Dataset
range: CleaningStrategy
multivalued: true
inlined: true
inlined_as_list: true
labeling_strategies:
name: labeling_strategies
description: Labeling or annotation methodologies applied to the data. List of
LabelingStrategy objects from the Preprocessing module describing annotation
procedures, annotator qualifications, and quality controls.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:labelingStrategies
domain_of:
- Dataset
range: LabelingStrategy
multivalued: true
inlined: true
inlined_as_list: true
raw_sources:
name: raw_sources
description: Raw, unprocessed source data before any preprocessing was applied.
List of RawData objects from the Preprocessing module describing original data
sources and their formats.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:rawSources
domain_of:
- Dataset
range: RawData
multivalued: true
inlined: true
inlined_as_list: true
imputation_protocols:
name: imputation_protocols
description: Data imputation protocols applied to handle missing values. List
of ImputationProtocol objects from the Preprocessing module describing the imputation
technique, affected variables, and rationale.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:imputation_protocols
domain_of:
- Dataset
range: ImputationProtocol
multivalued: true
inlined: true
inlined_as_list: true
annotation_analyses:
name: annotation_analyses
description: One or more analyses of annotation quality and inter-annotator agreement.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:annotation_analyses
domain_of:
- Dataset
range: AnnotationAnalysis
multivalued: true
inlined: true
inlined_as_list: true
machine_annotation_tools:
name: machine_annotation_tools
description: List of automated annotation tools used in dataset creation.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
domain_of:
- Dataset
range: MachineAnnotationTools
multivalued: true
inlined: true
inlined_as_list: true
existing_uses:
name: existing_uses
description: Known existing uses of the dataset at the time of publication. List
of ExistingUse objects from the Uses module describing research, commercial,
or other applications of the dataset.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:existingUses
domain_of:
- Dataset
range: ExistingUse
multivalued: true
inlined: true
inlined_as_list: true
use_repository:
name: use_repository
description: Repositories or registries tracking how the dataset has been used.
List of UseRepository objects from the Uses module pointing to papers with code,
citation indices, or other use-tracking resources.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:useRepository
domain_of:
- Dataset
range: UseRepository
multivalued: true
inlined: true
inlined_as_list: true
other_tasks:
name: other_tasks
description: Additional tasks the dataset may support beyond its original intent.
List of OtherTask objects from the Uses module describing potential applications
not originally planned by the dataset creators.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:otherTasks
domain_of:
- Dataset
range: OtherTask
multivalued: true
inlined: true
inlined_as_list: true
future_use_impacts:
name: future_use_impacts
description: Anticipated impacts of future uses, including risks and benefits.
List of FutureUseImpact objects from the Uses module describing foreseeable
consequences of using this dataset in new applications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:futureUseImpacts
domain_of:
- Dataset
range: FutureUseImpact
multivalued: true
inlined: true
inlined_as_list: true
discouraged_uses:
name: discouraged_uses
description: Uses that are not recommended for this dataset due to limitations,
risks, or ethical concerns. List of DiscouragedUse objects from the Uses module
explaining why certain applications should be avoided.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:discouragedUses
domain_of:
- Dataset
range: DiscouragedUse
multivalued: true
inlined: true
inlined_as_list: true
intended_uses:
name: intended_uses
description: List of explicit intended and recommended uses for this dataset.
Complements future_use_impacts by focusing on positive applications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:intendedUses
domain_of:
- Dataset
range: IntendedUse
multivalued: true
inlined: true
inlined_as_list: true
prohibited_uses:
name: prohibited_uses
description: List of explicitly prohibited or forbidden uses for this dataset.
Stronger than discouraged_uses - these are not permitted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:prohibitedUses
domain_of:
- Dataset
range: ProhibitedUse
multivalued: true
inlined: true
inlined_as_list: true
distribution_formats:
name: distribution_formats
description: Formats in which the dataset is distributed or made available. List
of DistributionFormat objects from the Distribution module describing file formats,
compression, and access methods.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:distributionFormats
domain_of:
- Dataset
range: DistributionFormat
multivalued: true
inlined: true
inlined_as_list: true
distribution_dates:
name: distribution_dates
description: Dates when the dataset was or will be distributed or released. List
of DistributionDate objects from the Distribution module describing initial
release dates, version release dates, and planned future releases.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:distributionDates
domain_of:
- Dataset
range: DistributionDate
multivalued: true
inlined: true
inlined_as_list: true
third_party_sharing:
name: third_party_sharing
description: Third-party distribution policies for the dataset. List of ThirdPartySharing
objects from the Distribution module describing whether and how the dataset
is shared with entities outside the creating organization.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:thirdPartySharing
domain_of:
- Dataset
range: ThirdPartySharing
multivalued: true
inlined: true
inlined_as_list: true
license_and_use_terms:
name: license_and_use_terms
description: License and usage terms governing dataset access and use. LicenseAndUseTerms
object from the Data Governance module describing the applicable license, permitted
uses, and any restrictions.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:license
domain_of:
- Dataset
range: LicenseAndUseTerms
inlined: true
ip_restrictions:
name: ip_restrictions
description: Intellectual property restrictions on dataset use or redistribution.
IPRestrictions object from the Data Governance module describing copyright,
trademark, or other IP considerations.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:ipRestrictions
domain_of:
- Dataset
range: IPRestrictions
inlined: true
regulatory_restrictions:
name: regulatory_restrictions
description: Regulatory and export control restrictions applicable to the dataset.
ExportControlRegulatoryRestrictions object from the Data Governance module describing
compliance requirements such as ITAR, EAR, or GDPR.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:regulatoryRestrictions
domain_of:
- Dataset
- ExportControlRegulatoryRestrictions
range: ExportControlRegulatoryRestrictions
inlined: true
maintainers:
name: maintainers
description: Individuals or organizations responsible for maintaining the dataset.
List of Maintainer objects from the Maintenance module describing maintenance
contacts, roles, and support channels.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:maintainers
domain_of:
- Dataset
range: Maintainer
multivalued: true
inlined: true
inlined_as_list: true
errata:
name: errata
description: Known errors or corrections to the dataset since publication. List
of Erratum objects from the Maintenance module describing discovered errors,
affected records, and correction procedures.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:errata
domain_of:
- Dataset
range: Erratum
multivalued: true
inlined: true
inlined_as_list: true
updates:
name: updates
description: Plans for future updates or versioning of the dataset. UpdatePlan
object from the Maintenance module describing update frequency, versioning policy,
and planned enhancements.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:updates
domain_of:
- Dataset
range: UpdatePlan
inlined: true
retention_limit:
name: retention_limit
description: Data retention policies and limits for the dataset. RetentionLimits
object from the Maintenance module describing how long the dataset will be available
and any deletion schedules.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:retentionLimit
domain_of:
- Dataset
range: RetentionLimits
inlined: true
version_access:
name: version_access
description: Information about access to different versions of the dataset. VersionAccess
object from the Maintenance module describing where older versions can be found
and how version history is maintained.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:versionAccess
domain_of:
- Dataset
range: VersionAccess
inlined: true
extension_mechanism:
name: extension_mechanism
description: Mechanisms for extending or contributing to the dataset. ExtensionMechanism
object from the Maintenance module describing how others can propose additions,
corrections, or expansions.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:extensionMechanism
domain_of:
- Dataset
range: ExtensionMechanism
inlined: true
variables:
name: variables
description: List of metadata records describing individual variables, fields,
or columns in the dataset.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:variableMeasured
rank: 1000
slot_uri: schema:variableMeasured
domain_of:
- Dataset
range: VariableMetadata
multivalued: true
inlined: true
inlined_as_list: true
is_deidentified:
name: is_deidentified
description: De-identification status and procedures applied to the dataset. Deidentification
object describing whether the dataset contains personal data, what de-identification
methods were applied, and any residual re-identification risks.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:isDeidentified
domain_of:
- Dataset
range: Deidentification
inlined: true
is_tabular:
name: is_tabular
description: Whether the dataset is in tabular format (rows and columns). True
if the data is structured as a table (e.g., CSV, TSV, relational database);
false for unstructured formats such as images or free text.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:isTabular
domain_of:
- Dataset
range: boolean
citation:
name: citation
description: Recommended citation for this dataset in DataCite or BibTeX format.
Provides a standard way to cite the dataset in publications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:citation
rank: 1000
slot_uri: schema:citation
domain_of:
- Dataset
range: string
parent_datasets:
name: parent_datasets
description: One or more parent datasets that this dataset is part of or derived
from. Enables hierarchical dataset composition (hasPart/isPartOf relationships).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:isPartOf
rank: 1000
slot_uri: schema:isPartOf
domain_of:
- Dataset
range: Dataset
multivalued: true
inlined: true
inlined_as_list: true
related_datasets:
name: related_datasets
description: List of related datasets with typed relationships (e.g., supplements,
derives from, is version of). Use the DatasetRelationship class to specify relationship
types.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:isRelatedTo
domain_of:
- Dataset
range: DatasetRelationship
multivalued: true
inlined: true
inlined_as_list: true
class_uri: dcat:Distribution
Induced
name: Dataset
description: A single component of related observations and/or information that can
be read, manipulated, transformed, and otherwise interpreted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
aliases:
- data resource
- data file
- data package
exact_mappings:
- schema:DataDownload
is_a: Information
slot_usage:
external_resources:
name: external_resources
description: External resources referenced at the dataset level (e.g., related
publications, repositories, documentation). For file-level external resources,
use FileCollection.external_resources.
range: ExternalResource
multivalued: true
inlined_as_list: true
resources:
name: resources
description: 'Sub-resources or component datasets that are part of this dataset.
Note: For file collections, use the file_collections attribute instead.'
range: Dataset
multivalued: true
inlined_as_list: true
attributes:
file_collections:
name: file_collections
description: Collection of file groups within this dataset. Each entry represents
a logical grouping of files with shared characteristics (e.g., all training
data, all image files, all raw data files). Maps to nested RO-Crate Dataset
entities via schema:hasPart in RO-Crate converters.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- dcat:distribution
rank: 1000
slot_uri: d4d:fileCollections
alias: file_collections
owner: Dataset
domain_of:
- Dataset
range: FileCollection
multivalued: true
inlined: true
inlined_as_list: true
total_file_count:
name: total_file_count
annotations:
d4d:docExample:
tag: d4d:docExample
value: '156'
description: Total number of files across all file collections in this dataset.
Can be aggregated from file_collections[].file_count.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:totalFileCount
alias: total_file_count
owner: Dataset
domain_of:
- Dataset
range: integer
total_size_bytes:
name: total_size_bytes
annotations:
d4d:docExample:
tag: d4d:docExample
value: 10737418240 (10 GiB = 10 × 1024³ bytes)
description: Total size of all files in bytes across all file collections. Can
be aggregated from file_collections[].total_bytes.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:byteSize
alias: total_size_bytes
owner: Dataset
domain_of:
- Dataset
range: integer
purposes:
name: purposes
description: Purposes for which the dataset was created. List of Purpose objects
from the Motivation module, each describing a specific creation goal or intended
application.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:purposes
alias: purposes
owner: Dataset
domain_of:
- Dataset
range: Purpose
multivalued: true
inlined: true
inlined_as_list: true
tasks:
name: tasks
description: Tasks the dataset is intended to support. List of Task objects from
the Motivation module describing specific machine learning, research, or analytical
tasks.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:tasks
alias: tasks
owner: Dataset
domain_of:
- Dataset
range: Task
multivalued: true
inlined: true
inlined_as_list: true
addressing_gaps:
name: addressing_gaps
description: Research or practical gaps this dataset addresses. List of AddressingGap
objects from the Motivation module, each describing a gap in existing datasets
or knowledge that this dataset fills.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:addressingGaps
alias: addressing_gaps
owner: Dataset
domain_of:
- Dataset
range: AddressingGap
multivalued: true
inlined: true
inlined_as_list: true
creators:
name: creators
description: Individuals or organizations who created the dataset. List of Creator
objects describing authorship, roles, and affiliations of dataset creators.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:creator
alias: creators
owner: Dataset
domain_of:
- Dataset
range: Creator
multivalued: true
inlined: true
inlined_as_list: true
funders:
name: funders
description: Funding mechanisms that supported dataset creation. List of FundingMechanism
objects describing grants, contracts, or other funding sources including grantors
and grant identifiers.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:funder
alias: funders
owner: Dataset
domain_of:
- Dataset
range: FundingMechanism
multivalued: true
inlined: true
inlined_as_list: true
subsets:
name: subsets
description: Subsets or splits of this dataset. List of DataSubset objects from
the Composition module, each representing a logical partition such as training,
validation, or test splits, or demographic subgroups.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:dataSubset
alias: subsets
owner: Dataset
domain_of:
- Dataset
range: DataSubset
multivalued: true
inlined: true
inlined_as_list: true
instances:
name: instances
description: Individual data instances or records in the dataset. List of Instance
objects from the Composition module describing what each data point represents,
its type, and associated label information.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:instances
alias: instances
owner: Dataset
domain_of:
- Dataset
range: Instance
multivalued: true
inlined: true
inlined_as_list: true
anomalies:
name: anomalies
description: Known data quality issues, errors, or irregularities in the dataset.
List of DataAnomaly objects from the Composition module, each documenting a
specific anomaly and its potential impact.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:anomalies
alias: anomalies
owner: Dataset
domain_of:
- Dataset
range: DataAnomaly
multivalued: true
inlined: true
inlined_as_list: true
known_biases:
name: known_biases
description: List of known biases present in the dataset that may affect fairness,
representativeness, or model performance. Uses BiasTypeEnum for standardized
bias categorization mapped to the AI Ontology (AIO).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:known_biases
alias: known_biases
owner: Dataset
domain_of:
- Dataset
range: DatasetBias
multivalued: true
inlined: true
inlined_as_list: true
known_limitations:
name: known_limitations
description: List of known limitations of the dataset that may affect its use
or interpretation. Distinct from biases (systematic errors) and anomalies (data
quality issues).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:known_limitations
alias: known_limitations
owner: Dataset
domain_of:
- Dataset
range: DatasetLimitation
multivalued: true
inlined: true
inlined_as_list: true
confidential_elements:
name: confidential_elements
description: Confidential or restricted information within the dataset that requires
access controls. List of Confidentiality objects describing what is confidential
and why it cannot be released.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:confidentialElements
alias: confidential_elements
owner: Dataset
domain_of:
- Dataset
range: Confidentiality
multivalued: true
inlined: true
inlined_as_list: true
content_warnings:
name: content_warnings
description: Content warnings for potentially harmful, offensive, or disturbing
material in the dataset. List of ContentWarning objects alerting users to sensitive
content categories.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:contentWarnings
alias: content_warnings
owner: Dataset
domain_of:
- Dataset
range: ContentWarning
multivalued: true
inlined: true
inlined_as_list: true
subpopulations:
name: subpopulations
description: Subpopulations represented within the dataset. List of Subpopulation
objects from the Composition module describing demographic or other groups,
their representation, and any imbalances.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:subpopulations
alias: subpopulations
owner: Dataset
domain_of:
- Dataset
range: Subpopulation
multivalued: true
inlined: true
inlined_as_list: true
sensitive_elements:
name: sensitive_elements
description: Sensitive data elements requiring special handling or access controls.
List of SensitiveElement objects identifying sensitive attributes such as personal
identifiers, protected health information, or legally sensitive content.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:sensitiveElements
alias: sensitive_elements
owner: Dataset
domain_of:
- Dataset
range: SensitiveElement
multivalued: true
inlined: true
inlined_as_list: true
relationships:
name: relationships
description: Explicit relationships between individual instances in the dataset.
List of Relationships objects from the Composition module describing how instances
relate (e.g., graph edges, ratings, social network links).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:relationships
alias: relationships
owner: Dataset
domain_of:
- Dataset
range: Relationships
multivalued: true
inlined: true
inlined_as_list: true
splits:
name: splits
description: Recommended data splits for this dataset. List of Splits objects
from the Composition module describing train/validation/test partitions and
the rationale for each split strategy.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:splits
alias: splits
owner: Dataset
domain_of:
- Dataset
range: Splits
multivalued: true
inlined: true
inlined_as_list: true
acquisition_methods:
name: acquisition_methods
description: Methods used to acquire or obtain dataset instances. List of InstanceAcquisition
objects from the Collection module describing how data was sourced, whether
directly observed or derived.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:acquisitionMethods
alias: acquisition_methods
owner: Dataset
domain_of:
- Dataset
range: InstanceAcquisition
multivalued: true
inlined: true
inlined_as_list: true
collection_mechanisms:
name: collection_mechanisms
description: Mechanisms, instruments, or tools used for data collection. List
of CollectionMechanism objects from the Collection module describing sensors,
surveys, APIs, or other collection instruments.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:collectionMechanisms
alias: collection_mechanisms
owner: Dataset
domain_of:
- Dataset
range: CollectionMechanism
multivalued: true
inlined: true
inlined_as_list: true
sampling_strategies:
name: sampling_strategies
description: Strategies used to select data instances from a larger population.
List of SamplingStrategy objects from the Collection module describing sampling
methodology, inclusion criteria, and limitations.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:samplingStrategies
alias: sampling_strategies
owner: Dataset
domain_of:
- Dataset
- Instance
range: SamplingStrategy
multivalued: true
inlined: true
inlined_as_list: true
data_collectors:
name: data_collectors
description: Individuals or organizations responsible for collecting the data.
List of DataCollector objects from the Collection module describing who performed
data collection and their roles.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:dataCollectors
alias: data_collectors
owner: Dataset
domain_of:
- Dataset
range: DataCollector
multivalued: true
inlined: true
inlined_as_list: true
collection_timeframes:
name: collection_timeframes
description: Time periods during which data was collected. List of CollectionTimeframe
objects from the Collection module describing collection start and end dates,
and any gaps in the collection period.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:collectionTimeframes
alias: collection_timeframes
owner: Dataset
domain_of:
- Dataset
range: CollectionTimeframe
multivalued: true
inlined: true
inlined_as_list: true
direct_collection:
name: direct_collection
description: Whether data was collected directly from individuals or via third
parties. List of DirectCollection objects from the Collection module describing
direct vs. indirect collection methods and sources.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:directCollection
alias: direct_collection
owner: Dataset
domain_of:
- Dataset
range: DirectCollection
multivalued: true
inlined: true
inlined_as_list: true
collection_notifications:
name: collection_notifications
description: Notifications provided to individuals about data collection. List
of CollectionNotification objects from the Ethics module describing how and
when individuals were informed about the data collection.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:collectionNotifications
alias: collection_notifications
owner: Dataset
domain_of:
- Dataset
range: CollectionNotification
multivalued: true
inlined: true
inlined_as_list: true
collection_consents:
name: collection_consents
description: Consent obtained from individuals for data collection and use. List
of CollectionConsent objects from the Ethics module describing how consent was
requested, provided, and documented.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:collectionConsents
alias: collection_consents
owner: Dataset
domain_of:
- Dataset
range: CollectionConsent
multivalued: true
inlined: true
inlined_as_list: true
consent_revocations:
name: consent_revocations
description: Mechanisms for individuals to revoke previously given consent. List
of ConsentRevocation objects from the Ethics module describing how revocation
works and what happens to data after revocation.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:consentRevocations
alias: consent_revocations
owner: Dataset
domain_of:
- Dataset
range: ConsentRevocation
multivalued: true
inlined: true
inlined_as_list: true
missing_data_documentation:
name: missing_data_documentation
description: One or more records documenting missing data patterns and handling
strategies.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:missingDataDocumentation
alias: missing_data_documentation
owner: Dataset
domain_of:
- Dataset
range: MissingDataDocumentation
multivalued: true
inlined: true
inlined_as_list: true
raw_data_sources:
name: raw_data_sources
description: List of raw data sources before preprocessing. Each RawDataSource
object describes where the original data came from and how it can be accessed.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:rawDataSources
alias: raw_data_sources
owner: Dataset
domain_of:
- Dataset
range: RawDataSource
multivalued: true
inlined: true
inlined_as_list: true
ethical_reviews:
name: ethical_reviews
description: Ethical reviews and institutional oversight for the dataset. List
of EthicalReview objects from the Ethics module describing IRB approvals, ethics
committee reviews, and compliance certifications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:ethicalReviews
alias: ethical_reviews
owner: Dataset
domain_of:
- Dataset
range: EthicalReview
multivalued: true
inlined: true
inlined_as_list: true
data_protection_impacts:
name: data_protection_impacts
description: Data protection impact assessments (DPIAs) conducted for the dataset.
List of DataProtectionImpact objects from the Ethics module documenting privacy
risk assessments and mitigation measures.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:dataProtectionImpacts
alias: data_protection_impacts
owner: Dataset
domain_of:
- Dataset
range: DataProtectionImpact
multivalued: true
inlined: true
inlined_as_list: true
human_subject_research:
name: human_subject_research
description: Information about whether the dataset involves human subjects research,
including IRB approval, ethics review, and regulatory compliance.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:humanSubjectResearch
alias: human_subject_research
owner: Dataset
domain_of:
- Dataset
range: HumanSubjectResearch
inlined: true
informed_consent:
name: informed_consent
description: One or more records detailing informed consent procedures, including
consent type, documentation, and withdrawal mechanisms.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:informedConsent
alias: informed_consent
owner: Dataset
domain_of:
- Dataset
range: InformedConsent
multivalued: true
inlined: true
inlined_as_list: true
at_risk_populations:
name: at_risk_populations
description: Information about protections for at-risk populations (e.g., minors,
pregnant women, prisoners) including special safeguards and assent procedures.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:atRiskPopulations
alias: at_risk_populations
owner: Dataset
domain_of:
- Dataset
range: AtRiskPopulations
inlined: true
participant_privacy:
name: participant_privacy
description: One or more records describing privacy protections and anonymization
procedures for human research participants.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:participantPrivacy
alias: participant_privacy
owner: Dataset
domain_of:
- Dataset
range: ParticipantPrivacy
multivalued: true
inlined: true
inlined_as_list: true
participant_compensation:
name: participant_compensation
description: One or more records describing compensation or incentives provided
to human research participants.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:participantCompensation
alias: participant_compensation
owner: Dataset
domain_of:
- Dataset
range: HumanSubjectCompensation
multivalued: true
inlined: true
inlined_as_list: true
preprocessing_strategies:
name: preprocessing_strategies
description: Preprocessing steps applied to the raw data. List of PreprocessingStrategy
objects from the Preprocessing module describing normalization, transformation,
and other preparation steps.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:preprocessingStrategies
alias: preprocessing_strategies
owner: Dataset
domain_of:
- Dataset
range: PreprocessingStrategy
multivalued: true
inlined: true
inlined_as_list: true
cleaning_strategies:
name: cleaning_strategies
description: Data cleaning and quality control procedures applied to the dataset.
List of CleaningStrategy objects from the Preprocessing module describing outlier
removal, deduplication, and error correction steps.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:cleaningStrategies
alias: cleaning_strategies
owner: Dataset
domain_of:
- Dataset
range: CleaningStrategy
multivalued: true
inlined: true
inlined_as_list: true
labeling_strategies:
name: labeling_strategies
description: Labeling or annotation methodologies applied to the data. List of
LabelingStrategy objects from the Preprocessing module describing annotation
procedures, annotator qualifications, and quality controls.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:labelingStrategies
alias: labeling_strategies
owner: Dataset
domain_of:
- Dataset
range: LabelingStrategy
multivalued: true
inlined: true
inlined_as_list: true
raw_sources:
name: raw_sources
description: Raw, unprocessed source data before any preprocessing was applied.
List of RawData objects from the Preprocessing module describing original data
sources and their formats.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:rawSources
alias: raw_sources
owner: Dataset
domain_of:
- Dataset
range: RawData
multivalued: true
inlined: true
inlined_as_list: true
imputation_protocols:
name: imputation_protocols
description: Data imputation protocols applied to handle missing values. List
of ImputationProtocol objects from the Preprocessing module describing the imputation
technique, affected variables, and rationale.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:imputation_protocols
alias: imputation_protocols
owner: Dataset
domain_of:
- Dataset
range: ImputationProtocol
multivalued: true
inlined: true
inlined_as_list: true
annotation_analyses:
name: annotation_analyses
description: One or more analyses of annotation quality and inter-annotator agreement.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:annotation_analyses
alias: annotation_analyses
owner: Dataset
domain_of:
- Dataset
range: AnnotationAnalysis
multivalued: true
inlined: true
inlined_as_list: true
machine_annotation_tools:
name: machine_annotation_tools
description: List of automated annotation tools used in dataset creation.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: machine_annotation_tools
owner: Dataset
domain_of:
- Dataset
range: MachineAnnotationTools
multivalued: true
inlined: true
inlined_as_list: true
existing_uses:
name: existing_uses
description: Known existing uses of the dataset at the time of publication. List
of ExistingUse objects from the Uses module describing research, commercial,
or other applications of the dataset.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:existingUses
alias: existing_uses
owner: Dataset
domain_of:
- Dataset
range: ExistingUse
multivalued: true
inlined: true
inlined_as_list: true
use_repository:
name: use_repository
description: Repositories or registries tracking how the dataset has been used.
List of UseRepository objects from the Uses module pointing to papers with code,
citation indices, or other use-tracking resources.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:useRepository
alias: use_repository
owner: Dataset
domain_of:
- Dataset
range: UseRepository
multivalued: true
inlined: true
inlined_as_list: true
other_tasks:
name: other_tasks
description: Additional tasks the dataset may support beyond its original intent.
List of OtherTask objects from the Uses module describing potential applications
not originally planned by the dataset creators.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:otherTasks
alias: other_tasks
owner: Dataset
domain_of:
- Dataset
range: OtherTask
multivalued: true
inlined: true
inlined_as_list: true
future_use_impacts:
name: future_use_impacts
description: Anticipated impacts of future uses, including risks and benefits.
List of FutureUseImpact objects from the Uses module describing foreseeable
consequences of using this dataset in new applications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:futureUseImpacts
alias: future_use_impacts
owner: Dataset
domain_of:
- Dataset
range: FutureUseImpact
multivalued: true
inlined: true
inlined_as_list: true
discouraged_uses:
name: discouraged_uses
description: Uses that are not recommended for this dataset due to limitations,
risks, or ethical concerns. List of DiscouragedUse objects from the Uses module
explaining why certain applications should be avoided.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:discouragedUses
alias: discouraged_uses
owner: Dataset
domain_of:
- Dataset
range: DiscouragedUse
multivalued: true
inlined: true
inlined_as_list: true
intended_uses:
name: intended_uses
description: List of explicit intended and recommended uses for this dataset.
Complements future_use_impacts by focusing on positive applications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:intendedUses
alias: intended_uses
owner: Dataset
domain_of:
- Dataset
range: IntendedUse
multivalued: true
inlined: true
inlined_as_list: true
prohibited_uses:
name: prohibited_uses
description: List of explicitly prohibited or forbidden uses for this dataset.
Stronger than discouraged_uses - these are not permitted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:prohibitedUses
alias: prohibited_uses
owner: Dataset
domain_of:
- Dataset
range: ProhibitedUse
multivalued: true
inlined: true
inlined_as_list: true
distribution_formats:
name: distribution_formats
description: Formats in which the dataset is distributed or made available. List
of DistributionFormat objects from the Distribution module describing file formats,
compression, and access methods.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:distributionFormats
alias: distribution_formats
owner: Dataset
domain_of:
- Dataset
range: DistributionFormat
multivalued: true
inlined: true
inlined_as_list: true
distribution_dates:
name: distribution_dates
description: Dates when the dataset was or will be distributed or released. List
of DistributionDate objects from the Distribution module describing initial
release dates, version release dates, and planned future releases.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:distributionDates
alias: distribution_dates
owner: Dataset
domain_of:
- Dataset
range: DistributionDate
multivalued: true
inlined: true
inlined_as_list: true
third_party_sharing:
name: third_party_sharing
description: Third-party distribution policies for the dataset. List of ThirdPartySharing
objects from the Distribution module describing whether and how the dataset
is shared with entities outside the creating organization.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:thirdPartySharing
alias: third_party_sharing
owner: Dataset
domain_of:
- Dataset
range: ThirdPartySharing
multivalued: true
inlined: true
inlined_as_list: true
license_and_use_terms:
name: license_and_use_terms
description: License and usage terms governing dataset access and use. LicenseAndUseTerms
object from the Data Governance module describing the applicable license, permitted
uses, and any restrictions.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:license
alias: license_and_use_terms
owner: Dataset
domain_of:
- Dataset
range: LicenseAndUseTerms
inlined: true
ip_restrictions:
name: ip_restrictions
description: Intellectual property restrictions on dataset use or redistribution.
IPRestrictions object from the Data Governance module describing copyright,
trademark, or other IP considerations.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:ipRestrictions
alias: ip_restrictions
owner: Dataset
domain_of:
- Dataset
range: IPRestrictions
inlined: true
regulatory_restrictions:
name: regulatory_restrictions
description: Regulatory and export control restrictions applicable to the dataset.
ExportControlRegulatoryRestrictions object from the Data Governance module describing
compliance requirements such as ITAR, EAR, or GDPR.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:regulatoryRestrictions
alias: regulatory_restrictions
owner: Dataset
domain_of:
- Dataset
- ExportControlRegulatoryRestrictions
range: ExportControlRegulatoryRestrictions
inlined: true
maintainers:
name: maintainers
description: Individuals or organizations responsible for maintaining the dataset.
List of Maintainer objects from the Maintenance module describing maintenance
contacts, roles, and support channels.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:maintainers
alias: maintainers
owner: Dataset
domain_of:
- Dataset
range: Maintainer
multivalued: true
inlined: true
inlined_as_list: true
errata:
name: errata
description: Known errors or corrections to the dataset since publication. List
of Erratum objects from the Maintenance module describing discovered errors,
affected records, and correction procedures.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:errata
alias: errata
owner: Dataset
domain_of:
- Dataset
range: Erratum
multivalued: true
inlined: true
inlined_as_list: true
updates:
name: updates
description: Plans for future updates or versioning of the dataset. UpdatePlan
object from the Maintenance module describing update frequency, versioning policy,
and planned enhancements.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:updates
alias: updates
owner: Dataset
domain_of:
- Dataset
range: UpdatePlan
inlined: true
retention_limit:
name: retention_limit
description: Data retention policies and limits for the dataset. RetentionLimits
object from the Maintenance module describing how long the dataset will be available
and any deletion schedules.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:retentionLimit
alias: retention_limit
owner: Dataset
domain_of:
- Dataset
range: RetentionLimits
inlined: true
version_access:
name: version_access
description: Information about access to different versions of the dataset. VersionAccess
object from the Maintenance module describing where older versions can be found
and how version history is maintained.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:versionAccess
alias: version_access
owner: Dataset
domain_of:
- Dataset
range: VersionAccess
inlined: true
extension_mechanism:
name: extension_mechanism
description: Mechanisms for extending or contributing to the dataset. ExtensionMechanism
object from the Maintenance module describing how others can propose additions,
corrections, or expansions.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:extensionMechanism
alias: extension_mechanism
owner: Dataset
domain_of:
- Dataset
range: ExtensionMechanism
inlined: true
variables:
name: variables
description: List of metadata records describing individual variables, fields,
or columns in the dataset.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:variableMeasured
rank: 1000
slot_uri: schema:variableMeasured
alias: variables
owner: Dataset
domain_of:
- Dataset
range: VariableMetadata
multivalued: true
inlined: true
inlined_as_list: true
is_deidentified:
name: is_deidentified
description: De-identification status and procedures applied to the dataset. Deidentification
object describing whether the dataset contains personal data, what de-identification
methods were applied, and any residual re-identification risks.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:isDeidentified
alias: is_deidentified
owner: Dataset
domain_of:
- Dataset
range: Deidentification
inlined: true
is_tabular:
name: is_tabular
description: Whether the dataset is in tabular format (rows and columns). True
if the data is structured as a table (e.g., CSV, TSV, relational database);
false for unstructured formats such as images or free text.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:isTabular
alias: is_tabular
owner: Dataset
domain_of:
- Dataset
range: boolean
citation:
name: citation
description: Recommended citation for this dataset in DataCite or BibTeX format.
Provides a standard way to cite the dataset in publications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:citation
rank: 1000
slot_uri: schema:citation
alias: citation
owner: Dataset
domain_of:
- Dataset
range: string
parent_datasets:
name: parent_datasets
description: One or more parent datasets that this dataset is part of or derived
from. Enables hierarchical dataset composition (hasPart/isPartOf relationships).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:isPartOf
rank: 1000
slot_uri: schema:isPartOf
alias: parent_datasets
owner: Dataset
domain_of:
- Dataset
range: Dataset
multivalued: true
inlined: true
inlined_as_list: true
related_datasets:
name: related_datasets
description: List of related datasets with typed relationships (e.g., supplements,
derives from, is version of). Use the DatasetRelationship class to specify relationship
types.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:isRelatedTo
alias: related_datasets
owner: Dataset
domain_of:
- Dataset
range: DatasetRelationship
multivalued: true
inlined: true
inlined_as_list: true
external_resources:
name: external_resources
description: External resources referenced at the dataset level (e.g., related
publications, repositories, documentation). For file-level external resources,
use FileCollection.external_resources.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:references
alias: external_resources
owner: Dataset
domain_of:
- Dataset
- ExternalResource
- FileCollection
range: ExternalResource
multivalued: true
inlined_as_list: true
resources:
name: resources
description: 'Sub-resources or component datasets that are part of this dataset.
Note: For file collections, use the file_collections attribute instead.'
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:hasPart
alias: resources
owner: Dataset
domain_of:
- DatasetCollection
- Dataset
- FileCollection
range: Dataset
multivalued: true
inlined_as_list: true
compression:
name: compression
annotations:
d4d:docExample:
tag: d4d:docExample
value: zip
description: Compression format used, if any (e.g., gzip, bzip2, zip).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:compressFormat
alias: compression
owner: Dataset
domain_of:
- Information
- File
- FileCollection
range: CompressionEnum
conforms_to:
name: conforms_to
annotations:
d4d:docExample:
tag: d4d:docExample
value: https://www.w3.org/TR/vocab-dcat-3/
description: An established standard, specification, or schema to which the resource
conforms.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:conformsTo
alias: conforms_to
owner: Dataset
domain_of:
- Information
range: string
conforms_to_class:
name: conforms_to_class
annotations:
d4d:docExample:
tag: d4d:docExample
value: Dataset
description: The specific class or type within a schema to which the resource
conforms.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
broad_mappings:
- dcterms:conformsTo
rank: 1000
slot_uri: d4d:conformsToClass
alias: conforms_to_class
owner: Dataset
domain_of:
- Information
range: string
conforms_to_schema:
name: conforms_to_schema
annotations:
d4d:docExample:
tag: d4d:docExample
value: https://w3id.org/bridge2ai/data-sheets-schema
description: The schema or data model to which the resource conforms.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
broad_mappings:
- dcterms:conformsTo
rank: 1000
slot_uri: d4d:conformsToSchema
alias: conforms_to_schema
owner: Dataset
domain_of:
- Information
range: string
created_by:
name: created_by
annotations:
d4d:docExample:
tag: d4d:docExample
value: orcid:0000-0002-1234-5678
description: The person or organization primarily responsible for creating the
resource.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:creator
alias: created_by
owner: Dataset
domain_of:
- Information
range: string
created_on:
name: created_on
annotations:
d4d:docExample:
tag: d4d:docExample
value: '2023-07-18T00:00:00'
description: The date and time when the resource was created.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:created
alias: created_on
owner: Dataset
domain_of:
- Information
range: datetime
doi:
name: doi
annotations:
d4d:docExample:
tag: d4d:docExample
value: 10.5281/zenodo.10642459
description: Digital Object Identifier (DOI) in the format 10.xxxx/xxxxx, providing
persistent identification (e.g., '10.1038/s41586-020-2649-2', '10.5281/zenodo.1234567').
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:identifier
broad_mappings:
- dcterms:identifier
rank: 1000
slot_uri: d4d:doiIdentifier
alias: doi
owner: Dataset
domain_of:
- Information
range: string
pattern: 10\.\d{4,}\/.+
download_url:
name: download_url
annotations:
d4d:docExample:
tag: d4d:docExample
value: https://fairhub.io/datasets/2/download
description: URL from which the data can be downloaded. This is not the same as
the landing page, which is a page that describes the dataset. Rather, this URL
points directly to the data itself.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:url
rank: 1000
slot_uri: dcat:downloadURL
alias: download_url
owner: Dataset
domain_of:
- Information
range: uri
issued:
name: issued
annotations:
d4d:docExample:
tag: d4d:docExample
value: '2024-11-15T00:00:00'
description: Date of formal issuance or publication of the resource.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:issued
alias: issued
owner: Dataset
domain_of:
- Information
range: datetime
keywords:
name: keywords
annotations:
d4d:docExample:
tag: d4d:docExample
value: diabetes, retinal imaging, multimodal, clinical data
description: Keywords or tags describing the resource for discovery and classification.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:keyword
alias: keywords
owner: Dataset
domain_of:
- Information
range: string
multivalued: true
language:
name: language
annotations:
d4d:docExample:
tag: d4d:docExample
value: en
description: Language in which the information is expressed.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:inLanguage
rank: 1000
slot_uri: dcterms:language
alias: language
owner: Dataset
domain_of:
- Information
range: string
last_updated_on:
name: last_updated_on
annotations:
d4d:docExample:
tag: d4d:docExample
value: '2024-11-15T00:00:00'
description: The date and time when the resource was most recently modified or
updated.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:modified
alias: last_updated_on
owner: Dataset
domain_of:
- Information
range: datetime
license:
name: license
annotations:
d4d:docExample:
tag: d4d:docExample
value: CC-BY-NC-4.0
description: The legal license under which the resource is made available (e.g.,
"MIT", "CC-BY-4.0").
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:license
alias: license
owner: Dataset
domain_of:
- Software
- Information
range: string
modified_by:
name: modified_by
annotations:
d4d:docExample:
tag: d4d:docExample
value: orcid:0000-0002-9876-5432
description: A person or organization that contributed to modifying or updating
the resource.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:contributor
alias: modified_by
owner: Dataset
domain_of:
- Information
range: string
page:
name: page
annotations:
d4d:docExample:
tag: d4d:docExample
value: https://fairhub.io/datasets/2
description: A landing page or web page providing access to or information about
the resource.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:landingPage
alias: page
owner: Dataset
domain_of:
- Information
range: string
publisher:
name: publisher
annotations:
d4d:docExample:
tag: d4d:docExample
value: 'ror:04t3en479 # use a ROR ID, DOI, or URL — not a plain name'
description: The organization or entity responsible for making the resource available.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:publisher
alias: publisher
owner: Dataset
domain_of:
- Information
range: uriorcurie
status:
name: status
annotations:
d4d:docExample:
tag: d4d:docExample
value: published
description: The status of the resource (e.g., draft, published, deprecated).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: d4d:publicationStatus
alias: status
owner: Dataset
domain_of:
- Information
range: string
title:
name: title
annotations:
d4d:docExample:
tag: d4d:docExample
value: 'AI-READI: Salutogenesis Study of Type 2 Diabetes'
description: The official title of the element.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:title
alias: title
owner: Dataset
domain_of:
- Information
range: string
version:
name: version
annotations:
d4d:docExample:
tag: d4d:docExample
value: 2.0.0
description: The version identifier of the resource (e.g., "1.0", "2.3.1").
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:version
alias: version
owner: Dataset
domain_of:
- Software
- Information
range: string
was_derived_from:
name: was_derived_from
annotations:
d4d:docExample:
tag: d4d:docExample
value: https://fairhub.io/datasets/2/versions/1
description: A resource from which this resource was derived, in whole or in part.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- dcterms:source
rank: 1000
slot_uri: prov:wasDerivedFrom
alias: was_derived_from
owner: Dataset
domain_of:
- Information
range: string
id:
name: id
annotations:
d4d:docExample:
tag: d4d:docExample
value: https://example.org/dataset/my-dataset-001
description: A unique identifier for a thing.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
rank: 1000
slot_uri: schema:identifier
identifier: true
alias: id
owner: Dataset
domain_of:
- NamedThing
- DatasetProperty
range: uriorcurie
required: true
name:
name: name
annotations:
d4d:docExample:
tag: d4d:docExample
value: AI-READI Dataset
description: A human-readable name for a thing.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
rank: 1000
slot_uri: schema:name
alias: name
owner: Dataset
domain_of:
- NamedThing
- DatasetProperty
range: string
description:
name: description
annotations:
d4d:docExample:
tag: d4d:docExample
value: A multimodal dataset of 4,000 participants with Type 2 Diabetes.
description: A human-readable description for a thing.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
rank: 1000
slot_uri: schema:description
alias: description
owner: Dataset
domain_of:
- NamedThing
- DatasetProperty
- DatasetRelationship
range: string
class_uri: dcat:Distribution