Class: Dataset
A single component of related observations and/or information that can be read, manipulated, transformed, and otherwise interpreted.
URI: dcat:Distribution
classDiagram
%% Class diagram for Dataset. Statement forms used below:
%%   class X                 declares node X
%%   A <|-- B                B inherits from A
%%   Dataset : slot          lists slot as a member of Dataset
%%   Dataset --> "N" T : s   association from Dataset to T, labelled s, with cardinality N
%%   click T href "..."      links node T to its documentation page
class Dataset
click Dataset href "../Dataset/"
%% Inheritance: Information is the parent of Dataset; DataSubset is its child.
Information <|-- Dataset
click Information href "../Information/"
Dataset <|-- DataSubset
click DataSubset href "../DataSubset/"
%% Slots follow in alphabetical order. A slot with only a member line has no
%% association drawn for it in this diagram.
Dataset : acquisition_methods
Dataset --> "*" InstanceAcquisition : acquisition_methods
click InstanceAcquisition href "../InstanceAcquisition/"
Dataset : addressing_gaps
Dataset --> "*" AddressingGap : addressing_gaps
click AddressingGap href "../AddressingGap/"
Dataset : annotation_analyses
Dataset --> "*" AnnotationAnalysis : annotation_analyses
click AnnotationAnalysis href "../AnnotationAnalysis/"
Dataset : anomalies
Dataset --> "*" DataAnomaly : anomalies
click DataAnomaly href "../DataAnomaly/"
Dataset : bytes
Dataset : citation
Dataset : cleaning_strategies
Dataset --> "*" CleaningStrategy : cleaning_strategies
click CleaningStrategy href "../CleaningStrategy/"
Dataset : collection_mechanisms
Dataset --> "*" CollectionMechanism : collection_mechanisms
click CollectionMechanism href "../CollectionMechanism/"
Dataset : collection_timeframes
Dataset --> "*" CollectionTimeframe : collection_timeframes
click CollectionTimeframe href "../CollectionTimeframe/"
Dataset : compression
Dataset --> "0..1" CompressionEnum : compression
click CompressionEnum href "../CompressionEnum/"
Dataset : confidential_elements
Dataset --> "*" Confidentiality : confidential_elements
click Confidentiality href "../Confidentiality/"
Dataset : conforms_to
Dataset : conforms_to_class
Dataset : conforms_to_schema
Dataset : content_warnings
Dataset --> "*" ContentWarning : content_warnings
click ContentWarning href "../ContentWarning/"
Dataset : created_by
Dataset : created_on
Dataset : creators
Dataset --> "*" Creator : creators
click Creator href "../Creator/"
Dataset : data_collectors
Dataset --> "*" DataCollector : data_collectors
click DataCollector href "../DataCollector/"
Dataset : data_protection_impacts
Dataset --> "*" DataProtectionImpact : data_protection_impacts
click DataProtectionImpact href "../DataProtectionImpact/"
Dataset : description
Dataset : dialect
Dataset : discouraged_uses
Dataset --> "*" DiscouragedUse : discouraged_uses
click DiscouragedUse href "../DiscouragedUse/"
Dataset : distribution_dates
Dataset --> "*" DistributionDate : distribution_dates
click DistributionDate href "../DistributionDate/"
Dataset : distribution_formats
Dataset --> "*" DistributionFormat : distribution_formats
click DistributionFormat href "../DistributionFormat/"
Dataset : doi
Dataset : download_url
Dataset : encoding
Dataset --> "0..1" EncodingEnum : encoding
click EncodingEnum href "../EncodingEnum/"
Dataset : errata
Dataset --> "*" Erratum : errata
click Erratum href "../Erratum/"
Dataset : ethical_reviews
Dataset --> "*" EthicalReview : ethical_reviews
click EthicalReview href "../EthicalReview/"
Dataset : existing_uses
Dataset --> "*" ExistingUse : existing_uses
click ExistingUse href "../ExistingUse/"
Dataset : extension_mechanism
Dataset --> "0..1" ExtensionMechanism : extension_mechanism
click ExtensionMechanism href "../ExtensionMechanism/"
Dataset : external_resources
Dataset --> "*" ExternalResource : external_resources
click ExternalResource href "../ExternalResource/"
Dataset : format
Dataset --> "0..1" FormatEnum : format
click FormatEnum href "../FormatEnum/"
Dataset : funders
Dataset --> "*" FundingMechanism : funders
click FundingMechanism href "../FundingMechanism/"
Dataset : future_use_impacts
Dataset --> "*" FutureUseImpact : future_use_impacts
click FutureUseImpact href "../FutureUseImpact/"
Dataset : hash
Dataset : human_subject_research
Dataset --> "0..1" HumanSubjectResearch : human_subject_research
click HumanSubjectResearch href "../HumanSubjectResearch/"
Dataset : id
Dataset : imputation_protocols
Dataset --> "*" ImputationProtocol : imputation_protocols
click ImputationProtocol href "../ImputationProtocol/"
Dataset : informed_consent
Dataset --> "*" InformedConsent : informed_consent
click InformedConsent href "../InformedConsent/"
Dataset : instances
Dataset --> "*" Instance : instances
click Instance href "../Instance/"
Dataset : intended_uses
Dataset --> "*" IntendedUse : intended_uses
click IntendedUse href "../IntendedUse/"
Dataset : ip_restrictions
Dataset --> "0..1" IPRestrictions : ip_restrictions
click IPRestrictions href "../IPRestrictions/"
Dataset : is_deidentified
Dataset --> "0..1" Deidentification : is_deidentified
click Deidentification href "../Deidentification/"
Dataset : is_tabular
Dataset : issued
Dataset : keywords
Dataset : known_biases
Dataset --> "*" DatasetBias : known_biases
click DatasetBias href "../DatasetBias/"
Dataset : known_limitations
Dataset --> "*" DatasetLimitation : known_limitations
click DatasetLimitation href "../DatasetLimitation/"
Dataset : labeling_strategies
Dataset --> "*" LabelingStrategy : labeling_strategies
click LabelingStrategy href "../LabelingStrategy/"
Dataset : language
Dataset : last_updated_on
Dataset : license
Dataset : license_and_use_terms
Dataset --> "0..1" LicenseAndUseTerms : license_and_use_terms
click LicenseAndUseTerms href "../LicenseAndUseTerms/"
Dataset : machine_annotation_tools
Dataset --> "*" MachineAnnotationTools : machine_annotation_tools
click MachineAnnotationTools href "../MachineAnnotationTools/"
Dataset : maintainers
Dataset --> "*" Maintainer : maintainers
click Maintainer href "../Maintainer/"
Dataset : md5
Dataset : media_type
Dataset --> "0..1" MediaTypeEnum : media_type
click MediaTypeEnum href "../MediaTypeEnum/"
Dataset : missing_data_documentation
Dataset --> "*" MissingDataDocumentation : missing_data_documentation
click MissingDataDocumentation href "../MissingDataDocumentation/"
Dataset : modified_by
Dataset : name
Dataset : other_tasks
Dataset --> "*" OtherTask : other_tasks
click OtherTask href "../OtherTask/"
Dataset : page
Dataset : parent_datasets
%% Self-association: parent_datasets points back at Dataset. The click line
%% repeats the binding already declared for Dataset at the top of the diagram.
Dataset --> "*" Dataset : parent_datasets
click Dataset href "../Dataset/"
Dataset : participant_compensation
Dataset --> "0..1" HumanSubjectCompensation : participant_compensation
click HumanSubjectCompensation href "../HumanSubjectCompensation/"
Dataset : participant_privacy
Dataset --> "*" ParticipantPrivacy : participant_privacy
click ParticipantPrivacy href "../ParticipantPrivacy/"
Dataset : path
Dataset : preprocessing_strategies
Dataset --> "*" PreprocessingStrategy : preprocessing_strategies
click PreprocessingStrategy href "../PreprocessingStrategy/"
Dataset : prohibited_uses
Dataset --> "*" ProhibitedUse : prohibited_uses
click ProhibitedUse href "../ProhibitedUse/"
Dataset : publisher
Dataset : purposes
Dataset --> "*" Purpose : purposes
click Purpose href "../Purpose/"
Dataset : raw_data_sources
Dataset --> "*" RawDataSource : raw_data_sources
click RawDataSource href "../RawDataSource/"
Dataset : raw_sources
Dataset --> "*" RawData : raw_sources
click RawData href "../RawData/"
Dataset : regulatory_restrictions
Dataset --> "0..1" ExportControlRegulatoryRestrictions : regulatory_restrictions
click ExportControlRegulatoryRestrictions href "../ExportControlRegulatoryRestrictions/"
Dataset : related_datasets
Dataset --> "*" DatasetRelationship : related_datasets
click DatasetRelationship href "../DatasetRelationship/"
Dataset : resources
%% Self-association: resources also points back at Dataset.
Dataset --> "*" Dataset : resources
click Dataset href "../Dataset/"
Dataset : retention_limit
Dataset --> "0..1" RetentionLimits : retention_limit
click RetentionLimits href "../RetentionLimits/"
Dataset : sampling_strategies
Dataset --> "*" SamplingStrategy : sampling_strategies
click SamplingStrategy href "../SamplingStrategy/"
Dataset : sensitive_elements
Dataset --> "*" SensitiveElement : sensitive_elements
click SensitiveElement href "../SensitiveElement/"
Dataset : sha256
Dataset : status
Dataset : subpopulations
Dataset --> "*" Subpopulation : subpopulations
click Subpopulation href "../Subpopulation/"
Dataset : subsets
%% subsets targets DataSubset, which is also the subclass declared above.
Dataset --> "*" DataSubset : subsets
click DataSubset href "../DataSubset/"
Dataset : tasks
Dataset --> "*" Task : tasks
click Task href "../Task/"
Dataset : title
Dataset : updates
Dataset --> "0..1" UpdatePlan : updates
click UpdatePlan href "../UpdatePlan/"
Dataset : use_repository
Dataset --> "*" UseRepository : use_repository
click UseRepository href "../UseRepository/"
Dataset : variables
Dataset --> "*" VariableMetadata : variables
click VariableMetadata href "../VariableMetadata/"
Dataset : version
Dataset : version_access
Dataset --> "0..1" VersionAccess : version_access
click VersionAccess href "../VersionAccess/"
Dataset : vulnerable_populations
Dataset --> "0..1" VulnerablePopulations : vulnerable_populations
click VulnerablePopulations href "../VulnerablePopulations/"
Dataset : was_derived_from
Inheritance
- NamedThing
  - Information
    - Dataset
      - DataSubset
Slots
| Name | Cardinality and Range | Description | Inheritance |
|---|---|---|---|
| bytes | 0..1 Integer | Size of the data in bytes | direct |
| dialect | 0..1 String | | direct |
| encoding | 0..1 EncodingEnum | the character encoding of the data | direct |
| format | 0..1 FormatEnum | The file format, physical medium, or dimensions of a resource | direct |
| hash | 0..1 String | hash of the data | direct |
| md5 | 0..1 String | md5 hash of the data | direct |
| media_type | 0..1 MediaTypeEnum | The media type of the data | direct |
| path | 0..1 String | | direct |
| sha256 | 0..1 String | sha256 hash of the data | direct |
| external_resources | * ExternalResource | Links or identifiers for external resources | direct |
| resources | * Dataset | Sub-resources or component datasets that are part of this dataset | direct |
| purposes | * Purpose | | direct |
| tasks | * Task | | direct |
| addressing_gaps | * AddressingGap | | direct |
| creators | * Creator | | direct |
| funders | * FundingMechanism | | direct |
| subsets | * DataSubset | | direct |
| instances | * Instance | | direct |
| anomalies | * DataAnomaly | | direct |
| known_biases | * DatasetBias | Known biases present in the dataset that may affect fairness, representativeness, or model performance | direct |
| known_limitations | * DatasetLimitation | Known limitations of the dataset that may affect its use or interpretation | direct |
| confidential_elements | * Confidentiality | | direct |
| content_warnings | * ContentWarning | | direct |
| subpopulations | * Subpopulation | | direct |
| sensitive_elements | * SensitiveElement | | direct |
| acquisition_methods | * InstanceAcquisition | | direct |
| collection_mechanisms | * CollectionMechanism | | direct |
| sampling_strategies | * SamplingStrategy | | direct |
| data_collectors | * DataCollector | | direct |
| collection_timeframes | * CollectionTimeframe | | direct |
| missing_data_documentation | * MissingDataDocumentation | Documentation of missing data patterns and handling strategies | direct |
| raw_data_sources | * RawDataSource | Description of raw data sources before preprocessing | direct |
| ethical_reviews | * EthicalReview | | direct |
| data_protection_impacts | * DataProtectionImpact | | direct |
| human_subject_research | 0..1 HumanSubjectResearch | Information about whether dataset involves human subjects research, including IRB approval, ethics review, and regulatory compliance | direct |
| informed_consent | * InformedConsent | Details about informed consent procedures, including consent type, documentation, and withdrawal mechanisms | direct |
| participant_privacy | * ParticipantPrivacy | Privacy protections and anonymization procedures for human research participants, including reidentification risk assessment | direct |
| participant_compensation | 0..1 HumanSubjectCompensation | Compensation or incentives provided to human research participants | direct |
| vulnerable_populations | 0..1 VulnerablePopulations | Information about protections for vulnerable populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures | direct |
| preprocessing_strategies | * PreprocessingStrategy | | direct |
| cleaning_strategies | * CleaningStrategy | | direct |
| labeling_strategies | * LabelingStrategy | | direct |
| raw_sources | * RawData | | direct |
| imputation_protocols | * ImputationProtocol | Data imputation methodology and techniques | direct |
| annotation_analyses | * AnnotationAnalysis | Analysis of annotation quality and inter-annotator agreement | direct |
| machine_annotation_tools | * MachineAnnotationTools | Automated annotation tools used in dataset creation | direct |
| existing_uses | * ExistingUse | | direct |
| use_repository | * UseRepository | | direct |
| other_tasks | * OtherTask | | direct |
| future_use_impacts | * FutureUseImpact | | direct |
| discouraged_uses | * DiscouragedUse | | direct |
| intended_uses | * IntendedUse | Explicit intended and recommended uses for this dataset | direct |
| prohibited_uses | * ProhibitedUse | Explicitly prohibited or forbidden uses for this dataset | direct |
| distribution_formats | * DistributionFormat | | direct |
| distribution_dates | * DistributionDate | | direct |
| license_and_use_terms | 0..1 LicenseAndUseTerms | | direct |
| ip_restrictions | 0..1 IPRestrictions | | direct |
| regulatory_restrictions | 0..1 ExportControlRegulatoryRestrictions | | direct |
| maintainers | * Maintainer | | direct |
| errata | * Erratum | | direct |
| updates | 0..1 UpdatePlan | | direct |
| retention_limit | 0..1 RetentionLimits | | direct |
| version_access | 0..1 VersionAccess | | direct |
| extension_mechanism | 0..1 ExtensionMechanism | | direct |
| variables | * VariableMetadata | Metadata describing individual variables, fields, or columns in the dataset | direct |
| is_deidentified | 0..1 Deidentification | | direct |
| is_tabular | 0..1 Boolean | | direct |
| citation | 0..1 String | Recommended citation for this dataset in DataCite or BibTeX format | direct |
| parent_datasets | * Dataset | Parent datasets that this dataset is part of or derived from | direct |
| related_datasets | * DatasetRelationship | Related datasets with typed relationships (e.g., supplements, derives from, is version of) | direct |
| compression | 0..1 CompressionEnum | compression format used, if any | Information |
| conforms_to | 0..1 String | | Information |
| conforms_to_class | 0..1 String | | Information |
| conforms_to_schema | 0..1 String | | Information |
| created_by | 0..1 String | | Information |
| created_on | 0..1 Datetime | | Information |
| doi | 0..1 String | digital object identifier | Information |
| download_url | 0..1 Uri | URL from which the data can be downloaded | Information |
| issued | 0..1 Datetime | | Information |
| keywords | * String | | Information |
| language | 0..1 String | language in which the information is expressed | Information |
| last_updated_on | 0..1 Datetime | | Information |
| license | 0..1 String | | Information |
| modified_by | 0..1 String | | Information |
| page | 0..1 String | | Information |
| publisher | 0..1 Uriorcurie | | Information |
| status | 0..1 String | | Information |
| title | 0..1 String | the official title of the element | Information |
| version | 0..1 String | | Information |
| was_derived_from | 0..1 String | | Information |
| id | 1 Uriorcurie | A unique identifier for a thing | NamedThing |
| name | 0..1 String | A human-readable name for a thing | NamedThing |
| description | 0..1 String | A human-readable description for a thing | NamedThing |
Usages
| used by | used in | type | used |
|---|---|---|---|
| DatasetCollection | resources | range | Dataset |
| Dataset | resources | range | Dataset |
| Dataset | parent_datasets | range | Dataset |
| DataSubset | resources | range | Dataset |
| DataSubset | parent_datasets | range | Dataset |
Aliases
- data resource
- data file
- data package
Identifier and Mapping Information
Schema Source
- from schema: https://w3id.org/bridge2ai/data-sheets-schema
Mappings
| Mapping Type | Mapped Value |
|---|---|
| self | dcat:Distribution |
| native | data_sheets_schema:Dataset |
| exact | schema:DataDownload |
LinkML Source
Direct
name: Dataset
# LinkML class definition for Dataset (the "Direct" form of the source).
description: A single component of related observations and/or information that can
  be read, manipulated, transformed, and otherwise interpreted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
aliases:
- data resource
- data file
- data package
exact_mappings:
- schema:DataDownload
is_a: Information
# Slots referenced by name; their definitions are not part of this listing.
slots:
- bytes
- dialect
- encoding
- format
- hash
- md5
- media_type
- path
- sha256
- external_resources
- resources
# Class-specific refinements of two of the slots listed above.
slot_usage:
  external_resources:
    name: external_resources
    range: ExternalResource
    inlined_as_list: true
  resources:
    name: resources
    description: Sub-resources or component datasets that are part of this dataset.
      Allows datasets to contain nested resource structures.
    inlined_as_list: true
# Slots defined inline on this class, keyed by slot name.
attributes:
  purposes:
    name: purposes
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Purpose
    multivalued: true
    inlined: true
    inlined_as_list: true
  tasks:
    name: tasks
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Task
    multivalued: true
    inlined: true
    inlined_as_list: true
  addressing_gaps:
    name: addressing_gaps
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: AddressingGap
    multivalued: true
    inlined: true
    inlined_as_list: true
  creators:
    name: creators
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Creator
    multivalued: true
    inlined: true
    inlined_as_list: true
  funders:
    name: funders
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: FundingMechanism
    multivalued: true
    inlined: true
    inlined_as_list: true
  subsets:
    name: subsets
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:distribution
    rank: 1000
    slot_uri: dcat:distribution
    domain_of:
    - Dataset
    range: DataSubset
    multivalued: true
    inlined: true
    inlined_as_list: true
  instances:
    name: instances
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Instance
    multivalued: true
    inlined: true
    inlined_as_list: true
  anomalies:
    name: anomalies
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DataAnomaly
    multivalued: true
    inlined: true
    inlined_as_list: true
  known_biases:
    name: known_biases
    description: Known biases present in the dataset that may affect fairness, representativeness,
      or model performance. Uses BiasTypeEnum for standardized bias categorization
      mapped to the AI Ontology (AIO).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DatasetBias
    multivalued: true
    inlined: true
    inlined_as_list: true
  known_limitations:
    name: known_limitations
    description: Known limitations of the dataset that may affect its use or interpretation.
      Distinct from biases (systematic errors) and anomalies (data quality issues).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DatasetLimitation
    multivalued: true
    inlined: true
    inlined_as_list: true
  confidential_elements:
    name: confidential_elements
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Confidentiality
    multivalued: true
    inlined: true
    inlined_as_list: true
  content_warnings:
    name: content_warnings
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: ContentWarning
    multivalued: true
    inlined: true
    inlined_as_list: true
  subpopulations:
    name: subpopulations
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Subpopulation
    multivalued: true
    inlined: true
    inlined_as_list: true
  sensitive_elements:
    name: sensitive_elements
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: SensitiveElement
    multivalued: true
    inlined: true
    inlined_as_list: true
  acquisition_methods:
    name: acquisition_methods
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: InstanceAcquisition
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_mechanisms:
    name: collection_mechanisms
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: CollectionMechanism
    multivalued: true
    inlined: true
    inlined_as_list: true
  sampling_strategies:
    name: sampling_strategies
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    - Instance
    range: SamplingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  data_collectors:
    name: data_collectors
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DataCollector
    multivalued: true
    inlined: true
    inlined_as_list: true
  collection_timeframes:
    name: collection_timeframes
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: CollectionTimeframe
    multivalued: true
    inlined: true
    inlined_as_list: true
  missing_data_documentation:
    name: missing_data_documentation
    description: Documentation of missing data patterns and handling strategies.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: MissingDataDocumentation
    multivalued: true
    inlined: true
    inlined_as_list: true
  raw_data_sources:
    name: raw_data_sources
    description: Description of raw data sources before preprocessing.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: RawDataSource
    multivalued: true
    inlined: true
    inlined_as_list: true
  ethical_reviews:
    name: ethical_reviews
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: EthicalReview
    multivalued: true
    inlined: true
    inlined_as_list: true
  data_protection_impacts:
    name: data_protection_impacts
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DataProtectionImpact
    multivalued: true
    inlined: true
    inlined_as_list: true
  human_subject_research:
    name: human_subject_research
    description: Information about whether dataset involves human subjects research,
      including IRB approval, ethics review, and regulatory compliance.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: HumanSubjectResearch
    inlined: true
  informed_consent:
    name: informed_consent
    description: Details about informed consent procedures, including consent type,
      documentation, and withdrawal mechanisms.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: InformedConsent
    multivalued: true
    inlined: true
    inlined_as_list: true
  participant_privacy:
    name: participant_privacy
    description: Privacy protections and anonymization procedures for human research
      participants, including reidentification risk assessment.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: ParticipantPrivacy
    multivalued: true
    inlined: true
    inlined_as_list: true
  participant_compensation:
    name: participant_compensation
    description: Compensation or incentives provided to human research participants.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: HumanSubjectCompensation
    inlined: true
  vulnerable_populations:
    name: vulnerable_populations
    description: Information about protections for vulnerable populations (e.g., minors,
      pregnant women, prisoners) including special safeguards and assent procedures.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: VulnerablePopulations
    inlined: true
  preprocessing_strategies:
    name: preprocessing_strategies
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: PreprocessingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  cleaning_strategies:
    name: cleaning_strategies
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: CleaningStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  labeling_strategies:
    name: labeling_strategies
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: LabelingStrategy
    multivalued: true
    inlined: true
    inlined_as_list: true
  raw_sources:
    name: raw_sources
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: RawData
    multivalued: true
    inlined: true
    inlined_as_list: true
  imputation_protocols:
    name: imputation_protocols
    description: Data imputation methodology and techniques.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: ImputationProtocol
    multivalued: true
    inlined: true
    inlined_as_list: true
  annotation_analyses:
    name: annotation_analyses
    description: Analysis of annotation quality and inter-annotator agreement.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: AnnotationAnalysis
    multivalued: true
    inlined: true
    inlined_as_list: true
  machine_annotation_tools:
    name: machine_annotation_tools
    description: Automated annotation tools used in dataset creation.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: MachineAnnotationTools
    multivalued: true
    inlined: true
    inlined_as_list: true
  existing_uses:
    name: existing_uses
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: ExistingUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  use_repository:
    name: use_repository
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: UseRepository
    multivalued: true
    inlined: true
    inlined_as_list: true
  other_tasks:
    name: other_tasks
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: OtherTask
    multivalued: true
    inlined: true
    inlined_as_list: true
  future_use_impacts:
    name: future_use_impacts
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: FutureUseImpact
    multivalued: true
    inlined: true
    inlined_as_list: true
  discouraged_uses:
    name: discouraged_uses
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DiscouragedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  intended_uses:
    name: intended_uses
    description: Explicit intended and recommended uses for this dataset. Complements
      future_use_impacts by focusing on positive applications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: IntendedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  prohibited_uses:
    name: prohibited_uses
    description: Explicitly prohibited or forbidden uses for this dataset. Stronger
      than discouraged_uses - these are not permitted.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: ProhibitedUse
    multivalued: true
    inlined: true
    inlined_as_list: true
  distribution_formats:
    name: distribution_formats
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DistributionFormat
    multivalued: true
    inlined: true
    inlined_as_list: true
  distribution_dates:
    name: distribution_dates
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DistributionDate
    multivalued: true
    inlined: true
    inlined_as_list: true
  license_and_use_terms:
    name: license_and_use_terms
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: LicenseAndUseTerms
    inlined: true
  ip_restrictions:
    name: ip_restrictions
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: IPRestrictions
    inlined: true
  regulatory_restrictions:
    name: regulatory_restrictions
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    - ExportControlRegulatoryRestrictions
    range: ExportControlRegulatoryRestrictions
    inlined: true
  maintainers:
    name: maintainers
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Maintainer
    multivalued: true
    inlined: true
    inlined_as_list: true
  errata:
    name: errata
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Erratum
    multivalued: true
    inlined: true
    inlined_as_list: true
  updates:
    name: updates
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: UpdatePlan
    inlined: true
  retention_limit:
    name: retention_limit
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: RetentionLimits
    inlined: true
  version_access:
    name: version_access
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: VersionAccess
    inlined: true
  extension_mechanism:
    name: extension_mechanism
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: ExtensionMechanism
    inlined: true
  variables:
    name: variables
    description: Metadata describing individual variables, fields, or columns in the
      dataset.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:variableMeasured
    rank: 1000
    slot_uri: schema:variableMeasured
    domain_of:
    - Dataset
    range: VariableMetadata
    multivalued: true
    inlined: true
    inlined_as_list: true
  is_deidentified:
    name: is_deidentified
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: Deidentification
    inlined: true
  is_tabular:
    name: is_tabular
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: boolean
  citation:
    name: citation
    description: Recommended citation for this dataset in DataCite or BibTeX format.
      Provides a standard way to cite the dataset in publications.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:citation
    rank: 1000
    slot_uri: schema:citation
    domain_of:
    - Dataset
    range: string
  parent_datasets:
    name: parent_datasets
    description: Parent datasets that this dataset is part of or derived from. Enables
      hierarchical dataset composition (hasPart/isPartOf relationships).
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:isPartOf
    rank: 1000
    slot_uri: schema:isPartOf
    domain_of:
    - Dataset
    range: Dataset
    multivalued: true
    inlined: true
    inlined_as_list: true
  related_datasets:
    name: related_datasets
    description: Related datasets with typed relationships (e.g., supplements, derives
      from, is version of). Use DatasetRelationship class to specify relationship
      types.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    domain_of:
    - Dataset
    range: DatasetRelationship
    multivalued: true
    inlined: true
    inlined_as_list: true
# URI this class is published under (shown as "URI" at the top of the page).
class_uri: dcat:Distribution
Induced
name: Dataset
description: A single component of related observations and/or information that can
be read, manipulated, transformed, and otherwise interpreted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
aliases:
- data resource
- data file
- data package
exact_mappings:
- schema:DataDownload
is_a: Information
slot_usage:
external_resources:
name: external_resources
range: ExternalResource
inlined_as_list: true
resources:
name: resources
description: Sub-resources or component datasets that are part of this dataset.
Allows datasets to contain nested resource structures.
inlined_as_list: true
attributes:
purposes:
name: purposes
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: purposes
owner: Dataset
domain_of:
- Dataset
range: Purpose
multivalued: true
inlined: true
inlined_as_list: true
tasks:
name: tasks
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: tasks
owner: Dataset
domain_of:
- Dataset
range: Task
multivalued: true
inlined: true
inlined_as_list: true
addressing_gaps:
name: addressing_gaps
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: addressing_gaps
owner: Dataset
domain_of:
- Dataset
range: AddressingGap
multivalued: true
inlined: true
inlined_as_list: true
creators:
name: creators
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: creators
owner: Dataset
domain_of:
- Dataset
range: Creator
multivalued: true
inlined: true
inlined_as_list: true
funders:
name: funders
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: funders
owner: Dataset
domain_of:
- Dataset
range: FundingMechanism
multivalued: true
inlined: true
inlined_as_list: true
subsets:
name: subsets
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:distribution
rank: 1000
slot_uri: dcat:distribution
alias: subsets
owner: Dataset
domain_of:
- Dataset
range: DataSubset
multivalued: true
inlined: true
inlined_as_list: true
instances:
name: instances
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: instances
owner: Dataset
domain_of:
- Dataset
range: Instance
multivalued: true
inlined: true
inlined_as_list: true
anomalies:
name: anomalies
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: anomalies
owner: Dataset
domain_of:
- Dataset
range: DataAnomaly
multivalued: true
inlined: true
inlined_as_list: true
known_biases:
name: known_biases
description: Known biases present in the dataset that may affect fairness, representativeness,
or model performance. Uses BiasTypeEnum for standardized bias categorization
mapped to the AI Ontology (AIO).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: known_biases
owner: Dataset
domain_of:
- Dataset
range: DatasetBias
multivalued: true
inlined: true
inlined_as_list: true
known_limitations:
name: known_limitations
description: Known limitations of the dataset that may affect its use or interpretation.
Distinct from biases (systematic errors) and anomalies (data quality issues).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: known_limitations
owner: Dataset
domain_of:
- Dataset
range: DatasetLimitation
multivalued: true
inlined: true
inlined_as_list: true
confidential_elements:
name: confidential_elements
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: confidential_elements
owner: Dataset
domain_of:
- Dataset
range: Confidentiality
multivalued: true
inlined: true
inlined_as_list: true
content_warnings:
name: content_warnings
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: content_warnings
owner: Dataset
domain_of:
- Dataset
range: ContentWarning
multivalued: true
inlined: true
inlined_as_list: true
subpopulations:
name: subpopulations
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: subpopulations
owner: Dataset
domain_of:
- Dataset
range: Subpopulation
multivalued: true
inlined: true
inlined_as_list: true
sensitive_elements:
name: sensitive_elements
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: sensitive_elements
owner: Dataset
domain_of:
- Dataset
range: SensitiveElement
multivalued: true
inlined: true
inlined_as_list: true
acquisition_methods:
name: acquisition_methods
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: acquisition_methods
owner: Dataset
domain_of:
- Dataset
range: InstanceAcquisition
multivalued: true
inlined: true
inlined_as_list: true
collection_mechanisms:
name: collection_mechanisms
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: collection_mechanisms
owner: Dataset
domain_of:
- Dataset
range: CollectionMechanism
multivalued: true
inlined: true
inlined_as_list: true
sampling_strategies:
name: sampling_strategies
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: sampling_strategies
owner: Dataset
domain_of:
- Dataset
- Instance
range: SamplingStrategy
multivalued: true
inlined: true
inlined_as_list: true
data_collectors:
name: data_collectors
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: data_collectors
owner: Dataset
domain_of:
- Dataset
range: DataCollector
multivalued: true
inlined: true
inlined_as_list: true
collection_timeframes:
name: collection_timeframes
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: collection_timeframes
owner: Dataset
domain_of:
- Dataset
range: CollectionTimeframe
multivalued: true
inlined: true
inlined_as_list: true
missing_data_documentation:
name: missing_data_documentation
description: Documentation of missing data patterns and handling strategies.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: missing_data_documentation
owner: Dataset
domain_of:
- Dataset
range: MissingDataDocumentation
multivalued: true
inlined: true
inlined_as_list: true
raw_data_sources:
name: raw_data_sources
description: Description of raw data sources before preprocessing.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: raw_data_sources
owner: Dataset
domain_of:
- Dataset
range: RawDataSource
multivalued: true
inlined: true
inlined_as_list: true
ethical_reviews:
name: ethical_reviews
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: ethical_reviews
owner: Dataset
domain_of:
- Dataset
range: EthicalReview
multivalued: true
inlined: true
inlined_as_list: true
data_protection_impacts:
name: data_protection_impacts
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: data_protection_impacts
owner: Dataset
domain_of:
- Dataset
range: DataProtectionImpact
multivalued: true
inlined: true
inlined_as_list: true
human_subject_research:
name: human_subject_research
description: Information about whether the dataset involves human subjects research,
including IRB approval, ethics review, and regulatory compliance.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: human_subject_research
owner: Dataset
domain_of:
- Dataset
range: HumanSubjectResearch
inlined: true
informed_consent:
name: informed_consent
description: Details about informed consent procedures, including consent type,
documentation, and withdrawal mechanisms.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: informed_consent
owner: Dataset
domain_of:
- Dataset
range: InformedConsent
multivalued: true
inlined: true
inlined_as_list: true
participant_privacy:
name: participant_privacy
description: Privacy protections and anonymization procedures for human research
participants, including reidentification risk assessment.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: participant_privacy
owner: Dataset
domain_of:
- Dataset
range: ParticipantPrivacy
multivalued: true
inlined: true
inlined_as_list: true
participant_compensation:
name: participant_compensation
description: Compensation or incentives provided to human research participants.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: participant_compensation
owner: Dataset
domain_of:
- Dataset
range: HumanSubjectCompensation
inlined: true
vulnerable_populations:
name: vulnerable_populations
description: Information about protections for vulnerable populations (e.g., minors,
pregnant women, prisoners) including special safeguards and assent procedures.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: vulnerable_populations
owner: Dataset
domain_of:
- Dataset
range: VulnerablePopulations
inlined: true
preprocessing_strategies:
name: preprocessing_strategies
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: preprocessing_strategies
owner: Dataset
domain_of:
- Dataset
range: PreprocessingStrategy
multivalued: true
inlined: true
inlined_as_list: true
cleaning_strategies:
name: cleaning_strategies
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: cleaning_strategies
owner: Dataset
domain_of:
- Dataset
range: CleaningStrategy
multivalued: true
inlined: true
inlined_as_list: true
labeling_strategies:
name: labeling_strategies
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: labeling_strategies
owner: Dataset
domain_of:
- Dataset
range: LabelingStrategy
multivalued: true
inlined: true
inlined_as_list: true
raw_sources:
name: raw_sources
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: raw_sources
owner: Dataset
domain_of:
- Dataset
range: RawData
multivalued: true
inlined: true
inlined_as_list: true
imputation_protocols:
name: imputation_protocols
description: Data imputation methodology and techniques.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: imputation_protocols
owner: Dataset
domain_of:
- Dataset
range: ImputationProtocol
multivalued: true
inlined: true
inlined_as_list: true
annotation_analyses:
name: annotation_analyses
description: Analysis of annotation quality and inter-annotator agreement.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: annotation_analyses
owner: Dataset
domain_of:
- Dataset
range: AnnotationAnalysis
multivalued: true
inlined: true
inlined_as_list: true
machine_annotation_tools:
name: machine_annotation_tools
description: Automated annotation tools used in dataset creation.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: machine_annotation_tools
owner: Dataset
domain_of:
- Dataset
range: MachineAnnotationTools
multivalued: true
inlined: true
inlined_as_list: true
existing_uses:
name: existing_uses
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: existing_uses
owner: Dataset
domain_of:
- Dataset
range: ExistingUse
multivalued: true
inlined: true
inlined_as_list: true
use_repository:
name: use_repository
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: use_repository
owner: Dataset
domain_of:
- Dataset
range: UseRepository
multivalued: true
inlined: true
inlined_as_list: true
other_tasks:
name: other_tasks
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: other_tasks
owner: Dataset
domain_of:
- Dataset
range: OtherTask
multivalued: true
inlined: true
inlined_as_list: true
future_use_impacts:
name: future_use_impacts
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: future_use_impacts
owner: Dataset
domain_of:
- Dataset
range: FutureUseImpact
multivalued: true
inlined: true
inlined_as_list: true
discouraged_uses:
name: discouraged_uses
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: discouraged_uses
owner: Dataset
domain_of:
- Dataset
range: DiscouragedUse
multivalued: true
inlined: true
inlined_as_list: true
intended_uses:
name: intended_uses
description: Explicit intended and recommended uses for this dataset. Complements
future_use_impacts by focusing on positive applications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: intended_uses
owner: Dataset
domain_of:
- Dataset
range: IntendedUse
multivalued: true
inlined: true
inlined_as_list: true
prohibited_uses:
name: prohibited_uses
description: Explicitly prohibited or forbidden uses for this dataset. Stronger
than discouraged_uses - these are not permitted.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: prohibited_uses
owner: Dataset
domain_of:
- Dataset
range: ProhibitedUse
multivalued: true
inlined: true
inlined_as_list: true
distribution_formats:
name: distribution_formats
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: distribution_formats
owner: Dataset
domain_of:
- Dataset
range: DistributionFormat
multivalued: true
inlined: true
inlined_as_list: true
distribution_dates:
name: distribution_dates
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: distribution_dates
owner: Dataset
domain_of:
- Dataset
range: DistributionDate
multivalued: true
inlined: true
inlined_as_list: true
license_and_use_terms:
name: license_and_use_terms
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: license_and_use_terms
owner: Dataset
domain_of:
- Dataset
range: LicenseAndUseTerms
inlined: true
ip_restrictions:
name: ip_restrictions
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: ip_restrictions
owner: Dataset
domain_of:
- Dataset
range: IPRestrictions
inlined: true
regulatory_restrictions:
name: regulatory_restrictions
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: regulatory_restrictions
owner: Dataset
domain_of:
- Dataset
- ExportControlRegulatoryRestrictions
range: ExportControlRegulatoryRestrictions
inlined: true
maintainers:
name: maintainers
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: maintainers
owner: Dataset
domain_of:
- Dataset
range: Maintainer
multivalued: true
inlined: true
inlined_as_list: true
errata:
name: errata
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: errata
owner: Dataset
domain_of:
- Dataset
range: Erratum
multivalued: true
inlined: true
inlined_as_list: true
updates:
name: updates
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: updates
owner: Dataset
domain_of:
- Dataset
range: UpdatePlan
inlined: true
retention_limit:
name: retention_limit
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: retention_limit
owner: Dataset
domain_of:
- Dataset
range: RetentionLimits
inlined: true
version_access:
name: version_access
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: version_access
owner: Dataset
domain_of:
- Dataset
range: VersionAccess
inlined: true
extension_mechanism:
name: extension_mechanism
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: extension_mechanism
owner: Dataset
domain_of:
- Dataset
range: ExtensionMechanism
inlined: true
variables:
name: variables
description: Metadata describing individual variables, fields, or columns in the
dataset.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:variableMeasured
rank: 1000
slot_uri: schema:variableMeasured
alias: variables
owner: Dataset
domain_of:
- Dataset
range: VariableMetadata
multivalued: true
inlined: true
inlined_as_list: true
is_deidentified:
name: is_deidentified
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: is_deidentified
owner: Dataset
domain_of:
- Dataset
range: Deidentification
inlined: true
is_tabular:
name: is_tabular
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: is_tabular
owner: Dataset
domain_of:
- Dataset
range: boolean
citation:
name: citation
description: Recommended citation for this dataset in DataCite or BibTeX format.
Provides a standard way to cite the dataset in publications.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:citation
rank: 1000
slot_uri: schema:citation
alias: citation
owner: Dataset
domain_of:
- Dataset
range: string
parent_datasets:
name: parent_datasets
description: Parent datasets that this dataset is part of or derived from. Enables
hierarchical dataset composition (hasPart/isPartOf relationships).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:isPartOf
rank: 1000
slot_uri: schema:isPartOf
alias: parent_datasets
owner: Dataset
domain_of:
- Dataset
range: Dataset
multivalued: true
inlined: true
inlined_as_list: true
related_datasets:
name: related_datasets
description: Related datasets with typed relationships (e.g., supplements, derives
from, is version of). Use DatasetRelationship class to specify relationship
types.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: related_datasets
owner: Dataset
domain_of:
- Dataset
range: DatasetRelationship
multivalued: true
inlined: true
inlined_as_list: true
bytes:
name: bytes
description: Size of the data in bytes.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:byteSize
alias: bytes
owner: Dataset
domain_of:
- Dataset
range: integer
dialect:
name: dialect
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: dialect
owner: Dataset
domain_of:
- Dataset
range: string
encoding:
name: encoding
description: The character encoding of the data.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:mediaType
alias: encoding
owner: Dataset
domain_of:
- Dataset
range: EncodingEnum
format:
name: format
description: The file format, physical medium, or dimensions of a resource. This
should be a file extension or MIME type.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:format
alias: format
owner: Dataset
domain_of:
- Dataset
range: FormatEnum
hash:
name: hash
description: hash of the data
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:identifier
alias: hash
owner: Dataset
domain_of:
- Dataset
range: string
md5:
name: md5
description: MD5 hash of the data.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:identifier
alias: md5
owner: Dataset
domain_of:
- Dataset
range: string
media_type:
name: media_type
description: The media type of the data. This should be a MIME type.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:encodingFormat
rank: 1000
slot_uri: dcat:mediaType
alias: media_type
owner: Dataset
domain_of:
- Dataset
range: MediaTypeEnum
path:
name: path
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: schema:contentUrl
alias: path
owner: Dataset
domain_of:
- Dataset
range: string
sha256:
name: sha256
description: SHA-256 hash of the data.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:identifier
alias: sha256
owner: Dataset
domain_of:
- Dataset
range: string
external_resources:
name: external_resources
description: Links or identifiers for external resources. Can be used either as
a list of ExternalResource objects (in Dataset) or as a list of URL strings
(within ExternalResource class).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:references
alias: external_resources
owner: Dataset
domain_of:
- Dataset
- ExternalResource
range: ExternalResource
multivalued: true
inlined_as_list: true
resources:
name: resources
description: Sub-resources or component datasets that are part of this dataset.
Allows datasets to contain nested resource structures.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: resources
owner: Dataset
domain_of:
- DatasetCollection
- Dataset
range: Dataset
multivalued: true
inlined_as_list: true
compression:
name: compression
description: Compression format used, if any (e.g., gzip, bzip2, zip).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:compressFormat
alias: compression
owner: Dataset
domain_of:
- Information
range: CompressionEnum
conforms_to:
name: conforms_to
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:conformsTo
alias: conforms_to
owner: Dataset
domain_of:
- Information
range: string
conforms_to_class:
name: conforms_to_class
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:conformsTo
alias: conforms_to_class
owner: Dataset
domain_of:
- Information
range: string
conforms_to_schema:
name: conforms_to_schema
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:conformsTo
alias: conforms_to_schema
owner: Dataset
domain_of:
- Information
range: string
created_by:
name: created_by
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:creator
alias: created_by
owner: Dataset
domain_of:
- Information
range: string
created_on:
name: created_on
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:created
alias: created_on
owner: Dataset
domain_of:
- Information
range: datetime
doi:
name: doi
description: Digital Object Identifier (DOI).
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:identifier
alias: doi
owner: Dataset
domain_of:
- Information
range: string
pattern: 10\.\d{4,}\/.+
download_url:
name: download_url
description: URL from which the data can be downloaded. This is not the same as
the landing page, which is a page that describes the dataset. Rather, this URL
points directly to the data itself.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:url
rank: 1000
slot_uri: dcat:downloadURL
alias: download_url
owner: Dataset
domain_of:
- Information
range: uri
issued:
name: issued
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:issued
alias: issued
owner: Dataset
domain_of:
- Information
range: datetime
keywords:
name: keywords
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:keyword
alias: keywords
owner: Dataset
domain_of:
- Information
range: string
multivalued: true
language:
name: language
description: language in which the information is expressed
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:inLanguage
rank: 1000
slot_uri: dcterms:language
alias: language
owner: Dataset
domain_of:
- Information
range: string
last_updated_on:
name: last_updated_on
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:modified
alias: last_updated_on
owner: Dataset
domain_of:
- Information
range: datetime
license:
name: license
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:license
alias: license
owner: Dataset
domain_of:
- Software
- Information
range: string
modified_by:
name: modified_by
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:contributor
alias: modified_by
owner: Dataset
domain_of:
- Information
range: string
page:
name: page
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:landingPage
alias: page
owner: Dataset
domain_of:
- Information
range: string
publisher:
name: publisher
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:publisher
alias: publisher
owner: Dataset
domain_of:
- Information
range: uriorcurie
status:
name: status
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:type
alias: status
owner: Dataset
domain_of:
- Information
range: string
title:
name: title
description: the official title of the element
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:title
alias: title
owner: Dataset
domain_of:
- Information
range: string
version:
name: version
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:hasVersion
alias: version
owner: Dataset
domain_of:
- Software
- Information
range: string
was_derived_from:
name: was_derived_from
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- dcterms:source
rank: 1000
slot_uri: prov:wasDerivedFrom
alias: was_derived_from
owner: Dataset
domain_of:
- Information
range: string
id:
name: id
description: A unique identifier for a thing.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
rank: 1000
slot_uri: schema:identifier
identifier: true
alias: id
owner: Dataset
domain_of:
- NamedThing
- DatasetProperty
range: uriorcurie
required: true
name:
name: name
description: A human-readable name for a thing.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
rank: 1000
slot_uri: schema:name
alias: name
owner: Dataset
domain_of:
- NamedThing
- DatasetProperty
range: string
description:
name: description
description: A human-readable description for a thing.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base
rank: 1000
slot_uri: schema:description
alias: description
owner: Dataset
domain_of:
- NamedThing
- DatasetProperty
- DatasetRelationship
range: string
class_uri: dcat:Distribution