D4D_-_VOICE_PhysioNet_v3 - Data Sheet

Generated using Bridge2AI Data Sheets Schema

This report shows data from a text file following the Bridge2AI Data Sheets Schema.

Schema ID: https://w3id.org/bridge2ai/data-sheets-schema

Collection

DatasetCollection

resources:
[
  {
    "id": "https://doi.org/10.13026/3xt6-rf05",
    "title": "Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information",
    "description": "Bridge2AI-Voice v2.0 contains data for 19,271 recordings collected from 442 participants across five sites in North America. Participants were selected based on known conditions which manifest within the voice waveform including voice disorders, neurological disorders, mood disorders, and respiratory disorders. The release contains data considered low risk, including derivations such as spectrograms but not the original voice recordings. Detailed demographic, clinical, and validated questionnaire data are also made available.",
    "creators": [
      {
        "principal_investigator": {
          "id": "Yael Bensoussan",
          "name": "Yael Bensoussan"
        },
        "affiliation": ""
      },
      {
        "principal_investigator": {
          "id": "Olivier Elemento",
          "name": "Olivier Elemento"
        },
        "affiliation": ""
      },
      {
        "principal_investigator": {
          "id": "Satrajit Ghosh",
          "name": "Satrajit Ghosh"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Alexandros Sigaras",
          "name": "Alexandros Sigaras"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Anais Rameau",
          "name": "Anais Rameau"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Maria Powell",
          "name": "Maria Powell"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "David Dorr",
          "name": "David Dorr"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Philip Payne",
          "name": "Philip Payne"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Vardit Ravitsky",
          "name": "Vardit Ravitsky"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Jean-Christophe B\u00e9lisle-Pipon",
          "name": "Jean-Christophe B\u00e9lisle-Pipon"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Alistair Johnson",
          "name": "Alistair Johnson"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Ruth Bahr",
          "name": "Ruth Bahr"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Stephanie Watts",
          "name": "Stephanie Watts"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Donald Bolser",
          "name": "Donald Bolser"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Jennifer Siu",
          "name": "Jennifer Siu"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Jordan Lerner-Ellis",
          "name": "Jordan Lerner-Ellis"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Frank Rudzicz",
          "name": "Frank Rudzicz"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Micah Boyer",
          "name": "Micah Boyer"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Samantha Salvi Cruz",
          "name": "Samantha Salvi Cruz"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Yassmeen Abdel-Aty",
          "name": "Yassmeen Abdel-Aty"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Toufeeq Ahmed Syed",
          "name": "Toufeeq Ahmed Syed"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "James Anibal",
          "name": "James Anibal"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Stephen Aradi",
          "name": "Stephen Aradi"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Ana Sophia Martinez",
          "name": "Ana Sophia Martinez"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Shaheen Awan",
          "name": "Shaheen Awan"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Steven Bedrick",
          "name": "Steven Bedrick"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Isaac Bevers",
          "name": "Isaac Bevers"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Rahul Brito",
          "name": "Rahul Brito"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Selina Casalino",
          "name": "Selina Casalino"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "John Costello",
          "name": "John Costello"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Iris De Santiago",
          "name": "Iris De Santiago"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Enrique Diaz-Ocampo",
          "name": "Enrique Diaz-Ocampo"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Mohamed Ebraheem",
          "name": "Mohamed Ebraheem"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Ellie Eiseman",
          "name": "Ellie Eiseman"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Mahmoud Elmahdy",
          "name": "Mahmoud Elmahdy"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Emily Evangelista",
          "name": "Emily Evangelista"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Kenneth Fletcher",
          "name": "Kenneth Fletcher"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Alexander Gelbard",
          "name": "Alexander Gelbard"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Anna Goldenberg",
          "name": "Anna Goldenberg"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Karim Hanna",
          "name": "Karim Hanna"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "William Hersh",
          "name": "William Hersh"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Lochana Jayachandran",
          "name": "Lochana Jayachandran"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Kaley Jenney",
          "name": "Kaley Jenney"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Kathy Jenkins",
          "name": "Kathy Jenkins"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Stacy Jo",
          "name": "Stacy Jo"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Ayush Kalia",
          "name": "Ayush Kalia"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Andrea Krussel",
          "name": "Andrea Krussel"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Elisa Lapadula",
          "name": "Elisa Lapadula"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Chloe Loewith",
          "name": "Chloe Loewith"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Radhika Mahajan",
          "name": "Radhika Mahajan"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Vrishni Maharaj",
          "name": "Vrishni Maharaj"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Siyu Miao",
          "name": "Siyu Miao"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Matthew Mifsud",
          "name": "Matthew Mifsud"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Marian Mikhael",
          "name": "Marian Mikhael"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Elijah Moothedan",
          "name": "Elijah Moothedan"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Yosef Nafii",
          "name": "Yosef Nafii"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Tempestt Neal",
          "name": "Tempestt Neal"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Karlee Newberry",
          "name": "Karlee Newberry"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Evan Ng",
          "name": "Evan Ng"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Christopher Nickel",
          "name": "Christopher Nickel"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Trevor Pharr",
          "name": "Trevor Pharr"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Claire Premi-Bortolotto",
          "name": "Claire Premi-Bortolotto"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "JM Rahman",
          "name": "JM Rahman"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Sarah Rohde",
          "name": "Sarah Rohde"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Laurie Russell",
          "name": "Laurie Russell"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Suketu Shah",
          "name": "Suketu Shah"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Ahmed Shawkat",
          "name": "Ahmed Shawkat"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Elizabeth Silberholz",
          "name": "Elizabeth Silberholz"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Duncan Sutherland",
          "name": "Duncan Sutherland"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Venkata Swarna Mukhi",
          "name": "Venkata Swarna Mukhi"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Jeffrey Tang",
          "name": "Jeffrey Tang"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Jamie Toghranegar",
          "name": "Jamie Toghranegar"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Kimberly Vinson",
          "name": "Kimberly Vinson"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Claire Wilson",
          "name": "Claire Wilson"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Madeleine Zanin",
          "name": "Madeleine Zanin"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Xijie Zeng",
          "name": "Xijie Zeng"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Theresa Zesiewicz",
          "name": "Theresa Zesiewicz"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Robin Zhao",
          "name": "Robin Zhao"
        },
        "affiliation": ""
      },
      {
        "author": {
          "id": "Pantelis Zisimopoulos",
          "name": "Pantelis Zisimopoulos"
        },
        "affiliation": ""
      }
    ],
    "funders": [
      {
        "grantor": {
          "id": "NIH",
          "name": "National Institutes of Health"
        },
        "grant": {
          "id": "3OT2OD032720-01S1",
          "name": "Bridge2AI: Voice as a Biomarker of Health - Building an ethically sourced, bioaccoustic database to understand disease like never before",
          "grant_number": "3OT2OD032720-01S1"
        }
      }
    ],
    "issued": "2025-04-16",
    "version": "2.0.0",
    "keywords": [
      "voice",
      "bridge2ai"
    ],
    "license": "Bridge2AI Voice Registered Access License",
    "doi": "https://doi.org/10.13026/3xt6-rf05",
    "download_url": "",
    "bytes": "",
    "purposes": [
      {
        "response": "The Bridge2AI-Voice project seeks to create an ethically sourced flagship dataset to enable future research in artificial intelligence and support critical insights into the use of voice as a biomarker of health."
      }
    ],
    "instances": [
      {
        "counts": 442,
        "instance_type": "participants",
        "data_topic": "",
        "data_substrate": "",
        "label": "",
        "label_description": "",
        "sampling_strategies": [],
        "missing_information": []
      },
      {
        "counts": 19271,
        "instance_type": "recordings",
        "data_topic": "",
        "data_substrate": "",
        "label": "",
        "label_description": "",
        "sampling_strategies": [],
        "missing_information": []
      }
    ],
    "subpopulations": [
      {
        "subpopulation_elements_present": true,
        "identification": [
          "Disease cohort categories"
        ],
        "distribution": [
          "Voice Disorders, Neurological and Neurodegenerative Disorders, Mood and Psychiatric Disorders, Respiratory disorders. Note: The v2.0.0 dataset does not contain pediatric data and does not contain an equal distribution across categories of diseases."
        ]
      }
    ],
    "collection_timeframes": [
      {
        "description": ""
      }
    ],
    "conforms_to": [],
    "preprocessing_strategies": [
      {
        "description": "Raw audio was preprocessed by converting to monaural and resampling to 16 kHz with a Butterworth anti-aliasing filter. Derived data includes: Spectrograms (short-time FFT), 60 Mel-frequency cepstral coefficients (MFCCs), acoustic features extracted using OpenSMILE, phonetic and prosodic features computed using Parselmouth and Praat, and transcriptions generated using OpenAI's Whisper Large model."
      }
    ],
    "cleaning_strategies": [
      {
        "description": "HIPAA Safe Harbor identifiers were removed. State and province were removed. Country of data collection was retained. Audio records with sensitive information were removed, but their static features were retained. In this release, audio waveforms are omitted."
      }
    ],
    "is_deidentified": {
      "identifiable_elements_present": false,
      "description": [
        "HIPAA Safe Harbor identifiers were removed. State and province were removed. Audio records with sensitive information were removed. Audio waveforms are omitted from this release."
      ]
    },
    "sensitive_elements": [
      {
        "sensitive_elements_present": true,
        "description": [
          "The dataset contains data linked to health conditions including voice disorders, neurological disorders, mood disorders, and respiratory disorders. The raw voice recordings are considered sensitive and are not included in this release but are available in a companion release."
        ]
      }
    ],
    "existing_uses": [],
    "discouraged_uses": [],
    "updates": [
      {
        "description": "b2ai-voice v2.0: This release provides data for an additional 136 new participants. Spectrograms were reprocessed. All spectrograms and Mel-frequency cepstral coefficients from free speech related files have been removed. b2ai-voice v1.1: This release added Mel-frequency cepstral coefficients (MFCCs). b2ai-voice v1.0: This was the first release."
      }
    ],
    "version_access": [
      {
        "description": "Previous versions are available. v1.1 was released Jan. 17, 2025. v1.0 is available at https://doi.org/10.57764/qb6h-em84."
      }
    ],
    "distribution_formats": [
      {
        "description": "spectrograms.parquet, mfcc.parquet, phenotype.tsv, static_features.tsv"
      }
    ],
    "license_and_use_terms": [
      {
        "description": "Access is restricted to credentialed users who sign the 'Bridge2AI Voice Registered Access Agreement' Data Use Agreement. The license is the 'Bridge2AI Voice Registered Access License'."
      }
    ],
    "acquisition_methods": [
      {
        "description": "Data was collected via patient consent from individuals at specialty clinics and institutions. A standardized protocol involved collecting demographic information, health questionnaires, and voice recording tasks.",
        "was_directly_observed": "",
        "was_reported_by_subjects": true,
        "was_inferred_derived": true,
        "was_validated_verified": ""
      }
    ],
    "compression": "",
    "conforms_to_class": "",
    "conforms_to_schema": "",
    "created_by": [],
    "created_on": "",
    "language": "",
    "last_updated_on": "",
    "modified_by": [],
    "page": "",
    "publisher": "PhysioNet",
    "status": "",
    "was_derived_from": "",
    "dialect": "",
    "encoding": "",
    "format": "",
    "hash": "",
    "md5": "",
    "media_type": "",
    "path": "",
    "sha256": "",
    "tasks": [],
    "addressing_gaps": [
      {
        "response": "For voice to emerge as a biomarker of health, there is a pressing need for large, high quality, multi-institutional and diverse voice database linked to other health biomarkers from various data of different modality to fuel voice AI research and answer tangible clinical questions."
      }
    ],
    "subsets": [],
    "anomalies": [],
    "external_resources": [
      {
        "external_resources": [
          "Audio recordings are included on a companion release on PhysioNet with the title 'Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information (Audio Included)'."
        ],
        "future_guarantees": "",
        "archival": "",
        "restrictions": ""
      }
    ],
    "confidential_elements": [],
    "content_warnings": [],
    "collection_mechanisms": [
      {
        "description": "Data collection was conducted using a custom application on a tablet with a headset used for data collection when possible. Data were exported and converted from RedCap using the b2aiprep open source library."
      }
    ],
    "sampling_strategies": [
      {
        "description": "Patients presenting at specialty clinics and institutions were considered for enrollment. Patients were selected based on membership to five predetermined groups (Respiratory disorders, Voice disorders, Neurological disorders, Mood disorders, Pediatric). This is a purposive sampling strategy."
      }
    ],
    "data_collectors": [],
    "ethical_reviews": [
      {
        "description": "Data collection and sharing was approved by the University of South Florida Institutional Review Board."
      }
    ],
    "data_protection_impacts": [],
    "labeling_strategies": [],
    "raw_sources": [
      {
        "description": "Yes, the raw audio data is available in a companion release on PhysioNet."
      }
    ],
    "use_repository": [],
    "other_tasks": [],
    "future_use_impacts": [],
    "distribution_dates": [],
    "ip_restrictions": "",
    "regulatory_restrictions": "",
    "maintainers": [],
    "errata": [],
    "retention_limit": "",
    "extension_mechanism": "",
    "is_tabular": ""
  }
]

Metadata

DatasetCollection_resources

id: https://doi.org/10.13026/3xt6-rf05
title: Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information
description: Bridge2AI-Voice v2.0 contains data for 19,271 recordings collected from 442 participants across five sites in North America. Participants were selected based on known conditions which manifest within the voice waveform including voice disorders, neurological disorders, mood disorders, and respiratory disorders. The release contains data considered low risk, including derivations such as spectrograms but not the original voice recordings. Detailed demographic, clinical, and validated questionnaire data are also made available.
creators:
[
  {
    "principal_investigator": {
      "id": "Yael Bensoussan",
      "name": "Yael Bensoussan"
    },
    "affiliation": ""
  },
  {
    "principal_investigator": {
      "id": "Olivier Elemento",
      "name": "Olivier Elemento"
    },
    "affiliation": ""
  },
  {
    "principal_investigator": {
      "id": "Satrajit Ghosh",
      "name": "Satrajit Ghosh"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Alexandros Sigaras",
      "name": "Alexandros Sigaras"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Anais Rameau",
      "name": "Anais Rameau"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Maria Powell",
      "name": "Maria Powell"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "David Dorr",
      "name": "David Dorr"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Philip Payne",
      "name": "Philip Payne"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Vardit Ravitsky",
      "name": "Vardit Ravitsky"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Jean-Christophe B\u00e9lisle-Pipon",
      "name": "Jean-Christophe B\u00e9lisle-Pipon"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Alistair Johnson",
      "name": "Alistair Johnson"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Ruth Bahr",
      "name": "Ruth Bahr"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Stephanie Watts",
      "name": "Stephanie Watts"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Donald Bolser",
      "name": "Donald Bolser"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Jennifer Siu",
      "name": "Jennifer Siu"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Jordan Lerner-Ellis",
      "name": "Jordan Lerner-Ellis"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Frank Rudzicz",
      "name": "Frank Rudzicz"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Micah Boyer",
      "name": "Micah Boyer"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Samantha Salvi Cruz",
      "name": "Samantha Salvi Cruz"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Yassmeen Abdel-Aty",
      "name": "Yassmeen Abdel-Aty"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Toufeeq Ahmed Syed",
      "name": "Toufeeq Ahmed Syed"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "James Anibal",
      "name": "James Anibal"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Stephen Aradi",
      "name": "Stephen Aradi"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Ana Sophia Martinez",
      "name": "Ana Sophia Martinez"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Shaheen Awan",
      "name": "Shaheen Awan"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Steven Bedrick",
      "name": "Steven Bedrick"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Isaac Bevers",
      "name": "Isaac Bevers"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Rahul Brito",
      "name": "Rahul Brito"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Selina Casalino",
      "name": "Selina Casalino"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "John Costello",
      "name": "John Costello"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Iris De Santiago",
      "name": "Iris De Santiago"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Enrique Diaz-Ocampo",
      "name": "Enrique Diaz-Ocampo"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Mohamed Ebraheem",
      "name": "Mohamed Ebraheem"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Ellie Eiseman",
      "name": "Ellie Eiseman"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Mahmoud Elmahdy",
      "name": "Mahmoud Elmahdy"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Emily Evangelista",
      "name": "Emily Evangelista"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Kenneth Fletcher",
      "name": "Kenneth Fletcher"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Alexander Gelbard",
      "name": "Alexander Gelbard"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Anna Goldenberg",
      "name": "Anna Goldenberg"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Karim Hanna",
      "name": "Karim Hanna"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "William Hersh",
      "name": "William Hersh"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Lochana Jayachandran",
      "name": "Lochana Jayachandran"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Kaley Jenney",
      "name": "Kaley Jenney"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Kathy Jenkins",
      "name": "Kathy Jenkins"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Stacy Jo",
      "name": "Stacy Jo"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Ayush Kalia",
      "name": "Ayush Kalia"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Andrea Krussel",
      "name": "Andrea Krussel"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Elisa Lapadula",
      "name": "Elisa Lapadula"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Chloe Loewith",
      "name": "Chloe Loewith"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Radhika Mahajan",
      "name": "Radhika Mahajan"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Vrishni Maharaj",
      "name": "Vrishni Maharaj"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Siyu Miao",
      "name": "Siyu Miao"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Matthew Mifsud",
      "name": "Matthew Mifsud"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Marian Mikhael",
      "name": "Marian Mikhael"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Elijah Moothedan",
      "name": "Elijah Moothedan"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Yosef Nafii",
      "name": "Yosef Nafii"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Tempestt Neal",
      "name": "Tempestt Neal"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Karlee Newberry",
      "name": "Karlee Newberry"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Evan Ng",
      "name": "Evan Ng"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Christopher Nickel",
      "name": "Christopher Nickel"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Trevor Pharr",
      "name": "Trevor Pharr"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Claire Premi-Bortolotto",
      "name": "Claire Premi-Bortolotto"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "JM Rahman",
      "name": "JM Rahman"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Sarah Rohde",
      "name": "Sarah Rohde"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Laurie Russell",
      "name": "Laurie Russell"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Suketu Shah",
      "name": "Suketu Shah"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Ahmed Shawkat",
      "name": "Ahmed Shawkat"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Elizabeth Silberholz",
      "name": "Elizabeth Silberholz"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Duncan Sutherland",
      "name": "Duncan Sutherland"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Venkata Swarna Mukhi",
      "name": "Venkata Swarna Mukhi"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Jeffrey Tang",
      "name": "Jeffrey Tang"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Jamie Toghranegar",
      "name": "Jamie Toghranegar"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Kimberly Vinson",
      "name": "Kimberly Vinson"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Claire Wilson",
      "name": "Claire Wilson"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Madeleine Zanin",
      "name": "Madeleine Zanin"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Xijie Zeng",
      "name": "Xijie Zeng"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Theresa Zesiewicz",
      "name": "Theresa Zesiewicz"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Robin Zhao",
      "name": "Robin Zhao"
    },
    "affiliation": ""
  },
  {
    "author": {
      "id": "Pantelis Zisimopoulos",
      "name": "Pantelis Zisimopoulos"
    },
    "affiliation": ""
  }
]
funders:
[
  {
    "grantor": {
      "id": "NIH",
      "name": "National Institutes of Health"
    },
    "grant": {
      "id": "3OT2OD032720-01S1",
      "name": "Bridge2AI: Voice as a Biomarker of Health - Building an ethically sourced, bioaccoustic database to understand disease like never before",
      "grant_number": "3OT2OD032720-01S1"
    }
  }
]
issued: 2025-04-16
version: 2.0.0
keywords:
[
  "voice",
  "bridge2ai"
]
license: Bridge2AI Voice Registered Access License
doi: https://doi.org/10.13026/3xt6-rf05
download_url:
bytes:
purposes:
[
  {
    "response": "The Bridge2AI-Voice project seeks to create an ethically sourced flagship dataset to enable future research in artificial intelligence and support critical insights into the use of voice as a biomarker of health."
  }
]
instances:
[
  {
    "counts": 442,
    "instance_type": "participants",
    "data_topic": "",
    "data_substrate": "",
    "label": "",
    "label_description": "",
    "sampling_strategies": [],
    "missing_information": []
  },
  {
    "counts": 19271,
    "instance_type": "recordings",
    "data_topic": "",
    "data_substrate": "",
    "label": "",
    "label_description": "",
    "sampling_strategies": [],
    "missing_information": []
  }
]
subpopulations:
[
  {
    "subpopulation_elements_present": true,
    "identification": [
      "Disease cohort categories"
    ],
    "distribution": [
      "Voice Disorders, Neurological and Neurodegenerative Disorders, Mood and Psychiatric Disorders, Respiratory disorders. Note: The v2.0.0 dataset does not contain pediatric data and does not contain an equal distribution across categories of diseases."
    ]
  }
]
collection_timeframes:
[
  {
    "description": ""
  }
]
conforms_to:
[]
preprocessing_strategies:
[
  {
    "description": "Raw audio was preprocessed by converting to monaural and resampling to 16 kHz with a Butterworth anti-aliasing filter. Derived data includes: Spectrograms (short-time FFT), 60 Mel-frequency cepstral coefficients (MFCCs), acoustic features extracted using OpenSMILE, phonetic and prosodic features computed using Parselmouth and Praat, and transcriptions generated using OpenAI's Whisper Large model."
  }
]
cleaning_strategies:
[
  {
    "description": "HIPAA Safe Harbor identifiers were removed. State and province were removed. Country of data collection was retained. Audio records with sensitive information were removed, but their static features were retained. In this release, audio waveforms are omitted."
  }
]
is_deidentified:
{
  "identifiable_elements_present": false,
  "description": [
    "HIPAA Safe Harbor identifiers were removed. State and province were removed. Audio records with sensitive information were removed. Audio waveforms are omitted from this release."
  ]
}
sensitive_elements:
[
  {
    "sensitive_elements_present": true,
    "description": [
      "The dataset contains data linked to health conditions including voice disorders, neurological disorders, mood disorders, and respiratory disorders. The raw voice recordings are considered sensitive and are not included in this release but are available in a companion release."
    ]
  }
]
existing_uses:
[]
discouraged_uses:
[]
updates:
[
  {
    "description": "b2ai-voice v2.0: This release provides data for an additional 136 participants. Spectrograms were reprocessed. All spectrograms and Mel-frequency cepstral coefficients from free speech related files have been removed. b2ai-voice v1.1: This release added Mel-frequency cepstral coefficients (MFCCs). b2ai-voice v1.0: This was the first release."
  }
]
version_access:
[
  {
    "description": "Previous versions are available. v1.1 was released Jan. 17, 2025. v1.0 is available at https://doi.org/10.57764/qb6h-em84."
  }
]
distribution_formats:
[
  {
    "description": "spectrograms.parquet, mfcc.parquet, phenotype.tsv, static_features.tsv"
  }
]
license_and_use_terms:
[
  {
    "description": "Access is restricted to credentialed users who sign the 'Bridge2AI Voice Registered Access Agreement' Data Use Agreement. The license is the 'Bridge2AI Voice Registered Access License'."
  }
]
acquisition_methods:
[
  {
    "description": "Data were collected, with patient consent, from individuals at specialty clinics and institutions. A standardized protocol involved collecting demographic information, health questionnaires, and voice recording tasks.",
    "was_directly_observed": "",
    "was_reported_by_subjects": true,
    "was_inferred_derived": true,
    "was_validated_verified": ""
  }
]
compression:
conforms_to_class:
conforms_to_schema:
created_by:
[]
created_on:
language:
last_updated_on:
modified_by:
[]
page:
publisher: PhysioNet
status:
was_derived_from:
dialect:
encoding:
format:
hash:
md5:
media_type:
path:
sha256:
tasks:
[]
addressing_gaps:
[
  {
    "response": "For voice to emerge as a biomarker of health, there is a pressing need for a large, high-quality, multi-institutional, and diverse voice database linked to other health biomarkers from data of different modalities, to fuel voice AI research and answer tangible clinical questions."
  }
]
subsets:
[]
anomalies:
[]
external_resources:
[
  {
    "external_resources": [
      "Audio recordings are included in a companion release on PhysioNet with the title 'Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information (Audio Included)'."
    ],
    "future_guarantees": "",
    "archival": "",
    "restrictions": ""
  }
]
confidential_elements:
[]
content_warnings:
[]
collection_mechanisms:
[
  {
    "description": "Data collection was conducted using a custom application on a tablet, with a headset used when possible. Data were exported and converted from REDCap using the b2aiprep open-source library."
  }
]
sampling_strategies:
[
  {
    "description": "Patients presenting at specialty clinics and institutions were considered for enrollment. Patients were selected based on membership in five predetermined groups (Respiratory disorders, Voice disorders, Neurological disorders, Mood disorders, Pediatric). This is a purposive sampling strategy."
  }
]
data_collectors:
[]
ethical_reviews:
[
  {
    "description": "Data collection and sharing were approved by the University of South Florida Institutional Review Board."
  }
]
data_protection_impacts:
[]
labeling_strategies:
[]
raw_sources:
[
  {
    "description": "Yes, the raw audio data is available in a companion release on PhysioNet."
  }
]
use_repository:
[]
other_tasks:
[]
future_use_impacts:
[]
distribution_dates:
[]
ip_restrictions:
regulatory_restrictions:
maintainers:
[]
errata:
[]
retention_limit:
extension_mechanism:
is_tabular:
Generated on: 2025-10-30 10:38:35