diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml new file mode 100644 index 00000000..57bc4372 --- /dev/null +++ b/.github/workflows/labgraph_audiogen.yml @@ -0,0 +1,31 @@ +name: AudioGen Tests + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.8' + + - name: Install dependencies + run: | + cd extensions/lg_audiogen + python -m pip install --upgrade pip + sudo apt-get install ffmpeg + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu + pip install --pre xformers + pip install -e . + pip install pytest + + - name: Run tests + run: | + cd extensions/lg_audiogen + pytest -vvv \ No newline at end of file diff --git a/extensions/lg_audiogen/README.md b/extensions/lg_audiogen/README.md new file mode 100644 index 00000000..055ffc48 --- /dev/null +++ b/extensions/lg_audiogen/README.md @@ -0,0 +1,82 @@ +# Audiogen + +Audiogen is a Python command-line tool that uses models from Audiocraft's AudioGen to generate audio from specified descriptions. This tool can generate a single piece of audio based on a specific description, multiple pieces of audio based on a batch file containing multiple descriptions, or based on activities from a string or an `.ics` calendar file. + +## Features + +* Ability to specify duration of the generated audio. +* Ability to generate audio based on a batch file. +* Ability to specify the model to be used for the audio generation. +* Ability to set the output file name. +* Ability to generate audio based on daily activities from a comma-separated string or a `.ics` calendar file. +* Ability to integrate with GPT models to enhance activity descriptions. +* Ability to enable pseudo-deterministic activity prompts +* Ability to specify a date or a range of dates to get events from the `.ics` calendar file. 
+ +## Setup + +Audiocraft needs Python 3.8 or higher to run. If you have a suitable version of Python installed, you can install Audiogen with pip: + +```shell +pip install -e . +``` + +## Usage + +### Command-line interface + +The CLI usage for Audiogen is `lg_audiogen [OPTIONS] [DESCRIPTION]...`. + +### Options + +* `description`: the description based on which the audio is to be generated. +* `duration, -d`: duration of the generated audio, default is 5. +* `model, -m`: name of the Audiocraft AudioGen model to use, default is 'facebook/audiogen-medium'. +* `output, -o`: name of the output file. +* `batch, -b`: file name for batch audio description. +* `activities, -a`: comma-separated string or `.ics` calendar file containing events. +* `gpt`: New: flag (`--gpt`) to enable GPT model for activities description enhancement. +* `deterministic`: New: flag (`--deterministic`) to enable deterministic generation. +* `dates, -dt`: New: date in the format 'YYYY-MM-DD' or as a range 'YYYY-MM-DD,YYYY-MM-DD'. + +### Example + +To generate audio files you would use one of the following commands: + +```shell +lg_audiogen -d 5 -m 'facebook/audiogen-medium' -o 'my_output' 'dog barking' + +lg_audiogen 'dog barking' + +lg_audiogen -b 'batch.txt' + +lg_audiogen -a 'meeting with nathan, lunch with friends' --gpt --deterministic + +lg_audiogen -a "calendar.ics" --gpt -dt '2023-11-29,2023-12-01' +``` + +**Note:** for GPT usage, create a `.env` file with the same format as the `sample.env` file provided. + +### Batch File Format + +The batch file should contain one description per line. The descriptions should be in the same format as the descriptions used in the command-line interface. 
def convert_to_utc(dt):
    """
    Normalizes an offset-aware datetime to UTC.

    @param dt: The value to normalize (a datetime or a plain date).

    @return: The UTC equivalent when dt is an offset-aware datetime;
             otherwise dt itself, untouched.
    """
    # Plain dates (and anything else that is not a datetime) carry no offset.
    if not isinstance(dt, datetime):
        return dt

    tz = dt.tzinfo
    # Naive datetimes: either no tzinfo at all, or a tzinfo that reports no offset.
    if tz is None or tz.utcoffset(dt) is None:
        return dt

    return dt.astimezone(timezone.utc)
def populate_recurring_events(component, start_dt, calendar_events, summary, duration):
    """
    Populates the calendar_events dictionary with the recurring events.

    Expands the component's recurrence rule starting at start_dt and records
    every occurrence whose year lies within [MIN_YEAR, MAX_YEAR].

    @param component: The component to populate the events from (must carry an 'rrule').
    @param start_dt: The start datetime of the first occurrence.
    @param calendar_events: The dictionary of events (mutated in place).
    @param summary: The title/summary of the event.
    @param duration: The duration of the event.
    """
    # rr will give us a generator of occurrences, earliest first
    rr = rrulestr(component.get('rrule').to_ical().decode('utf-8'), dtstart=start_dt)
    for dt in rr:
        if dt.year < MIN_YEAR:
            # The series began in an earlier year. Keep advancing instead of
            # bailing out, otherwise none of its in-range occurrences are added.
            continue
        if dt.year > MAX_YEAR:
            # Past the window: stop here so unbounded rules terminate.
            return
        populate_events(dt, calendar_events, summary, duration)
def get_events_between_dates(calendar_events, start_date_str, end_date_str):
    """
    Selects the events that fall on any day of the inclusive range
    [start_date, end_date], ordering each day's events chronologically.

    @param calendar_events: Dictionary mapping 'YYYY-MM-DD' to a list of event dicts.
    @param start_date_str: First day of the range, as 'YYYY-MM-DD'.
    @param end_date_str: Last day of the range, as 'YYYY-MM-DD'.

    @return: A dictionary with one entry per day of the range that has events;
             each value is a new list sorted by the events' 'ts' key.
    """
    day_format = '%Y-%m-%d'

    def by_timestamp(event):
        return event['ts']

    # Callers are expected to pass well-formed strings with start <= end.
    first_day = datetime.strptime(start_date_str, day_format).date()
    last_day = datetime.strptime(end_date_str, day_format).date()
    one_day = timedelta(days=1)

    selected = {}
    cursor = first_day
    while not cursor > last_day:
        key = cursor.strftime(day_format)
        if key in calendar_events:
            # sorted() copies, so the caller's lists keep their original order
            selected[key] = sorted(calendar_events[key], key=by_timestamp)
        cursor = cursor + one_day
    return selected
Size of the output should be the same as the input" + }, + { + "role": "user", + "content": "[\"Commute to work\", \"Walk by the beach\"]" + }, + { + "role": "assistant", + "content": "{sounds: [\"Cars honking in traffic\", \"Footsteps tapping on the sand with waves in the background\"]}" + }, + { + "role": "user", + "content": "[\"Virtual Meeting with Nathan\", \"Beer and Chips with Friends\"]" + }, + { + "role": "assistant", + "content": "{sounds: [\"Keyboard typing and mouse clicks\", \"Laughter and the clinking of glasses, crunching of chips\"]}" + }, + { + "role": "user", + "content": "[\"Meeting with Joe\"]" + }, + { + "role": "assistant", + "content": "{sounds: [\"Keyboard typing and mouse clicks with chatter in the background\"]}" + }, + { + "role": "user", + "content": "[\"'23.FAL.B.1 Pod Meeting - MLH Fellowship\", \"Oscar Mier and Nathan Kurelo Wilk\", \"Monday MS FinTech Classes\", \"Tuesday MS FinTech Classes\", \"23.FAL.B.1 Pod Meeting - MLH Fellowship\", \"Wednesday MS FinTech Classes\"]" + }, + { + "role": "assistant", + "content": "{sounds: [\"Mic feedback, low murmur of voices discussing on a conference call\",\"Ambient room noise\",\"Turning pages, lecturer speaking faintly in the background\",\"Turning pages, lecturer speaking faintly in the background\",\"Mic feedback, low murmur of voices discussing on a conference call\",\"Turning pages, lecturer speaking faintly in the background\"]}" + }, + { + "role": "user", + "content": json.dumps(event_list) + } + ], + temperature=0 if deterministic else 1, + max_tokens=1101, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + response_format={ "type": "json_object" } + ) + response = json.loads(response.choices[0].message.content).get("sounds") + print("GPT Response", response) + return response \ No newline at end of file diff --git a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py new file mode 100644 index 00000000..94cfc1ad --- 
def get_prompts(event_names, deterministic=False, keywords=None):
    """
    Creates a prompt for each event name by matching keywords
    in the event name to prompts in the keyword dictionary.

    Matching is case-insensitive: the event name is lower-cased before it is
    split into words. When several words match, their prompts are joined
    with ' combined with '. When nothing matches, the lower-cased event name
    itself is used as the prompt.

    @param event_names: A list of event names
    @param deterministic: A boolean to make the random choice deterministic
    @param keywords: Optional dictionary mapping a lower-case keyword to a list
                     of prompts. Defaults to the module's PROMPT_KEYWORDS.
    @return: A list of prompts for each event name
    @raise ValueError: If the keyword dictionary is empty.
    """
    if keywords is None:
        keywords = PROMPT_KEYWORDS
    if not keywords:
        raise ValueError("Keyword dictionary is empty. Please check that the file is not empty.")

    full_prompt = []
    for event in event_names:
        event_name = event.lower()
        # Re-seed for every event so the same event name always yields the
        # same prompt in deterministic mode, regardless of its list position.
        random.seed(DEFAULT_SEED if deterministic else None)
        # Split the lower-cased name: the dictionary keys are lower-case.
        prompt = [
            random.choice(keywords[word])
            for word in event_name.split()
            if word in keywords
        ]
        # if no prompt is found, just use the event name
        full_prompt.append(' combined with '.join(prompt) if prompt else event_name)
    return full_prompt
def check_dates_format(dates):
    """
    Checks that the dates are a single date or a two-date range in the correct format.

    @param dates: The dates to be checked, as one string: 'YYYY-MM-DD' or
                  'YYYY-MM-DD,YYYY-MM-DD'. Whitespace around each date is ignored.

    @return: A list of one or two date strings with surrounding whitespace removed.

    @raise click.BadParameter: If more than two dates are given, a date does not
                               parse with '%Y-%m-%d', or the first date of a
                               range is after the second.
    """
    format_error = "Dates must be in the format \'YYYY-MM-DD\' or as a range: \'YYYY-MM-DD,YYYY-MM-DD\'."

    # Tolerate 'a, b' as well as 'a,b'
    parts = [part.strip() for part in dates.split(',')]
    if len(parts) > 2:
        raise click.BadParameter(format_error)

    parsed = []
    for part in parts:
        try:
            parsed.append(datetime.datetime.strptime(part, '%Y-%m-%d'))
        except ValueError as err:
            # Keep the parsing failure attached as the cause for debugging
            raise click.BadParameter(format_error) from err

    # A reversed range would otherwise be accepted and select nothing.
    if parsed[0] > parsed[-1]:
        raise click.BadParameter("The start date of a range must not be after its end date.")
    return parts
def run_audio_generation(descriptions, duration, model_name, output):
    """
    Load Audiocraft's AudioGen model and generate one audio file per description.

    Each file is written as '{stem}{idx}', where idx is the position of the
    description and stem is chosen as follows:
      - output is a list with one entry per description: that entry;
      - output is a non-empty string: that string;
      - otherwise (no output given, or a list of the wrong length): the
        description itself with spaces replaced by underscores.

    @param descriptions: List of text descriptions to generate audio from.
    @param duration: Duration of the generated audio.
    @param model_name: Name of the Audiocraft AudioGen model to use.
    @param output: Name of the output file, a list of names (one per
                   description), or None.
    """
    print(f"Running lg_audiogen with descriptions: {descriptions}")

    # Load Audiocraft's AudioGen model and set generation params.
    model = AudioGen.get_pretrained(model_name)
    model.set_generation_params(duration=duration)

    # Generate audio from the descriptions
    wav = model.generate(descriptions)

    # Per-clip names are only usable when they line up with the descriptions.
    has_name_per_clip = isinstance(output, list) and len(output) == len(descriptions)
    has_shared_name = isinstance(output, str) and bool(output)

    # Save the generated audios.
    for idx, one_wav in enumerate(wav):
        if has_name_per_clip:
            batch_output = output[idx]
        elif has_shared_name:
            batch_output = output
        else:
            # Fall back to a name derived from the description instead of an
            # empty stem, which would produce files named just '0', '1', ...
            batch_output = descriptions[idx].replace(' ', '_')
        # Will save under {output}{idx}.wav, with loudness normalization at -14 db LUFS.
        audio_write(f'{batch_output}{idx}', one_wav.cpu(),
                    model.sample_rate, strategy="loudness", loudness_compressor=True)
"Laughter and chatter in the cafeteria", + "Ding of a microwave", + "Silverware clinking in a salad bowl", + "Soup slurping", + "Vending machine dispensing a snack" + ], + "yoga": [ + "Soft instrumental music", + "Deep inhaling and exhaling", + "Stretching and rolling out a yoga mat", + "Quiet voice of instructor guiding the practice", + "Gentle taps of feet on the floor", + "Rustling of comfortable clothing", + "Sound of wind chimes from an open window", + "Peaceful silence held in meditation" + ], + "grocery": [ + "Beep of the scanner at checkout", + "Crinkle of plastic grocery bags", + "Background music of the store", + "Wheels of a shopping cart rolling down aisles", + "Conversations about price checks", + "Thud of produce being weighed", + "Intercom announcements for sales", + "Refrigeration units humming in the frozen section" + ], + "gardening": [ + "Snipping of pruning shears", + "Swoosh of watering from a hose", + "Rustle of plants and leaves", + "Thud of a shovel digging into earth", + "Chirping of birds nearby", + "Creaking of a wheelbarrow", + "Buzzing of bees at work", + "Patting of soil around a newly planted flower" + ], + "birthday": [ + "Laughter and singing of Happy Birthday", + "Pop of balloons being inflated", + "Rustle of gift wrap", + "Crackling of candles being lit", + "Cheers and clapping after blowing out candles", + "Tinkle of ice cubes in party drinks", + "Crunch of chips and snacks", + "Thumping bass of celebratory music" + ], + "concert": [ + "Roar of the crowd cheering", + "Amplified strumming of guitars", + "Powerful voice of the lead singer", + "Thudding bass vibrating the floor", + "Drum solos that resonate through the arena", + "Synthesizers adding electronic harmonies", + "Clapping along to the rhythm of the song", + "Encore chants filling the venue" + ], + "study": [ + "Turning pages of a heavy textbook", + "Click-clack of a keyboard in a quiet room", + "Sighs of concentration and occasional frustration", + "Sipping of coffee or 
tea while pondering", + "Timer beeping at the end of a study session", + "Whispered readings of notes", + "Highlighters scribbling on key points", + "Silent ambiance of a library" + ], + "hiking": [ + "Leaves crunching underfoot on the trail", + "Bird calls echoing through the trees", + "Streams trickling over rocks", + "Branches snapping as they are moved aside", + "Heavy breathing from the ascent", + "Wind whistling past mountain peaks", + "Zippers of backpacks being opened and closed", + "Distant thunder suggesting an approaching storm" + ], + "travel": [ + "Airport announcements over the PA system", + "Luggage wheels rolling across the floor", + "Language chatter from international travelers", + "Airplane engine starting up", + "Train horn blaring as it arrives", + "Distant sounds of a city new to the traveler", + "Maps being unfolded and studied", + "Camera shutter clicking capturing memories" + ], + "cooking": [ + "Chopping vegetables on a cutting board", + "Grease popping from a hot pan", + "Timer dinging when the oven is preheated", + "Blender whirring while making sauces", + "Cork squeaking out of a wine bottle", + "Faucet running to wash produce", + "Tin foil tearing for covering dishes", + "Spices being shaken into a pot" + ], + "festival": [ + "Carnival music playing in the distance", + "Rides whirring and creaking", + "Children laughing and shouting with joy", + "Food vendors calling out specials", + "Balloons rubbing together and squeaking", + "Fireworks crackling and booming overhead", + "Drum beats from a live band", + "Crowds milling and conversing" + ], + "meditation": [ + "Chime ringing to start the session", + "Gentle bubbling of a water feature", + "Soft rustle of meditation cushions", + "Steady breathing focused and deep", + "Whispers of mantras being repeated", + "Wind rustling leaves outside", + "Silence enveloping the room", + "Faint ting of a singing bowl" + ], + "party": [ + "Upbeat music flooding the room", + "Laughter and lively 
conversations", + "Clinking of glasses in toasts", + "Heels clicking on the dance floor", + "Bottles opening and pouring drinks", + "Chips dipping into salsa", + "Balloons popping unexpectedly", + "Games and competitions stirring up excitement" + ], + "virtual": [ + "Keyboard tapping during an online chat", + "Mouse clicking navigating a digital interface", + "Notification pings from incoming messages", + "Microphone catching intermittent sounds of breathing", + "Silent pauses during a video call", + "Earphones rubbing against fabric", + "Computer fan whirring quietly", + "Echo of a voice due to a bad connection" + ], + "clubbing": [ + "Bass reverberating through the floor", + "Shouts and cheers to a DJ's beat drop", + "Glasses clinking at the bar", + "Heels stomping rhythmically on the dance floor", + "Securing of wristbands at the entrance", + "Stamps being pressed on hands", + "Doors opening and closing in VIP sections", + "Whistles and yells from the crowd" + ], + "stay": [ + "Keys jingling as they unlock a hotel room", + "Luggage being rolled across a lobby", + "Pages of a book flipping in a quiet space", + "Intermittent sips of a hot beverage", + "Air conditioning starting up in the background", + "Curtains sliding on their rods", + "Remote control clicking as the television turns on", + "Faucet running water into a bath" + ], + "wedding": [ + "Applause as the couple makes their entrance", + "Rice being thrown during the exit", + "Pop of a champagne cork during toasts", + "Tears being dabbed with tissues", + "Chairs scraping as guests move to dance", + "Camera shutters during the photo session", + "Zippers of dresses and the adjusting of suits", + "Laughter from shared memories and jokes" + ], + "brunch": [ + "Froth hissing from an espresso machine", + "Sizzling from breakfast items on a grill", + "Juice being poured into glasses", + "Silverware tinking against mugs for attention", + "Eggshells cracking into a hot pan", + "Baskets of pastries being 
unwrapped", + "Spoon clinking inside a mixing bowl", + "Toast popping up from toasters" + ], + "expo": [ + "Crowds murmuring at convention center", + "Scanner beeping over tickets and badges", + "Stalls being assembled and disassembled", + "Enthusiasts discussing over exhibited items", + "Presenters speaking through microphones", + "Demo products being tested and tapped", + "Rolling of promotional banners and signs", + "Carts carrying supplies bumping on the floor" + ], + "flight": [ + "Cabin crew walking in aisles", + "Seatbelt clicking shut", + "Overhead compartments being opened and closed", + "Pilot announcements over the intercom", + "Beverage carts rattling down the aisle", + "Aircraft accelerating on the runway", + "Air vents being adjusted", + "Soft snores of a sleeping passenger" + ], + "interview": [ + "Leather seats creaking in an office", + "Handshakes exchanging between participants", + "Writing on a notepad", + "Clothes rustling as candidates adjust in their seats", + "Doors opening and closing as people enter and leave", + "Coughs and clears of the throat nervously", + "Resume papers being shuffled and examined", + "Clock ticking, marking time passing" + ], + "seminar": [ + "Audience applause after a speaker's point", + "Attendees murmuring in agreement or curiosity", + "Microphone feedback before adjustments", + "Presenters shuffling cue cards", + "Laser pointer clicking and pointing at slides", + "Markers squeaking on a whiteboard", + "Chairs sliding as participants lean forward", + "Water bottles being opened during breaks" + ], + "picnic": [ + "Laughter and chatter in open fields", + "Cooler lids closing over ice and refreshments", + "Plastic utensils tapping on paper plates", + "Crunch of biting into fresh fruit", + "Kites whooshing in the wind", + "Dogs barking and playing", + "Blankets flapping before settling on the ground", + "Ice melting and clinking in sun-warmed glasses" + ], + "massage": [ + "Ointment tubes unscrewing and squeezing", + 
"Gentle tapping of a masseuse's hands", + "Quiet whir of a massage chair", + "Towels being unfolded and draped", + "Oils being warmed and swirled in burners", + "Low sighs of relaxation from clients", + "Pillows being plumped", + "Water trickling from a zen fountain" + ], + "laundry": [ + "Washing machine's rhythmic tumbling", + "Dryer sheets being pulled apart", + "Zippers clanking against the drum", + "Liquid detergent glugging into a cup", + "Clothes being folded on a hard surface", + "Iron hissing steam on fabric", + "Buttons rattling in the spin cycle", + "Hangers clicking as they're hung on a rod" + ], + "presentation": [ + "Slide changes with a soft beep", + "Pointer tapping on a table for emphasis", + "Audience taking notes in hushed tones", + "Cables being connected for multimedia", + "Footsteps as the presenter paces", + "Questions being asked after the talk", + "Hands clapping at the presentation's conclusion", + "Breath being drawn in to calm nerves" + ], + "camping": [ + "Tent fabric flapping in the breeze", + "Zipper of a sleeping bag joining", + "Wood being chopped for a fire", + "Marshmallows crackling in the flame", + "Tree branches snapping underfoot", + "Animal calls at night", + "Streams flowing gently in the background", + "Sleepy yawns in the crisp morning air" + ], + "errands": [ + "Shopping list paper unfolding", + "Car keys jangling and igniting the engine", + "Self-checkout register giving instructions", + "Shopping carts being retrieved and returned", + "Automatic doors whooshing at entrances", + "Pen scribbling to check off items", + "Coins clinking as they're handed to cashiers", + "Plastic bags rustling with purchased goods" + ], + "tailgate": [ + "Grill igniting with a woosh", + "Cans being opened with a fizz", + "Fans cheering for their team", + "Spatula scraping on a griddle", + "Football being caught and thrown", + "Coolers sliding open on truck beds", + "Team chants being bellowed out", + "Foldable chairs snapping into place" + ], 
+ "demo": [ + "Exclamations from onlookers at a surprising feature", + "Machinery whirring during a product test", + "Products being handled and tapped", + "Questions being asked by potential customers", + "Plastic protective film peeling away", + "Samples being distributed", + "Applause at the end of a successful demonstration", + "Pens scribbling notes or feedback" + ], + "class": [ + "Students brainstorming in small groups", + "Pencils filling in multiple-choice bubbles", + "Footsteps of an instructor walking around", + "Backpacks zipping as classes end", + "Chairs screeching slightly against the floor", + "Projector fan blowing softly during a lecture", + "Desks being arranged for a group activity", + "Excited chatter as a classmate answers correctly" + ], + "break": [ + "Steam hissing from a relaxing tea kettle", + "Keyboard keys being left idle", + "Contented sighs of stretching", + "Snack packages crinkling as they are opened", + "Laughter and banter in the break room", + "Clock ticking, counting down the break time", + "Sneakers scuffing on the floor during a quick walk", + "Soda fizzing as it's poured into a cup" + ], + "cleanup": [ + "Brooms sweeping over a floor", + "Trash bags rustling as they're tied up", + "Spray bottles misting cleaners", + "Bins opening and closing for recycling", + "Rags swooshing over surfaces", + "Vacuums humming as they pick up debris", + "Windows squeaking from being wiped down", + "Objects being reorganized on a shelf" + ], + "repair": [ + "Tools clanking in a toolbox", + "Drill whirring into wood or metal", + "Hammer pounding nails into place", + "Screws rattling in a jar", + "Parts being jostled as they're replaced", + "Tape being ripped and applied", + "Objects being tested after fixing", + "Sigh of satisfaction after a job well done" + ], + "nap": [ + "Cloth rustling as one settles in", + "Fan whirling softly in the background", + "Curtains drawing to dim the room", + "Gentle snoring or breathing", + "Clock softly ticking 
away the minutes", + "Bed springs creaking slightly with movement", + "Soft murmur of daytime ambience", + "Alarm beeping to awaken from the rest" + ], + "read": [ + "Pages turning slowly and deliberately", + "Occasional chuckles or gasps at a plot twist", + "Fingers tapping a rhythm on a book spine", + "Bookmarks sliding into place", + "Glasses being adjusted on the nose", + "Armchair creaking with each shift", + "Throat clearing before continuing to read aloud", + "Sudden silence as concentration deepens" + ], + "game": [ + "Die clattering on a board", + "Cards being shuffled and dealt", + "Pieces being moved across the game board", + "Timer ticking down during a speed round", + "Cheering after a winning play", + "Groans of defeat or frustration", + "Strategic whispering among teammates", + "Chips being stacked or collected" + ], + "cook": [ + "Knife dicing ingredients quickly", + "Dough being kneaded on a countertop", + "Pot lid rattling from steam", + "Vegetables being stirred in a sizzling pan", + "Oven door creaking open for inspection", + "Timer ringing to signal readiness", + "Herbs being chopped on a wooden board", + "Utensils being laid out for service" + ], + "drive": [ + "Engine revving as the vehicle accelerates", + "Wind whooshing past open car windows", + "Seatbelts clicking into their holders", + "Turn signal ticking before a corner", + "Wiper blades swishing in the rain", + "Horn honking in a traffic jam", + "Tires crunching over a gravel road", + "GPS voice announcing upcoming directions" + ], + "commute": [ + "Bus doors hissing open and closed", + "Bike bell dinging on a crowded path", + "Seats shuffling as people find their spots", + "Newspaper pages rustling during the read", + "Subway screeching to a halt at a station", + "Footsteps hurrying to catch public transport", + "Cars honking in traffic", + "Cyclists pedaling and gears shifting" + ], + "subway": [ + "Train conductor announcing stations", + "Rails clanking beneath fast-moving cars", + 
"Turnstile clicking as it turns", + "Passengers chatting or dozing off", + "Card reader beeping as tickets are scanned", + "Sudden silence when a car goes underground", + "Energetic performers in the station", + "Doors sliding shut and warning chimes" + ], + "cycle": [ + "Pedals turning with a rhythmic cadence", + "Chain clinking as it moves the gears", + "Tires humming on smooth pavement", + "Breath puffing from exertion", + "Helmet strap clicking secure", + "Bike frame rattling on uneven surfaces", + "Air pump whooshing to inflate tires", + "Kickstand clanking as it's put down" + ], + "sail": [ + "Sails flapping when catching the wind", + "Ropes creaking as they're pulled tight", + "Waves lapping against the hull", + "Wooden deck groaning with the swell", + "Seagulls calling out overhead", + "Flag whipping at the mast", + "Anchor dropping and chain rattling", + "Captain calling out directions" + ], + "walk": [ + "Footsteps tapping on a sidewalk", + "Leaves crunching in a park", + "Birds chirping along a nature trail", + "Children laughing in nearby playgrounds", + "Gravel scattering on a path", + "Crosswalk signal beeping for pedestrians", + "Jingling of dog collars on a leash", + "Traffic passing on adjacent streets" + ], + "ferry": [ + "Vessel's horn booming over the water", + "Ramp clanging as it lowers for vehicles", + "Waves splashing against the sides", + "Engines rumbling from below deck", + "Passengers' voices mingling on the open deck", + "Seating benches shifting slightly on the deck", + "Life jackets being adjusted with straps and buckles", + "Disembarkment bell signaling arrival" + ], + "taxi": [ + "Radio dispatch voices crackling", + "Meter beeping as it's activated", + "Traffic honking heard through closed windows", + "Upholstery creaking with passenger movement", + "Driver and passenger exchanging pleasantries", + "Car slowing with the sound of brakes", + "Trunk popping open for luggage", + "Receipts printing for the fare" + ], + "trip": [ + 
"Suitcases zipping and locks clicking",
+        "Maps folding and unfolding for navigation",
+        "Camera shutters snapping photos of sights",
+        "Vehicle doors thudding shut loaded with gear",
+        "Footsteps echoing in historic corridors",
+        "Local wildlife calls in natural reserves",
+        "Chatter of fellow travelers sharing experiences",
+        "Guidebook pages flipping in search of information"
+    ],
+    "vacation": [
+        "Resort pools splashing with joyful swimmers",
+        "Umbrellas flapping in a seaside breeze",
+        "Luggage wheels rolling across hotel lobbies",
+        "Distant laughter from late-night revelers",
+        "Island music floating from a beach bar",
+        "Boardwalk planks creaking under foot traffic",
+        "Ice clinking in a glass of a chilled beverage",
+        "Exchange of greetings in different languages"
+    ],
+    "run": [
+        "Sneakers thudding against pavement",
+        "Breathing quick and steady in rhythm",
+        "Heartbeat pulsing in ears",
+        "Clothing whispering with each stride",
+        "Water bottles sloshing mid-jog",
+        "Birds calling as dawn breaks",
+        "Timer beeping to signal lap completion",
+        "Encouragement shouted from a running buddy"
+    ],
+    "fitness": [
+        "Weights clinking on a rack after a set",
+        "Treadmills humming in a steady pace",
+        "Jump ropes slapping the ground quickly",
+        "Gym mats thumping from dynamic movements",
+        "Instructor's voice motivating the class",
+        "Balls bouncing during a sports practice",
+        "Swim caps gliding through pool water",
+        "Heavy bag absorbing punches and kicks"
+    ]
+}
\ No newline at end of file
diff --git a/extensions/lg_audiogen/setup.py b/extensions/lg_audiogen/setup.py
new file mode 100644
index 00000000..34bb9cd6
--- /dev/null
+++ b/extensions/lg_audiogen/setup.py
@@ -0,0 +1,30 @@
+from setuptools import setup, find_packages
+
+# Packaging metadata for the lg_audiogen command-line tool.
+setup(
+    name='lg_audiogen',
+    version='0.1',
+    description="A Command-line interface to use Audiocraft for labgraph",
+    long_description="""
+    A Command-line interface to facilitate the usage of Audiocraft's models
+    to generate and process audio on labgraph
+    """,
+    packages=find_packages(),
+    # The README and the CI workflow both target Python 3.8+; let pip enforce it.
+    python_requires=">=3.8",
+    install_requires=[
+        "Click>=8.1.7",
+        "torch>=2.1.0",
+        "torchaudio>=2.1.0",
+        "audiocraft==1.1.0",
+        "icalendar==5.0.11",
+        "openai==1.3.6",
+        "python-dotenv==1.0.0"
+    ],
+    # Installs the `lg_audiogen` command, bound to lg_audiogen.main:parse_arguments.
+    entry_points={
+        "console_scripts": [
+            "lg_audiogen=lg_audiogen.main:parse_arguments",
+        ],
+    },
+)
\ No newline at end of file
diff --git a/extensions/lg_audiogen/tests/test_main.py b/extensions/lg_audiogen/tests/test_main.py
new file mode 100644
index 00000000..8159fe74
--- /dev/null
+++ b/extensions/lg_audiogen/tests/test_main.py
@@ -0,0 +1,35 @@
+import os
+import subprocess
+
+
+def _generate_and_check(cli_args, expected_file):
+    """Run ``lg_audiogen`` with ``cli_args`` and check it wrote ``expected_file``.
+
+    Any stale copy of ``expected_file`` is removed first so a leftover from a
+    previous run cannot satisfy the check. If the file is missing afterwards,
+    the assertion message includes the CLI's exit code and stderr.
+    """
+    if os.path.exists(expected_file):
+        os.remove(expected_file)
+    result = subprocess.run(["lg_audiogen", *cli_args],
+                            capture_output=True, text=True, check=False)
+    assert os.path.exists(expected_file), (
+        f"Output file {expected_file} was not created "
+        f"(exit code {result.returncode}); stderr:\n{result.stderr}"
+    )
+    os.remove(expected_file)
+
+
+def test_single_description():
+    '''
+    Tests output with a single description
+    '''
+    _generate_and_check(["dog barking"], "dog_barking0.wav")
+
+
+def test_activity_to_sound():
+    '''
+    Tests output with a single activity
+    '''
+    _generate_and_check(["-a", "meeting with nathan"],
+                        "meeting_with_nathan0.wav")