Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/opencode-memory/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,17 @@ Repository configured with OpenCode memory and automated sync workflow.
- Model ID: `google/veo-3.1`
- Namespace: `video.generate`
- Description: Google's improved video model with higher-fidelity video, context-aware audio, reference image and last frame support

## 2026-01-20 - New Featured Models Added

- Model ID: `black-forest-labs/flux-2-klein-4b`
- Namespace: `image.generate`
- Description: FLUX.2 Klein - Very fast image generation model, 4 steps distilled, sub-second inference for production and near real-time applications

- Model ID: `black-forest-labs/flux-2-max`
- Namespace: `image.generate`
- Description: FLUX.2 Max - The highest fidelity image model from Black Forest Labs

- Model ID: `bytedance/seedance-1.5-pro`
- Namespace: `video.generate`
- Description: Seedance 1.5 Pro - Joint audio-video model that accurately follows complex instructions
41 changes: 32 additions & 9 deletions src/nodetool/dsl/replicate/audio/enhance.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,51 @@
# This file is auto-generated by nodetool.dsl.codegen.
# Please do not edit this file manually.

# Instead, edit the node class in the source module and run the following commands to regenerate the DSL:
# nodetool package scan
# nodetool codegen

from pydantic import BaseModel, Field
import typing
from typing import Any
import nodetool.metadata.types
import nodetool.metadata.types as types
from nodetool.dsl.graph import GraphNode
from nodetool.dsl.graph import GraphNode, SingleOutputGraphNode

import typing
from pydantic import Field
from nodetool.dsl.handles import OutputHandle, OutputsProxy, connect_field
import nodetool.nodes.replicate.audio.enhance
from nodetool.workflows.base_node import BaseNode


class AudioSuperResolution(
    SingleOutputGraphNode[types.AudioRef], GraphNode[types.AudioRef]
):
    """
    AudioSR: Versatile Audio Super-resolution at Scale

    Auto-generated DSL wrapper for the Replicate AudioSuperResolution node.
    Each field accepts either a literal value or an ``OutputHandle`` so it can
    be wired to another node's output in a workflow graph. The single output
    is an ``AudioRef`` (the upsampled audio).
    """

    # Random seed; None lets the backend pick one (non-deterministic runs).
    seed: int | OutputHandle[int] | None = connect_field(
        default=None, description="Random seed. Leave blank to randomize the seed"
    )
    # Number of DDIM diffusion inference steps.
    ddim_steps: int | OutputHandle[int] = connect_field(
        default=50, description="Number of inference steps"
    )
    # Source audio to upsample; default is an empty AudioRef placeholder.
    input_file: types.AudioRef | OutputHandle[types.AudioRef] = connect_field(
        default=types.AudioRef(
            type="audio", uri="", asset_id=None, data=None, metadata=None
        ),
        description="Audio to upsample",
    )
    # Classifier-free guidance strength.
    guidance_scale: float | OutputHandle[float] = connect_field(
        default=3.5, description="Scale for classifier free guidance"
    )

    @classmethod
    def get_node_class(cls) -> type[BaseNode]:
        """Return the concrete node implementation this DSL class wraps."""
        return nodetool.nodes.replicate.audio.enhance.AudioSuperResolution

    @classmethod
    def get_node_type(cls):
        """Delegate to the wrapped node class so the type string stays in sync."""
        return cls.get_node_class().get_node_type()
291 changes: 193 additions & 98 deletions src/nodetool/dsl/replicate/audio/generate.py

Large diffs are not rendered by default.

61 changes: 38 additions & 23 deletions src/nodetool/dsl/replicate/audio/separate.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
# This file is auto-generated by nodetool.dsl.codegen.
# Please do not edit this file manually.

# Instead, edit the node class in the source module and run the following commands to regenerate the DSL:
# nodetool package scan
# nodetool codegen

from pydantic import BaseModel, Field
import typing
from typing import Any
import nodetool.metadata.types
import nodetool.metadata.types as types
from nodetool.dsl.graph import GraphNode
from nodetool.dsl.graph import GraphNode, SingleOutputGraphNode

import typing
from pydantic import Field
from nodetool.dsl.handles import OutputHandle, OutputsProxy, connect_field
import nodetool.nodes.replicate.audio.separate
import nodetool.nodes.replicate.audio.separate
import nodetool.nodes.replicate.audio.separate
import nodetool.nodes.replicate.audio.separate
import nodetool.nodes.replicate.audio.separate
import nodetool.nodes.replicate.audio.separate
from nodetool.workflows.base_node import BaseNode


class Demucs(GraphNode):
"""Demucs is an audio source separator created by Facebook Research."""
class Demucs(SingleOutputGraphNode[typing.Any], GraphNode[typing.Any]):
"""
Demucs is an audio source separator created by Facebook Research.
"""

Stem: typing.ClassVar[type] = nodetool.nodes.replicate.audio.separate.Demucs.Stem
Model: typing.ClassVar[type] = nodetool.nodes.replicate.audio.separate.Demucs.Model
Expand All @@ -30,58 +38,65 @@ class Demucs(GraphNode):
Output_format: typing.ClassVar[type] = (
nodetool.nodes.replicate.audio.separate.Demucs.Output_format
)
jobs: int | GraphNode | tuple[GraphNode, str] = Field(

jobs: int | OutputHandle[int] = connect_field(
default=0,
description="Choose the number of parallel jobs to use for separation.",
)
stem: nodetool.nodes.replicate.audio.separate.Demucs.Stem = Field(
default=nodetool.nodes.replicate.audio.separate.Demucs.Stem.NONE,
default=nodetool.nodes.replicate.audio.separate.Demucs.Stem("none"),
description="If you just want to isolate one stem, you can choose it here.",
)
audio: types.AudioRef | GraphNode | tuple[GraphNode, str] = Field(
default=types.AudioRef(type="audio", uri="", asset_id=None, data=None),
audio: types.AudioRef | OutputHandle[types.AudioRef] = connect_field(
default=types.AudioRef(
type="audio", uri="", asset_id=None, data=None, metadata=None
),
description="Upload the file to be processed here.",
)
model: nodetool.nodes.replicate.audio.separate.Demucs.Model = Field(
default=nodetool.nodes.replicate.audio.separate.Demucs.Model.HTDEMUCS,
default=nodetool.nodes.replicate.audio.separate.Demucs.Model("htdemucs"),
description="Choose the demucs audio that proccesses your audio. The readme has more information on what to choose.",
)
split: bool | GraphNode | tuple[GraphNode, str] = Field(
split: bool | OutputHandle[bool] = connect_field(
default=True,
description="Choose whether or not the audio should be split into chunks.",
)
shifts: int | GraphNode | tuple[GraphNode, str] = Field(
shifts: int | OutputHandle[int] = connect_field(
default=1,
description="Choose the amount random shifts for equivariant stabilization. This performs multiple predictions with random shifts of the input and averages them, which makes it x times slower.",
)
overlap: float | GraphNode | tuple[GraphNode, str] = Field(
overlap: float | OutputHandle[float] = connect_field(
default=0.25,
description="Choose the amount of overlap between prediction windows.",
)
segment: int | None | GraphNode | tuple[GraphNode, str] = Field(
segment: int | OutputHandle[int] | None = connect_field(
default=None, description="Choose the segment length to use for separation."
)
clip_mode: nodetool.nodes.replicate.audio.separate.Demucs.Clip_mode = Field(
default=nodetool.nodes.replicate.audio.separate.Demucs.Clip_mode.RESCALE,
default=nodetool.nodes.replicate.audio.separate.Demucs.Clip_mode("rescale"),
description="Choose the strategy for avoiding clipping. Rescale will rescale entire signal if necessary or clamp will allow hard clipping.",
)
mp3_preset: nodetool.nodes.replicate.audio.separate.Demucs.Mp3_preset = Field(
default=nodetool.nodes.replicate.audio.separate.Demucs.Mp3_preset._2,
default=nodetool.nodes.replicate.audio.separate.Demucs.Mp3_preset(2),
description="Choose the preset for the MP3 output. Higher is faster but worse quality. If MP3 is not selected as the output type, this has no effect.",
)
wav_format: nodetool.nodes.replicate.audio.separate.Demucs.Wav_format = Field(
default=nodetool.nodes.replicate.audio.separate.Demucs.Wav_format.INT24,
default=nodetool.nodes.replicate.audio.separate.Demucs.Wav_format("int24"),
description="Choose format for the WAV output. If WAV is not selected as the output type, this has no effect.",
)
mp3_bitrate: int | GraphNode | tuple[GraphNode, str] = Field(
mp3_bitrate: int | OutputHandle[int] = connect_field(
default=320,
description="Choose the bitrate for the MP3 output. Higher is better quality but larger file size. If MP3 is not selected as the output type, this has no effect.",
)
output_format: nodetool.nodes.replicate.audio.separate.Demucs.Output_format = Field(
default=nodetool.nodes.replicate.audio.separate.Demucs.Output_format.MP3,
default=nodetool.nodes.replicate.audio.separate.Demucs.Output_format("mp3"),
description="Choose the audio format you would like the result to be returned in.",
)

@classmethod
def get_node_class(cls) -> type[BaseNode]:
return nodetool.nodes.replicate.audio.separate.Demucs

@classmethod
def get_node_type(cls):
return "replicate.audio.separate.Demucs"
return cls.get_node_class().get_node_type()
87 changes: 74 additions & 13 deletions src/nodetool/dsl/replicate/audio/transcribe.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,65 @@
# This file is auto-generated by nodetool.dsl.codegen.
# Please do not edit this file manually.

# Instead, edit the node class in the source module and run the following commands to regenerate the DSL:
# nodetool package scan
# nodetool codegen

from pydantic import BaseModel, Field
import typing
from typing import Any
import nodetool.metadata.types
import nodetool.metadata.types as types
from nodetool.dsl.graph import GraphNode
from nodetool.dsl.graph import GraphNode, SingleOutputGraphNode

import typing
from pydantic import Field
from nodetool.dsl.handles import OutputHandle, OutputsProxy, connect_field
import nodetool.nodes.replicate.audio.transcribe
from nodetool.workflows.base_node import BaseNode


class GPT4o_Transcribe(SingleOutputGraphNode[str], GraphNode[str]):
    """
    A speech-to-text model that uses GPT-4o to transcribe audio

    Auto-generated DSL wrapper node. Fields accept either literal values or
    ``OutputHandle`` connections from upstream nodes; the single output is
    the transcription string (per the ``SingleOutputGraphNode[str]`` base).
    """

    # Optional style/continuation prompt; should be in the audio's language.
    prompt: str | OutputHandle[str] | None = connect_field(
        default=None,
        description="An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.",
    )
    # ISO-639-1 language code (e.g. "en"); None means auto-detect upstream.
    language: str | OutputHandle[str] | None = connect_field(
        default=None,
        description="The language of the input audio. Supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.",
    )
    # NOTE(review): typed as str rather than types.AudioRef — presumably a
    # URL/path accepted by the Replicate API; confirm against the node class.
    audio_file: str | OutputHandle[str] | None = connect_field(
        default=None,
        description="The audio file to transcribe. Supported formats: mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm",
    )
    # Sampling temperature; 0 gives the most deterministic transcription.
    temperature: float | OutputHandle[float] = connect_field(
        default=0, description="Sampling temperature between 0 and 1"
    )

    @classmethod
    def get_node_class(cls) -> type[BaseNode]:
        # The concrete workflow node this DSL class wraps.
        return nodetool.nodes.replicate.audio.transcribe.GPT4o_Transcribe

    @classmethod
    def get_node_type(cls):
        # Delegate so the type string cannot drift from the node class.
        return cls.get_node_class().get_node_type()


import typing
from pydantic import Field
from nodetool.dsl.handles import OutputHandle, OutputsProxy, connect_field
import nodetool.nodes.replicate.audio.transcribe
import nodetool.nodes.replicate.audio.transcribe
from nodetool.workflows.base_node import BaseNode


class IncrediblyFastWhisper(GraphNode):
"""whisper-large-v3, incredibly fast, powered by Hugging Face Transformers! 🤗"""
class IncrediblyFastWhisper(SingleOutputGraphNode[str], GraphNode[str]):
"""
whisper-large-v3, incredibly fast, powered by Hugging Face Transformers! 🤗
"""

Task: typing.ClassVar[type] = (
nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Task
Expand All @@ -22,39 +70,52 @@ class IncrediblyFastWhisper(GraphNode):
Timestamp: typing.ClassVar[type] = (
nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Timestamp
)

task: nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Task = Field(
default=nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Task.TRANSCRIBE,
default=nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Task(
"transcribe"
),
description="Task to perform: transcribe or translate to another language.",
)
audio: types.AudioRef | GraphNode | tuple[GraphNode, str] = Field(
default=types.AudioRef(type="audio", uri="", asset_id=None, data=None),
audio: types.AudioRef | OutputHandle[types.AudioRef] = connect_field(
default=types.AudioRef(
type="audio", uri="", asset_id=None, data=None, metadata=None
),
description="Audio file",
)
hf_token: str | None | GraphNode | tuple[GraphNode, str] = Field(
hf_token: str | OutputHandle[str] | None = connect_field(
default=None,
description="Provide a hf.co/settings/token for Pyannote.audio to diarise the audio clips. You need to agree to the terms in 'https://huggingface.co/pyannote/speaker-diarization-3.1' and 'https://huggingface.co/pyannote/segmentation-3.0' first.",
)
language: (
nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Language
) = Field(
default=nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Language.NONE,
default=nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Language(
"None"
),
description="Language spoken in the audio, specify 'None' to perform language detection.",
)
timestamp: (
nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Timestamp
) = Field(
default=nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Timestamp.CHUNK,
default=nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper.Timestamp(
"chunk"
),
description="Whisper supports both chunked as well as word level timestamps.",
)
batch_size: int | GraphNode | tuple[GraphNode, str] = Field(
batch_size: int | OutputHandle[int] = connect_field(
default=24,
description="Number of parallel batches you want to compute. Reduce if you face OOMs.",
)
diarise_audio: bool | GraphNode | tuple[GraphNode, str] = Field(
diarise_audio: bool | OutputHandle[bool] = connect_field(
default=False,
description="Use Pyannote.audio to diarise the audio clips. You will need to provide hf_token below too.",
)

@classmethod
def get_node_class(cls) -> type[BaseNode]:
return nodetool.nodes.replicate.audio.transcribe.IncrediblyFastWhisper

@classmethod
def get_node_type(cls):
return "replicate.audio.transcribe.IncrediblyFastWhisper"
return cls.get_node_class().get_node_type()
Loading
Loading