Skip to content

deepecgkit.models

Neural network architectures for ECG signal processing. All models are nn.Module subclasses with a consistent interface: __init__(input_channels, output_size, **kwargs).

CNN Architectures

SimpleCNN

Bases: Module

Simple CNN model for ECG signal classification.

A straightforward convolutional neural network with pooling and dropout layers for basic ECG classification tasks.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
dropout_rate float

Dropout probability (default: 0.3)

0.3
Example

>>> model = SimpleCNN(input_channels=1, output_size=4)
>>> x = torch.randn(32, 1, 3000)
>>> output = model(x)
>>> print(output.shape)

>>> features = model.extract_features(x)
>>> print(features.shape)  # (32, 256)

Source code in deepecgkit/models/simple_cnn.py
@register_model(
    name="simple-cnn",
    description="Lightweight CNN for fast inference",
)
class SimpleCNN(nn.Module):
    """
    Simple CNN model for ECG signal classification.

    Four convolutional stages (conv -> batch norm -> ReLU, with max-pooling
    and dropout between the first three) followed by global average pooling
    and a small fully connected classifier head.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        dropout_rate: Dropout probability (default: 0.3)

    Example:
        >>> model = SimpleCNN(input_channels=1, output_size=4)
        >>> x = torch.randn(32, 1, 3000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 256)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        def conv_stage(in_ch: int, out_ch: int, ks: int) -> list:
            # One downsampling stage: conv (odd kernel, "same" padding),
            # BN, ReLU, halve the length, then regularize with dropout.
            return [
                nn.Conv1d(in_ch, out_ch, kernel_size=ks, padding=ks // 2),
                nn.BatchNorm1d(out_ch),
                nn.ReLU(),
                nn.MaxPool1d(kernel_size=2),
                nn.Dropout(dropout_rate),
            ]

        stages = []
        stages += conv_stage(input_channels, 32, 7)
        stages += conv_stage(32, 64, 5)
        stages += conv_stage(64, 128, 3)
        # Final stage has no pooling/dropout; adaptive average pooling
        # collapses the time axis to a single value per channel.
        stages += [
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
        ]
        self.features = nn.Sequential(*stages)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, output_size),
        )

    @property
    def feature_dim(self) -> int:
        # Channel count of the last conv stage == width of extract_features.
        return 256

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return pooled convolutional features of shape (batch, 256)."""
        return torch.flatten(self.features(x), 1)

    def forward(self, x):
        """Return class logits of shape (batch, output_size)."""
        return self.classifier(self.features(x))

FCNWang

Bases: Module

Fully Convolutional Network for ECG signal classification.

Based on Wang et al.'s FCN architecture for time series classification, using three convolutional blocks with batch normalization and global average pooling. No dense layers except the final classifier.

Reference

Wang Z., Yan W., Oates T. "Time Series Classification from Scratch with Deep Neural Networks: A Strong Baseline" (2017) https://github.com/helme/ecg_ptbxl_benchmarking

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 12 for 12-lead ECG)

12
output_size int

Number of output classes

5
filters list[int] | None

List of filter counts for each conv block (default: [128, 256, 128])

None
kernel_sizes list[int] | None

List of kernel sizes for each conv block (default: [8, 5, 3])

None
dropout_rate float

Dropout probability before classifier (default: 0.3)

0.3
Example

>>> model = FCNWang(input_channels=12, output_size=5)
>>> x = torch.randn(32, 12, 1000)
>>> output = model(x)
>>> print(output.shape)

>>> features = model.extract_features(x)
>>> print(features.shape)  # (32, 128)

Source code in deepecgkit/models/fcn_wang.py
@register_model(
    name="fcn-wang",
    description="Fully Convolutional Network (Wang et al.) for time series classification",
)
class FCNWang(nn.Module):
    """
    Fully Convolutional Network for ECG signal classification.

    Based on Wang et al.'s FCN architecture for time series classification,
    using three convolutional blocks with batch normalization and global
    average pooling. No dense layers except the final classifier.

    Reference:
        Wang Z., Yan W., Oates T. "Time Series Classification from Scratch
        with Deep Neural Networks: A Strong Baseline" (2017)
        https://github.com/helme/ecg_ptbxl_benchmarking

    Args:
        input_channels: Number of input channels (default: 12 for 12-lead ECG)
        output_size: Number of output classes
        filters: List of filter counts for each conv block (default: [128, 256, 128])
        kernel_sizes: List of kernel sizes for each conv block (default: [8, 5, 3])
        dropout_rate: Dropout probability before classifier (default: 0.3)

    Raises:
        ValueError: If ``filters`` and ``kernel_sizes`` have different lengths.

    Example:
        >>> model = FCNWang(input_channels=12, output_size=5)
        >>> x = torch.randn(32, 12, 1000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 128)
    """

    def __init__(
        self,
        input_channels: int = 12,
        output_size: int = 5,
        filters: list[int] | None = None,
        kernel_sizes: list[int] | None = None,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        if filters is None:
            filters = [128, 256, 128]
        if kernel_sizes is None:
            kernel_sizes = [8, 5, 3]

        # Validate with a real exception instead of `assert`, which is
        # silently stripped when Python runs with the -O flag.
        if len(filters) != len(kernel_sizes):
            raise ValueError(
                "filters and kernel_sizes must have the same length, "
                f"got {len(filters)} and {len(kernel_sizes)}"
            )

        blocks = []
        in_ch = input_channels
        for nf, ks in zip(filters, kernel_sizes):
            # Conv -> BN -> ReLU; (ks - 1) // 2 padding keeps the length
            # unchanged for odd kernels (off by one for even kernels).
            blocks.append(
                nn.Sequential(
                    nn.Conv1d(in_ch, nf, kernel_size=ks, padding=(ks - 1) // 2, bias=False),
                    nn.BatchNorm1d(nf),
                    nn.ReLU(inplace=True),
                )
            )
            in_ch = nf

        self.conv_blocks = nn.Sequential(*blocks)
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = filters[-1]
        self.classifier = nn.Linear(self._feature_dim, output_size)

    @property
    def feature_dim(self) -> int:
        """Width of extract_features output (filter count of the last block)."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return globally pooled conv features of shape (batch, feature_dim)."""
        x = self.conv_blocks(x)
        x = self.global_pool(x)
        return torch.flatten(x, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, output_size)."""
        x = self.extract_features(x)
        x = self.dropout(x)
        return self.classifier(x)

AFModel

Bases: Module

Atrial Fibrillation classification model.

A convolutional neural network specifically designed for AF detection in ECG signals with configurable recording length support.

Parameters:

Name Type Description Default
input_channels int

Number of input ECG leads (default: 1)

1
output_size int

Number of output classes (default: 4)

4
recording_length int

Recording length in seconds (must be 6, 10, or 30)

30
Example

>>> model = AFModel(recording_length=30)
>>> x = torch.randn(32, 1, 9000)
>>> output = model(x)
>>> print(output.shape)  # (32, 4)

>>> features = model.extract_features(x)
>>> print(features.shape)  # (32, 196)

Source code in deepecgkit/models/af_classifier.py
@register_model(
    name="afmodel",
    description="Atrial Fibrillation model optimized for 30s ECG segments",
    default_kwargs={"recording_length": 30},
)
class AFModel(nn.Module):
    """
    Atrial Fibrillation classification model.

    A convolutional neural network specifically designed for AF detection
    in ECG signals with configurable recording length support.

    Args:
        input_channels: Number of input ECG leads (default: 1)
        output_size: Number of output classes (default: 4)
        recording_length: Recording length in seconds (must be 6, 10, or 30)

    Raises:
        ValueError: If ``recording_length`` is not one of 6, 10, or 30.

    Example:
        >>> model = AFModel(recording_length=30)
        >>> x = torch.randn(32, 1, 9000)
        >>> output = model(x)
        >>> print(output.shape)  # (32, 4)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 196)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        recording_length: int = 30,
    ):
        super().__init__()
        # Raise instead of `assert`: assertions are stripped under `python -O`,
        # which would silently accept unsupported recording lengths.
        if recording_length not in (6, 10, 30):
            raise ValueError(
                f"Recording length must be 6, 10, or 30, got {recording_length}"
            )

        self.recording_length = recording_length
        self.input_channels = input_channels
        # Longer recordings use a wider kernel (comparable receptive field
        # in seconds across supported lengths).
        kernel_size = 6 if recording_length in [6, 10] else 32

        # (in_channels, out_channels, dropout, pooling) per ConvBlock.
        layer_configs = [
            (input_channels, 64, True, True),
            (64, 64, False, False),
            (64, 64, True, True),
            (64, 64, False, False),
            (64, 64, True, True),
            (64, 64, False, False),
            (64, 128, True, True),
            (128, 128, False, False),
            (128, 128, True, True),
            (128, 128, False, False),
            (128, 196, True, True),
            (196, 196, False, False),
            (196, 196, True, True),
        ]

        self.conv_layers = nn.ModuleList(
            [
                ConvBlock(in_ch, out_ch, kernel_size, dropout, pooling)
                for in_ch, out_ch, dropout, pooling in layer_configs
            ]
        )

        self.adaptive_pool = nn.AdaptiveAvgPool1d(1)
        self.flatten = nn.Flatten()
        self._feature_dim = 196
        self.dropout = nn.Dropout(0.3)
        self.output = nn.Linear(in_features=self._feature_dim, out_features=output_size)

    @property
    def feature_dim(self) -> int:
        """Width of extract_features output (channels of the last ConvBlock)."""
        return self._feature_dim

    def extract_features(self, data: torch.Tensor) -> torch.Tensor:
        """Return globally pooled conv features of shape (batch, 196)."""
        for conv_layer in self.conv_layers:
            data = conv_layer(data)
        data = self.adaptive_pool(data)
        data = self.flatten(data)
        return data

    def forward(self, data):
        """Return class logits of shape (batch, output_size)."""
        data = self.extract_features(data)
        data = self.dropout(data)
        return self.output(data)

    def get_feature_size(self, input_size: int) -> int:
        """Return the temporal length of the conv stack's output for a signal
        of ``input_size`` samples (before adaptive pooling)."""
        with torch.no_grad():
            # Use the model's actual lead count; previously hard-coded to 1,
            # which broke for multi-lead configurations.
            dummy_input = torch.randn(1, self.input_channels, input_size)

            for conv_layer in self.conv_layers:
                dummy_input = conv_layer(dummy_input)

            return dummy_input.shape[-1]

    @classmethod
    def from_pretrained(
        cls,
        weights: str,
        map_location: Optional[Union[str, torch.device]] = None,
        force_download: bool = False,
        **kwargs,
    ) -> "AFModel":
        """Load a pretrained AFModel.

        Args:
            weights: Name of pretrained weights (e.g., "afmodel-30s") or path to weights file
            map_location: Device to map weights to (e.g., "cpu", "cuda")
            force_download: If True, re-download weights even if cached
            **kwargs: Override default model parameters from the weight registry

        Returns:
            Model with pretrained weights loaded

        Example:
            >>> model = AFModel.from_pretrained("afmodel-30s")
            >>> model = AFModel.from_pretrained("afmodel-30s", map_location="cuda")
            >>> model = AFModel.from_pretrained("/path/to/weights.pt", recording_length=30)
        """
        weight_path = Path(weights)
        if weight_path.exists():
            # Local file: construct the model from caller-supplied kwargs only.
            state_dict = torch.load(weight_path, map_location=map_location, weights_only=True)
            model = cls(**kwargs)
        else:
            # Registry name: registry defaults, overridable by caller kwargs.
            info = get_weight_info(weights)
            model_kwargs = {**info["model_kwargs"], **kwargs}
            model = cls(**model_kwargs)
            state_dict = load_pretrained_weights(weights, map_location, force_download)

        model.load_state_dict(state_dict)
        return model

from_pretrained classmethod

from_pretrained(
    weights: str,
    map_location: Optional[Union[str, device]] = None,
    force_download: bool = False,
    **kwargs,
) -> AFModel

Load a pretrained AFModel.

Parameters:

Name Type Description Default
weights str

Name of pretrained weights (e.g., "afmodel-30s") or path to weights file

required
map_location Optional[Union[str, device]]

Device to map weights to (e.g., "cpu", "cuda")

None
force_download bool

If True, re-download weights even if cached

False
**kwargs

Override default model parameters from the weight registry

{}

Returns:

Type Description
AFModel

Model with pretrained weights loaded

Example

>>> model = AFModel.from_pretrained("afmodel-30s")
>>> model = AFModel.from_pretrained("afmodel-30s", map_location="cuda")
>>> model = AFModel.from_pretrained("/path/to/weights.pt", recording_length=30)

Source code in deepecgkit/models/af_classifier.py
@classmethod
def from_pretrained(
    cls,
    weights: str,
    map_location: Optional[Union[str, torch.device]] = None,
    force_download: bool = False,
    **kwargs,
) -> "AFModel":
    """Construct an AFModel and populate it with pretrained weights.

    Args:
        weights: Name of pretrained weights (e.g., "afmodel-30s") or path to weights file
        map_location: Device to map weights to (e.g., "cpu", "cuda")
        force_download: If True, re-download weights even if cached
        **kwargs: Override default model parameters from the weight registry

    Returns:
        Model with pretrained weights loaded

    Example:
        >>> model = AFModel.from_pretrained("afmodel-30s")
        >>> model = AFModel.from_pretrained("afmodel-30s", map_location="cuda")
        >>> model = AFModel.from_pretrained("/path/to/weights.pt", recording_length=30)
    """
    local_path = Path(weights)
    if local_path.exists():
        # Local checkpoint file: model hyperparameters come solely from
        # the caller-supplied kwargs.
        state_dict = torch.load(local_path, map_location=map_location, weights_only=True)
        model = cls(**kwargs)
    else:
        # Registry name: look up default hyperparameters and let caller
        # kwargs override them, then fetch (or reuse cached) weights.
        registry_entry = get_weight_info(weights)
        merged_kwargs = {**registry_entry["model_kwargs"], **kwargs}
        model = cls(**merged_kwargs)
        state_dict = load_pretrained_weights(weights, map_location, force_download)

    model.load_state_dict(state_dict)
    return model

Residual Networks

ResNet1D

Bases: Module

1D ResNet model for ECG signal classification.

A residual network architecture adapted for 1D time-series ECG data, providing deep feature extraction with skip connections.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
base_channels int

Base number of channels (default: 64)

64
num_blocks list | None

List of number of blocks in each layer (default: [2, 2, 2, 2])

None
dropout_rate float

Dropout probability (default: 0.3)

0.3
Example

>>> model = ResNet1D(input_channels=1, output_size=4)
>>> x = torch.randn(32, 1, 3000)
>>> output = model(x)
>>> print(output.shape)

>>> features = model.extract_features(x)
>>> print(features.shape)  # (32, 512) with base_channels=64

Source code in deepecgkit/models/resnet1d.py
@register_model(
    name="resnet",
    description="1D ResNet architecture adapted for ECG",
)
class ResNet1D(nn.Module):
    """
    1D ResNet model for ECG signal classification.

    A residual network adapted for 1D time-series ECG data: a wide-kernel
    stem, four residual stages with doubling channel widths, then global
    average pooling and a linear classifier.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        base_channels: Base number of channels (default: 64)
        num_blocks: List of number of blocks in each layer (default: [2, 2, 2, 2])
        dropout_rate: Dropout probability (default: 0.3)

    Example:
        >>> model = ResNet1D(input_channels=1, output_size=4)
        >>> x = torch.randn(32, 1, 3000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 512) with base_channels=64
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        base_channels: int = 64,
        num_blocks: list | None = None,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        num_blocks = [2, 2, 2, 2] if num_blocks is None else num_blocks

        self.in_channels = base_channels

        # Stem: large-kernel strided conv + max-pool gives an early 4x
        # temporal downsampling before the residual stages.
        self.conv1 = nn.Conv1d(
            input_channels,
            base_channels,
            kernel_size=15,
            stride=2,
            padding=7,
            bias=False,
        )
        self.bn1 = nn.BatchNorm1d(base_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        # Stages double the channel width and halve the length (except the first).
        self.layer1 = self._make_layer(base_channels, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(base_channels * 2, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(base_channels * 4, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(base_channels * 8, num_blocks[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = base_channels * 8
        self.fc = nn.Linear(self._feature_dim, output_size)

    def _make_layer(self, out_channels: int, num_blocks: int, stride: int = 1):
        """Build one residual stage; only its first block may reshape the input."""
        shortcut = None
        if stride != 1 or self.in_channels != out_channels:
            # 1x1 projection so the identity path matches the new shape.
            shortcut = nn.Sequential(
                nn.Conv1d(
                    self.in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm1d(out_channels),
            )

        first_block = ResidualBlock1D(
            self.in_channels,
            out_channels,
            stride=stride,
            downsample=shortcut,
        )
        self.in_channels = out_channels
        tail = [ResidualBlock1D(out_channels, out_channels) for _ in range(num_blocks - 1)]
        return nn.Sequential(first_block, *tail)

    @property
    def feature_dim(self) -> int:
        """Width of extract_features output (channels of the last stage)."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return globally pooled residual features of shape (batch, feature_dim)."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        return torch.flatten(self.avgpool(out), 1)

    def forward(self, x):
        """Return class logits of shape (batch, output_size)."""
        return self.fc(self.dropout(self.extract_features(x)))

ResNetWang

Bases: Module

ResNet model based on Wang et al.'s architecture for time series classification.

A shallow 3-block residual network without initial pooling, using larger initial channels (128) and asymmetric kernel sizes in residual blocks. This is the standard ResNet baseline from the PTB-XL benchmark.

Reference

Wang Z., Yan W., Oates T. "Time Series Classification from Scratch with Deep Neural Networks: A Strong Baseline" (2017) https://github.com/helme/ecg_ptbxl_benchmarking

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 12 for 12-lead ECG)

12
output_size int

Number of output classes

5
base_channels int

Base number of channels (default: 128)

128
kernel_size int

Primary kernel size for residual blocks (default: 5)

5
kernel_size_stem int

Kernel size for the stem convolution (default: 7)

7
dropout_rate float

Dropout probability before classifier (default: 0.3)

0.3
Example

>>> model = ResNetWang(input_channels=12, output_size=5)
>>> x = torch.randn(32, 12, 1000)
>>> output = model(x)
>>> print(output.shape)

>>> features = model.extract_features(x)
>>> print(features.shape)  # (32, 128)

Source code in deepecgkit/models/resnet1d_wang.py
@register_model(
    name="resnet-wang",
    description="ResNet (Wang et al.) for time series classification",
)
class ResNetWang(nn.Module):
    """
    ResNet model based on Wang et al.'s architecture for time series classification.

    A shallow three-stage residual network with no initial pooling and a
    constant channel width throughout, serving as the standard ResNet
    baseline from the PTB-XL benchmark.

    Reference:
        Wang Z., Yan W., Oates T. "Time Series Classification from Scratch
        with Deep Neural Networks: A Strong Baseline" (2017)
        https://github.com/helme/ecg_ptbxl_benchmarking

    Args:
        input_channels: Number of input channels (default: 12 for 12-lead ECG)
        output_size: Number of output classes
        base_channels: Base number of channels (default: 128)
        kernel_size: Primary kernel size for residual blocks (default: 5)
        kernel_size_stem: Kernel size for the stem convolution (default: 7)
        dropout_rate: Dropout probability before classifier (default: 0.3)

    Example:
        >>> model = ResNetWang(input_channels=12, output_size=5)
        >>> x = torch.randn(32, 12, 1000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 128)
    """

    def __init__(
        self,
        input_channels: int = 12,
        output_size: int = 5,
        base_channels: int = 128,
        kernel_size: int = 5,
        kernel_size_stem: int = 7,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        self.in_channels = base_channels

        # Stem: single stride-1 conv, so the temporal resolution is preserved
        # all the way to the global pooling.
        self.stem = nn.Sequential(
            nn.Conv1d(
                input_channels,
                base_channels,
                kernel_size=kernel_size_stem,
                stride=1,
                padding=(kernel_size_stem - 1) // 2,
                bias=False,
            ),
            nn.BatchNorm1d(base_channels),
            nn.ReLU(inplace=True),
        )

        # Three single-block residual stages at constant width.
        for stage_idx in (1, 2, 3):
            setattr(
                self,
                f"layer{stage_idx}",
                self._make_layer(base_channels, 1, kernel_size=kernel_size, stride=1),
            )

        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = base_channels
        self.classifier = nn.Linear(self._feature_dim, output_size)

    def _make_layer(
        self,
        out_channels: int,
        num_blocks: int,
        kernel_size: int = 5,
        stride: int = 1,
    ) -> nn.Sequential:
        """Build one residual stage; a 1x1 projection is added only when
        the first block changes shape."""
        needs_projection = stride != 1 or self.in_channels != out_channels
        shortcut = (
            nn.Sequential(
                nn.Conv1d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(out_channels),
            )
            if needs_projection
            else None
        )

        blocks = [
            BasicBlock1DWang(
                self.in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                downsample=shortcut,
            )
        ]
        self.in_channels = out_channels
        blocks.extend(
            BasicBlock1DWang(out_channels, out_channels, kernel_size=kernel_size)
            for _ in range(num_blocks - 1)
        )

        return nn.Sequential(*blocks)

    @property
    def feature_dim(self) -> int:
        """Width of extract_features output (equals base_channels)."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return globally pooled residual features of shape (batch, feature_dim)."""
        out = self.stem(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        return torch.flatten(self.global_pool(out), 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, output_size)."""
        return self.classifier(self.dropout(self.extract_features(x)))

SEResNet1D

Bases: Module

SE-ResNet model for ECG signal classification.

Extends ResNet1D with Squeeze-and-Excitation blocks that learn channel attention weights. Particularly effective for multi-lead ECG where the network can learn which leads are most informative for each class.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
base_channels int

Base number of channels (default: 64)

64
num_blocks list | None

List of number of blocks in each layer (default: [2, 2, 2, 2])

None
se_reduction int

SE reduction ratio (default: 16)

16
dropout_rate float

Dropout probability (default: 0.3)

0.3
Example

>>> model = SEResNet1D(input_channels=12, output_size=5)
>>> x = torch.randn(32, 12, 5000)
>>> output = model(x)
>>> print(output.shape)

>>> features = model.extract_features(x)
>>> print(features.shape)  # (32, 512) with base_channels=64

Source code in deepecgkit/models/se_resnet1d.py
@register_model(
    name="se-resnet",
    description="ResNet with Squeeze-and-Excitation channel attention for ECG",
)
class SEResNet1D(nn.Module):
    """
    SE-ResNet model for ECG signal classification.

    Extends ResNet1D with Squeeze-and-Excitation blocks that learn channel
    attention weights. Particularly effective for multi-lead ECG where the
    network can learn which leads are most informative for each class.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        base_channels: Base number of channels (default: 64)
        num_blocks: List of number of blocks in each layer (default: [2, 2, 2, 2])
        se_reduction: SE reduction ratio (default: 16)
        dropout_rate: Dropout probability (default: 0.3)

    Example:
        >>> model = SEResNet1D(input_channels=12, output_size=5)
        >>> x = torch.randn(32, 12, 5000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 512) with base_channels=64
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        base_channels: int = 64,
        num_blocks: list | None = None,
        se_reduction: int = 16,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        num_blocks = [2, 2, 2, 2] if num_blocks is None else num_blocks

        self.in_channels = base_channels
        # Shared settings applied to every SE residual block.
        self.se_reduction = se_reduction
        self.dropout_rate = dropout_rate

        # Stem: strided wide-kernel conv + max-pool for early 4x downsampling.
        self.conv1 = nn.Conv1d(
            input_channels,
            base_channels,
            kernel_size=15,
            stride=2,
            padding=7,
            bias=False,
        )
        self.bn1 = nn.BatchNorm1d(base_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        widths = [base_channels, base_channels * 2, base_channels * 4, base_channels * 8]
        self.layer1 = self._make_layer(widths[0], num_blocks[0], stride=1)
        self.layer2 = self._make_layer(widths[1], num_blocks[1], stride=2)
        self.layer3 = self._make_layer(widths[2], num_blocks[2], stride=2)
        self.layer4 = self._make_layer(widths[3], num_blocks[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = widths[-1]
        self.fc = nn.Linear(self._feature_dim, output_size)

    def _make_layer(self, out_channels: int, num_blocks: int, stride: int = 1):
        """Stack SE residual blocks; only the first may change channels/stride."""
        if stride == 1 and self.in_channels == out_channels:
            shortcut = None
        else:
            # Project the identity path when the first block reshapes the input.
            shortcut = nn.Sequential(
                nn.Conv1d(
                    self.in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm1d(out_channels),
            )

        blocks = [
            SEResidualBlock1D(
                self.in_channels,
                out_channels,
                stride=stride,
                downsample=shortcut,
                se_reduction=self.se_reduction,
                dropout_rate=self.dropout_rate,
            )
        ]
        self.in_channels = out_channels

        blocks += [
            SEResidualBlock1D(
                out_channels,
                out_channels,
                se_reduction=self.se_reduction,
                dropout_rate=self.dropout_rate,
            )
            for _ in range(num_blocks - 1)
        ]

        return nn.Sequential(*blocks)

    @property
    def feature_dim(self) -> int:
        """Width of extract_features output (channels of the last stage)."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return globally pooled SE-residual features of shape (batch, feature_dim)."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        return torch.flatten(self.avgpool(out), 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, output_size)."""
        return self.fc(self.dropout(self.extract_features(x)))

XResNet1D

Bases: Module

XResNet model for ECG signal classification.

An improved ResNet incorporating three key enhancements from recent research: (1) a multi-layer stem instead of a single large convolution, (2) Mish activation for smoother gradients, and (3) anti-aliased blur-pool downsampling to reduce aliasing.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
base_channels int

Base number of channels (default: 64)

64
num_blocks list | None

List of number of blocks in each layer (default: [2, 2, 2, 2])

None
dropout_rate float

Dropout probability (default: 0.3)

0.3
use_blur_pool bool

Whether to use anti-aliased downsampling (default: True)

True
Example

>>> model = XResNet1D(input_channels=1, output_size=4)
>>> x = torch.randn(32, 1, 3000)
>>> output = model(x)
>>> print(output.shape)

>>> features = model.extract_features(x)
>>> print(features.shape)  # (32, 512) with base_channels=64

Source code in deepecgkit/models/xresnet1d.py
@register_model(
    name="xresnet",
    description="Improved ResNet with Mish activation and blur-pool downsampling",
)
class XResNet1D(nn.Module):
    """
    XResNet model for ECG signal classification.

    An improved ResNet incorporating three key enhancements from recent
    research: (1) a multi-layer stem instead of a single large convolution,
    (2) Mish activation for smoother gradients, and (3) anti-aliased
    blur-pool downsampling to reduce aliasing.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        base_channels: Base number of channels (default: 64)
        num_blocks: List of number of blocks in each layer (default: [2, 2, 2, 2])
        dropout_rate: Dropout probability (default: 0.3)
        use_blur_pool: Whether to use anti-aliased downsampling (default: True)

    Example:
        >>> model = XResNet1D(input_channels=1, output_size=4)
        >>> x = torch.randn(32, 1, 3000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 512) with base_channels=64
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        base_channels: int = 64,
        num_blocks: list[int] | None = None,
        dropout_rate: float = 0.3,
        use_blur_pool: bool = True,
    ):
        super().__init__()

        if num_blocks is None:
            num_blocks = [2, 2, 2, 2]

        # Running input-channel count; mutated by _make_layer as stages are built.
        self.in_channels = base_channels
        self.use_blur_pool = use_blur_pool
        # NOTE(review): self.act is assigned but never referenced in
        # extract_features/forward below — confirm whether it is vestigial.
        self.act = Mish()

        # Multi-layer stem: three convs (kernels 7/3/3) instead of one large
        # conv; only the first conv is strided (total stem stride = 2).
        self.stem = nn.Sequential(
            nn.Conv1d(
                input_channels,
                base_channels // 2,
                kernel_size=7,
                stride=2,
                padding=3,
                bias=False,
            ),
            nn.BatchNorm1d(base_channels // 2),
            Mish(),
            nn.Conv1d(
                base_channels // 2,
                base_channels // 2,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm1d(base_channels // 2),
            Mish(),
            nn.Conv1d(
                base_channels // 2,
                base_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm1d(base_channels),
            Mish(),
        )

        # Second stride-2 downsampling after the stem (anti-aliased or max pool).
        if use_blur_pool:
            self.stem_pool = BlurPool1D(base_channels, stride=2)
        else:
            self.stem_pool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; width doubles (and stride 2 halves length)
        # at each stage after the first. Construction order matters because
        # _make_layer mutates self.in_channels.
        self.layer1 = self._make_layer(base_channels, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(base_channels * 2, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(base_channels * 4, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(base_channels * 8, num_blocks[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = base_channels * 8
        self.fc = nn.Linear(self._feature_dim, output_size)

    def _make_layer(self, out_channels: int, num_blocks: int, stride: int = 1) -> nn.Sequential:
        """Build one residual stage of ``num_blocks`` XResBlock1D blocks.

        Side effect: updates ``self.in_channels`` to ``out_channels`` so the
        next stage sees the correct input width.
        """
        # Projection shortcut when the identity path cannot match the block
        # output: optional pooling for the stride, then 1x1 conv + BN for the
        # channel change.
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            ds_layers: list[nn.Module] = []
            if stride > 1:
                if self.use_blur_pool:
                    ds_layers.append(BlurPool1D(self.in_channels, stride=stride))
                else:
                    ds_layers.append(nn.AvgPool1d(kernel_size=stride, stride=stride))
            ds_layers.append(nn.Conv1d(self.in_channels, out_channels, kernel_size=1, bias=False))
            ds_layers.append(nn.BatchNorm1d(out_channels))
            downsample = nn.Sequential(*ds_layers)

        # Only the first block of a stage strides/changes width; the rest keep shape.
        layers = [
            XResBlock1D(
                self.in_channels,
                out_channels,
                stride=stride,
                downsample=downsample,
                use_blur_pool=self.use_blur_pool,
            )
        ]
        self.in_channels = out_channels

        for _ in range(1, num_blocks):
            layers.append(
                XResBlock1D(
                    out_channels,
                    out_channels,
                    use_blur_pool=self.use_blur_pool,
                )
            )

        return nn.Sequential(*layers)

    @property
    def feature_dim(self) -> int:
        """Dimensionality of the vectors returned by ``extract_features``."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return pooled features of shape (batch, base_channels * 8)."""
        x = self.stem(x)
        x = self.stem_pool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return x

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits: features -> dropout -> linear head."""
        x = self.extract_features(x)
        x = self.dropout(x)
        x = self.fc(x)
        return x

XResNet1dBenchmark

Bases: Module

XResNet1d adapted from the PTB-XL benchmarking repository.

Key differences from the standard XResNet1D: - Fixed feature dimension across all layers (all blocks use same width) - Concat pooling head (AdaptiveAvgPool + AdaptiveMaxPool concatenated) - Multi-layer stem with configurable kernel size - Supports both BasicBlock (expansion=1) and Bottleneck (expansion=4)

Reference

Strodthoff N., Wagner P., Schaeffter T., Samek W. "Deep Learning for ECG Analysis: Benchmarks and Insights from PTB-XL" (2021) https://github.com/helme/ecg_ptbxl_benchmarking

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 12 for 12-lead ECG)

12
output_size int

Number of output classes

5
expansion int

Block expansion factor (1=BasicBlock, 4=Bottleneck)

1
layers list[int] | None

List of block counts per layer (default: [2, 2, 2, 2])

None
base_channels int

Fixed channel width for all layers (default: 64)

64
kernel_size int

Convolution kernel size (default: 5)

5
kernel_size_stem int

Stem convolution kernel size (default: 5)

5
stem_channels tuple[int, ...]

Tuple of stem layer channels

(32, 32, 64)
dropout_rate float

Dropout probability in head (default: 0.5)

0.5
concat_pooling bool

Use avg+max concat pooling (default: True)

True
Example

model = XResNet1dBenchmark(input_channels=12, output_size=5) x = torch.randn(32, 12, 1000) output = model(x) print(output.shape)

features = model.extract_features(x) print(features.shape) # (32, 128) with concat_pooling

Source code in deepecgkit/models/xresnet1d_benchmark.py
@register_model(
    name="xresnet1d-benchmark",
    description="XResNet1d from PTB-XL benchmark (fixed feature dim, concat pooling)",
)
class XResNet1dBenchmark(nn.Module):
    """
    XResNet1d adapted from the PTB-XL benchmarking repository.

    Key differences from the standard XResNet1D:
    - Fixed feature dimension across all layers (all blocks use same width)
    - Concat pooling head (AdaptiveAvgPool + AdaptiveMaxPool concatenated)
    - Multi-layer stem with configurable kernel size
    - Supports both BasicBlock (expansion=1) and Bottleneck (expansion=4)

    Reference:
        Strodthoff N., Wagner P., Schaeffter T., Samek W.
        "Deep Learning for ECG Analysis: Benchmarks and Insights from PTB-XL" (2021)
        https://github.com/helme/ecg_ptbxl_benchmarking

    Args:
        input_channels: Number of input channels (default: 12 for 12-lead ECG)
        output_size: Number of output classes
        expansion: Block expansion factor (1=BasicBlock, 4=Bottleneck)
        layers: List of block counts per layer (default: [2, 2, 2, 2])
        base_channels: Fixed channel width for all layers (default: 64)
        kernel_size: Convolution kernel size (default: 5)
        kernel_size_stem: Stem convolution kernel size (default: 5)
        stem_channels: Tuple of stem layer channels
        dropout_rate: Dropout probability in head (default: 0.5)
        concat_pooling: Use avg+max concat pooling (default: True)

    Example:
        >>> model = XResNet1dBenchmark(input_channels=12, output_size=5)
        >>> x = torch.randn(32, 12, 1000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 128) with concat_pooling
    """

    def __init__(
        self,
        input_channels: int = 12,
        output_size: int = 5,
        expansion: int = 1,
        layers: list[int] | None = None,
        base_channels: int = 64,
        kernel_size: int = 5,
        kernel_size_stem: int = 5,
        stem_channels: tuple[int, ...] = (32, 32, 64),
        dropout_rate: float = 0.5,
        concat_pooling: bool = True,
    ):
        super().__init__()

        # Two blocks per stage by default (ResNet-18-style depth).
        if layers is None:
            layers = [2, 2, 2, 2]

        self.expansion = expansion
        self.concat_pooling = concat_pooling

        # Stem: chain of Conv+BN+Act layers; only the first conv is strided.
        stem_szs = [input_channels, *stem_channels]
        self.stem = nn.Sequential(
            *[
                ConvBnAct(
                    stem_szs[i], stem_szs[i + 1], ks=kernel_size_stem, stride=2 if i == 0 else 1
                )
                for i in range(len(stem_channels))
            ]
        )
        self.stem_pool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        # First entry is 64 // expansion so that, after the block's internal
        # expansion, the first stage's input width matches the stem output.
        # NOTE(review): assumes XResBlock scales its input width by
        # `expansion` internally (fastai convention) — confirm in XResBlock.
        block_szs = [64 // expansion, *([base_channels] * len(layers))]

        # All stages share the same base width; stride 2 from the second stage on.
        self.res_layers = nn.Sequential(
            *[
                self._make_layer(
                    ni=block_szs[i],
                    nf=block_szs[i + 1],
                    blocks=n_blocks,
                    stride=1 if i == 0 else 2,
                    kernel_size=kernel_size,
                )
                for i, n_blocks in enumerate(layers)
            ]
        )

        # Concat pooling doubles the feature dimension (avg + max halves).
        final_nf = block_szs[-1] * expansion
        self._feature_dim = final_nf * 2 if concat_pooling else final_nf

        if concat_pooling:
            self.pool = nn.ModuleList([nn.AdaptiveAvgPool1d(1), nn.AdaptiveMaxPool1d(1)])
        else:
            self.pool = nn.ModuleList([nn.AdaptiveAvgPool1d(1)])

        # Classification head per the benchmark repo: Flatten -> BN ->
        # Dropout(rate / 2) -> Linear.
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.BatchNorm1d(self._feature_dim),
            nn.Dropout(dropout_rate / 2),
            nn.Linear(self._feature_dim, output_size),
        )

        # Project-level weight initialization helper (presumably kaiming-style
        # CNN init — confirm in _init_cnn).
        _init_cnn(self)

    def _make_layer(
        self, ni: int, nf: int, blocks: int, stride: int, kernel_size: int
    ) -> nn.Sequential:
        """Build one stage: first block strides/changes width, the rest keep shape."""
        return nn.Sequential(
            *[
                XResBlock(
                    self.expansion,
                    ni if i == 0 else nf,
                    nf,
                    stride=stride if i == 0 else 1,
                    kernel_size=kernel_size,
                )
                for i in range(blocks)
            ]
        )

    @property
    def feature_dim(self) -> int:
        """Dimensionality of the vectors returned by ``extract_features``."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return pooled features of shape (batch, feature_dim)."""
        x = self.stem(x)
        x = self.stem_pool(x)
        x = self.res_layers(x)
        # Concatenate all pooling branches along the channel axis.
        pooled = torch.cat([p(x) for p in self.pool], dim=1)
        return torch.flatten(pooled, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits via the BN/Dropout/Linear head."""
        x = self.extract_features(x)
        return self.head(x)

KanResWideX

Bases: Module

KanRes-Wide-X model for ECG signal classification.

A convolutional neural network architecture designed for ECG signal analysis with residual connections and wide blocks for improved feature extraction.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes or regression targets

4
base_channels int

Base number of channels for the first layer (default: 64)

64
Example

model = KanResWideX(input_channels=1, output_size=4) x = torch.randn(32, 1, 3000) output = model(x) print(output.shape) # [32, 4]

features = model.extract_features(x) print(features.shape) # (32, 64)

Source code in deepecgkit/models/kanres_x.py
@register_model(
    name="kanres",
    description="KAN-ResNet architecture with wide layers",
)
class KanResWideX(nn.Module):
    """KanRes-Wide-X network for ECG signals.

    A convolutional input stage feeds three KanRes residual modules of fixed
    width, followed by global average pooling and a linear head.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes or regression targets
        base_channels: Channel width used throughout the network (default: 64)

    Example:
        >>> net = KanResWideX(input_channels=1, output_size=4)
        >>> logits = net(torch.randn(8, 1, 3000))
        >>> logits.shape
        torch.Size([8, 4])

        >>> net.extract_features(torch.randn(8, 1, 3000)).shape
        torch.Size([8, 64])
    """

    def __init__(self, input_channels: int = 1, output_size: int = 4, base_channels: int = 64):
        super().__init__()

        # Stem, then three identical residual modules at base_channels width.
        self.input_layer = ConvBlock(input_channels, base_channels)
        self.res_modules = nn.Sequential(*(KanResModule(base_channels) for _ in range(3)))
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self._feature_dim = base_channels
        self.classifier = nn.Linear(base_channels, output_size)

    @property
    def feature_dim(self) -> int:
        """Width of the embedding returned by ``extract_features``."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return globally pooled features of shape (batch, base_channels)."""
        pooled = self.global_pool(self.res_modules(self.input_layer(x)))
        return pooled.squeeze(-1)

    def forward(self, x):
        """Return class logits for a batch of signals."""
        return self.classifier(self.extract_features(x))

    @classmethod
    def from_pretrained(
        cls,
        weights: str,
        map_location: Optional[Union[str, torch.device]] = None,
        force_download: bool = False,
        **kwargs,
    ) -> "KanResWideX":
        """Instantiate a KanResWideX with pretrained weights loaded.

        Args:
            weights: Registry name (e.g. "kanres-af-30s") or a filesystem path
                to a weights file
            map_location: Target device for the loaded tensors (e.g. "cpu", "cuda")
            force_download: Re-download registry weights even when cached
            **kwargs: Constructor overrides applied on top of registry defaults

        Returns:
            A model with the checkpoint loaded.

        Example:
            >>> model = KanResWideX.from_pretrained("kanres-af-30s")
            >>> model = KanResWideX.from_pretrained("kanres-af-30s", map_location="cuda")
            >>> model = KanResWideX.from_pretrained("/path/to/weights.pt", output_size=2)
        """
        candidate = Path(weights)
        if candidate.exists():
            # Local checkpoint: construct purely from caller-supplied kwargs.
            state_dict = torch.load(candidate, map_location=map_location, weights_only=True)
            model = cls(**kwargs)
        else:
            # Registry name: merge registry defaults with caller overrides.
            entry = get_weight_info(weights)
            model = cls(**{**entry["model_kwargs"], **kwargs})
            state_dict = load_pretrained_weights(weights, map_location, force_download)

        model.load_state_dict(state_dict)
        return model

from_pretrained classmethod

from_pretrained(
    weights: str,
    map_location: Optional[Union[str, device]] = None,
    force_download: bool = False,
    **kwargs,
) -> KanResWideX

Load a pretrained KanResWideX model.

Parameters:

Name Type Description Default
weights str

Name of pretrained weights (e.g., "kanres-af-30s") or path to weights file

required
map_location Optional[Union[str, device]]

Device to map weights to (e.g., "cpu", "cuda")

None
force_download bool

If True, re-download weights even if cached

False
**kwargs

Override default model parameters from the weight registry

{}

Returns:

Type Description
KanResWideX

Model with pretrained weights loaded

Example

model = KanResWideX.from_pretrained("kanres-af-30s") model = KanResWideX.from_pretrained("kanres-af-30s", map_location="cuda") model = KanResWideX.from_pretrained("/path/to/weights.pt", output_size=2)

Source code in deepecgkit/models/kanres_x.py
@classmethod
def from_pretrained(
    cls,
    weights: str,
    map_location: Optional[Union[str, torch.device]] = None,
    force_download: bool = False,
    **kwargs,
) -> "KanResWideX":
    """Load a pretrained KanResWideX model.

    Args:
        weights: Name of pretrained weights (e.g., "kanres-af-30s") or path to weights file
        map_location: Device to map weights to (e.g., "cpu", "cuda")
        force_download: If True, re-download weights even if cached
        **kwargs: Override default model parameters from the weight registry

    Returns:
        Model with pretrained weights loaded

    Example:
        >>> model = KanResWideX.from_pretrained("kanres-af-30s")
        >>> model = KanResWideX.from_pretrained("kanres-af-30s", map_location="cuda")
        >>> model = KanResWideX.from_pretrained("/path/to/weights.pt", output_size=2)
    """
    # An existing local file takes precedence over registry weight names.
    weight_path = Path(weights)
    if weight_path.exists():
        state_dict = torch.load(weight_path, map_location=map_location, weights_only=True)
        model = cls(**kwargs)
    else:
        # Registry path: merge registry defaults with caller overrides.
        info = get_weight_info(weights)
        model_kwargs = {**info["model_kwargs"], **kwargs}
        model = cls(**model_kwargs)
        state_dict = load_pretrained_weights(weights, map_location, force_download)

    # Raises if the checkpoint does not match the constructed architecture.
    model.load_state_dict(state_dict)
    return model

KanResDeepX

Bases: Module

KanRes-Deep-X model for ECG signal classification.

A deep residual convolutional neural network architecture designed for ECG signal analysis with 8 residual blocks for improved feature extraction.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes (default: 4)

4
base_channels int

Base number of channels for the architecture (default: 32)

32
Example

model = KanResDeepX(input_channels=1, output_size=4, base_channels=32) x = torch.randn(32, 1, 3000) output = model(x) print(output.shape)

features = model.extract_features(x) print(features.shape) # (32, 32)

Source code in deepecgkit/models/kanres_wide_x.py
@register_model(
    name="kanres-deep",
    description="Deep KAN-ResNet architecture",
)
class KanResDeepX(nn.Module):
    """KanRes-Deep-X network for ECG signals.

    An initial KanRes block plus average pooling feeds a stack of eight
    residual modules, followed by global average pooling and a linear head.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes (default: 4)
        base_channels: Channel width of the residual trunk (default: 32)

    Example:
        >>> net = KanResDeepX(input_channels=1, output_size=4, base_channels=32)
        >>> net(torch.randn(8, 1, 3000)).shape
        torch.Size([8, 4])

        >>> net.extract_features(torch.randn(8, 1, 3000)).shape
        torch.Size([8, 32])
    """

    def __init__(self, input_channels: int = 1, output_size: int = 4, base_channels: int = 32):
        super().__init__()

        self.base_channels = base_channels

        # Stem expands to 2x width internally; the trunk runs at base_channels.
        self.init_block = KanResInit(input_channels, base_channels * 2, base_channels, 8, 3, 1)
        self.pool = nn.AvgPool1d(kernel_size=2)

        # Eight identical residual modules.
        self.res_modules = nn.ModuleList(
            KanResModule(base_channels, base_channels * 2, base_channels, 50, 50, 1)
            for _ in range(8)
        )

        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self._feature_dim = base_channels
        self.fc = nn.Linear(base_channels, output_size)

    @property
    def feature_dim(self) -> int:
        """Width of the embedding returned by ``extract_features``."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return globally pooled features of shape (batch, base_channels)."""
        out = self.pool(self.init_block(x))
        for block in self.res_modules:
            out = block(out)
        return self.global_pool(out).squeeze(-1)

    def forward(self, x):
        """Return class logits for a batch of signals."""
        return self.fc(self.extract_features(x))

DeepResCNN

Bases: Module

Deep Residual 2D CNN for ECG classification.

Faithful implementation of Elyamani et al. (2022). Uses Conv2d with (1, k) kernels and valid padding to process each ECG lead independently along the time axis, then fuses across leads with a (leads, 1) convolution at the end.

Input convention: (batch, leads, time) -- standard deepecg-kit format. Internally reshaped to (batch, 1, leads, time) for 2D convolution.

The classifier head includes L2 regularization matching the original Keras model. Call l2_regularization_loss() to obtain the penalty term.

Reference

https://github.com/HaneenElyamani/ECG-classification

Parameters:

Name Type Description Default
input_channels int

Number of ECG leads (default: 12)

12
output_size int

Number of output classes (default: 5)

5
dropout_rate float

Dropout probability in residual blocks (default: 0.1)

0.1
Example

model = DeepResCNN(input_channels=12, output_size=5) x = torch.randn(32, 12, 1000) output = model(x) print(output.shape)

features = model.extract_features(x) print(features.shape) # (32, 128)

Source code in deepecgkit/models/deep_res_cnn.py
@register_model(
    name="deep-res-cnn",
    description="Deep Residual 2D CNN (Elyamani et al. 2022) for multi-lead ECG classification",
)
class DeepResCNN(nn.Module):
    """Deep residual 2D CNN for multi-lead ECG classification.

    Re-implementation of Elyamani et al. (2022). Each lead is processed
    independently along the time axis via Conv2d layers with (1, k) kernels
    and no padding; a final (leads, 1) convolution fuses across leads.

    Input is the standard (batch, leads, time) tensor; a singleton channel
    axis is inserted internally so the 2D convolutions see
    (batch, 1, leads, time).

    The two hidden dense layers of the original Keras model carry L2 weight
    penalties; add ``l2_regularization_loss()`` to the training loss to
    reproduce that behavior.

    Reference:
        https://github.com/HaneenElyamani/ECG-classification

    Args:
        input_channels: Number of ECG leads (default: 12)
        output_size: Number of output classes (default: 5)
        dropout_rate: Dropout probability in residual blocks (default: 0.1)

    Example:
        >>> net = DeepResCNN(input_channels=12, output_size=5)
        >>> net(torch.randn(8, 12, 1000)).shape
        torch.Size([8, 5])

        >>> net.extract_features(torch.randn(8, 12, 1000)).shape
        torch.Size([8, 128])
    """

    def __init__(
        self,
        input_channels: int = 12,
        output_size: int = 5,
        dropout_rate: float = 0.1,
    ):
        super().__init__()

        # Per-lead stem: (1, 7) kernel convolves along time only.
        self.stem_conv = nn.Conv2d(1, 32, kernel_size=(1, 7))
        self.stem_bn = nn.BatchNorm2d(32)

        # Residual trunk: 32 -> 64 -> 64 -> 128 -> 128 channels.
        self.block1 = StemResBlock2D(32, 64, dropout_rate=dropout_rate)
        self.block2 = PreActResBlock2D(64, 64, dropout_rate=dropout_rate)
        self.block3 = PreActResBlock2D(64, 128, dropout_rate=dropout_rate)
        self.block4 = PreActResBlock2D(128, 128, dropout_rate=dropout_rate)

        # (leads, 1) kernel collapses the lead axis in a single step.
        self.lead_fusion = nn.Conv2d(128, 128, kernel_size=(input_channels, 1))
        self.lead_fusion_bn = nn.BatchNorm2d(128)
        self.pool = nn.AdaptiveAvgPool2d(1)

        self._feature_dim = 128

        # Classifier head (mirrors the original Dense layers).
        self.fc1 = nn.Linear(128, 128)
        self.fc1_bn = nn.BatchNorm1d(128)
        self.fc1_drop = nn.Dropout(dropout_rate)

        self.fc2 = nn.Linear(128, 64)
        self.fc2_bn = nn.BatchNorm1d(64)
        self.fc2_drop = nn.Dropout(0.15)

        self.fc_out = nn.Linear(64, output_size)

    @property
    def feature_dim(self) -> int:
        """Width of the embedding returned by ``extract_features``."""
        return self._feature_dim

    def l2_regularization_loss(self) -> torch.Tensor:
        """Return the L2 penalty for the classifier head weights.

        Matches the Keras kernel_regularizer=L2(lambda) on the two Dense layers.
        Add this to the training loss for full equivalence with the original model.
        """
        fc1_penalty = self.fc1.weight.pow(2).sum()
        fc2_penalty = self.fc2.weight.pow(2).sum()
        return 0.005 * fc1_penalty + 0.009 * fc2_penalty

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return pooled features of shape (batch, 128)."""
        # (batch, leads, time) -> (batch, 1, leads, time) for 2D convs.
        out = func.relu(self.stem_bn(self.stem_conv(x.unsqueeze(1))))

        for block in (self.block1, self.block2, self.block3, self.block4):
            out = block(out)

        out = func.relu(self.lead_fusion_bn(self.lead_fusion(out)))
        return torch.flatten(self.pool(out), 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits via the two-layer dense head."""
        out = self.extract_features(x)
        out = self.fc1_drop(func.relu(self.fc1_bn(self.fc1(out))))
        out = self.fc2_drop(func.relu(self.fc2_bn(self.fc2(out))))
        return self.fc_out(out)

l2_regularization_loss

l2_regularization_loss() -> torch.Tensor

Return the L2 penalty for the classifier head weights.

Matches the Keras kernel_regularizer=L2(lambda) on the two Dense layers. Add this to the training loss for full equivalence with the original model.

Source code in deepecgkit/models/deep_res_cnn.py
def l2_regularization_loss(self) -> torch.Tensor:
    """Return the L2 penalty for the classifier head weights.

    Matches the Keras kernel_regularizer=L2(lambda) on the two Dense layers.
    Add this to the training loss for full equivalence with the original model.
    """
    # 0.005 and 0.009 are the per-layer L2 lambdas for fc1 and fc2 respectively.
    return 0.005 * self.fc1.weight.pow(2).sum() + 0.009 * self.fc2.weight.pow(2).sum()

Modern CNN

ConvNeXtV21D

Bases: Module

ConvNeXtV2 adapted for 1D ECG signals.

A modern convolutional architecture using depthwise separable convolutions, LayerNorm, GELU activation, and Global Response Normalization (GRN).

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
dims list | None

Channel dimensions for each stage (default: [64, 128, 256, 512])

None
depths list | None

Number of blocks per stage (default: [2, 2, 6, 2])

None
kernel_size int

Kernel size for depthwise convolutions (default: 7)

7
expansion_factor int

Expansion ratio for inverted bottleneck (default: 4)

4
dropout_rate float

Dropout probability (default: 0.3)

0.3
Example

model = ConvNeXtV21D(input_channels=1, output_size=4) x = torch.randn(32, 1, 3000) output = model(x) print(output.shape)

features = model.extract_features(x) print(features.shape) # (32, 512)

Source code in deepecgkit/models/convnext_v2_1d.py
@register_model(
    name="convnext-v2",
    description="Modern ConvNet with depthwise convolutions and Global Response Normalization for ECG",
)
class ConvNeXtV21D(nn.Module):
    """ConvNeXtV2 backbone adapted to 1D ECG signals.

    Stages of depthwise-convolution blocks with LayerNorm, GELU activation,
    and Global Response Normalization (GRN), joined by strided downsampling
    layers.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        dims: Channel dimensions for each stage (default: [64, 128, 256, 512])
        depths: Number of blocks per stage (default: [2, 2, 6, 2])
        kernel_size: Kernel size for depthwise convolutions (default: 7)
        expansion_factor: Expansion ratio for inverted bottleneck (default: 4)
        dropout_rate: Dropout probability (default: 0.3)

    Example:
        >>> net = ConvNeXtV21D(input_channels=1, output_size=4)
        >>> net(torch.randn(8, 1, 3000)).shape
        torch.Size([8, 4])

        >>> net.extract_features(torch.randn(8, 1, 3000)).shape
        torch.Size([8, 512])
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        dims: list | None = None,
        depths: list | None = None,
        kernel_size: int = 7,
        expansion_factor: int = 4,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        if dims is None:
            dims = [64, 128, 256, 512]
        if depths is None:
            depths = [2, 2, 6, 2]

        # Patchify-style stem: stride-4 conv followed by channel LayerNorm.
        self.stem = nn.Sequential(
            nn.Conv1d(input_channels, dims[0], kernel_size=7, stride=4, padding=3),
            LayerNorm1d(dims[0]),
        )

        # One Sequential of ConvNeXtV2 blocks per stage.
        self.stages = nn.ModuleList(
            nn.Sequential(
                *(ConvNeXtV2Block1D(dim, expansion_factor, kernel_size) for _ in range(depth))
            )
            for dim, depth in zip(dims, depths)
        )

        # Norm + strided conv between consecutive stages (one fewer than stages).
        self.downsamples = nn.ModuleList(
            nn.Sequential(
                LayerNorm1d(dims[i]),
                nn.Conv1d(dims[i], dims[i + 1], kernel_size=2, stride=2),
            )
            for i in range(len(dims) - 1)
        )

        self.pool = nn.AdaptiveAvgPool1d(1)
        self.final_norm = nn.LayerNorm(dims[-1])
        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = dims[-1]

        self.classifier = nn.Linear(dims[-1], output_size)

    @property
    def feature_dim(self) -> int:
        """Width of the embedding returned by ``extract_features``."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return normalized pooled features of shape (batch, dims[-1])."""
        out = self.stem(x)
        for idx, stage in enumerate(self.stages):
            out = stage(out)
            # No downsampling after the final stage.
            if idx < len(self.downsamples):
                out = self.downsamples[idx](out)
        out = torch.flatten(self.pool(out), 1)
        return self.final_norm(out)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits: features -> dropout -> linear head."""
        return self.classifier(self.dropout(self.extract_features(x)))

InceptionTime1D

Bases: Module

InceptionTime model adapted for 1D ECG signal classification.

Uses parallel convolutions at multiple temporal scales (short/medium/long kernels) with residual connections to capture both rapid arrhythmia spikes and slow rhythm patterns simultaneously.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
n_filters int

Number of filters per Inception branch (default: 32)

32
depth int

Number of Inception residual blocks (default: 6)

6
kernel_sizes tuple[int, ...]

Tuple of kernel sizes for multi-scale branches

(5, 15, 41)
bottleneck_channels int

Channels in bottleneck layers (default: 32)

32
dropout_rate float

Dropout probability (default: 0.3)

0.3
Example

model = InceptionTime1D(input_channels=1, output_size=4) x = torch.randn(32, 1, 3000) output = model(x) print(output.shape)

features = model.extract_features(x) print(features.shape) # (32, 128) with n_filters=32

Source code in deepecgkit/models/inception_time.py
@register_model(
    name="inception-time",
    description="Multi-scale temporal CNN inspired by InceptionTime",
)
class InceptionTime1D(nn.Module):
    """
    InceptionTime architecture adapted to 1D ECG classification.

    Stacks residual Inception blocks whose parallel branches convolve at
    several kernel sizes at once, so fast spikes and slow rhythm trends are
    captured in the same layer. Global average pooling feeds a linear head.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        n_filters: Filters per Inception branch (default: 32)
        depth: Number of Inception residual blocks (default: 6)
        kernel_sizes: Kernel sizes of the multi-scale branches
        bottleneck_channels: Channels in bottleneck layers (default: 32)
        dropout_rate: Dropout probability (default: 0.3)

    Example:
        >>> model = InceptionTime1D(input_channels=1, output_size=4)
        >>> logits = model(torch.randn(32, 1, 3000))
        >>> feats = model.extract_features(torch.randn(32, 1, 3000))  # (32, 128)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        n_filters: int = 32,
        depth: int = 6,
        kernel_sizes: tuple[int, ...] = (5, 15, 41),
        bottleneck_channels: int = 32,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        # Each block concatenates one branch per kernel size plus one extra branch.
        block_width = n_filters * (len(kernel_sizes) + 1)

        self.blocks = nn.Sequential(
            *(
                InceptionResidualBlock1D(
                    in_channels=input_channels if idx == 0 else block_width,
                    n_filters=n_filters,
                    kernel_sizes=kernel_sizes,
                    bottleneck_channels=bottleneck_channels,
                )
                for idx in range(depth)
            )
        )
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = block_width

        self.classifier = nn.Linear(block_width, output_size)

    @property
    def feature_dim(self) -> int:
        """Width of the vector returned by extract_features()."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return globally pooled (batch, feature_dim) embeddings."""
        pooled = self.global_pool(self.blocks(x))
        return torch.flatten(pooled, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, output_size)."""
        feats = self.dropout(self.extract_features(x))
        return self.classifier(feats)

TCN

Bases: Module

Temporal Convolutional Network for ECG signal classification.

Uses stacked dilated causal convolutions with exponentially growing receptive fields to efficiently model long-range dependencies without recurrence. The causal structure makes it suitable for real-time ECG monitoring applications.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
num_channels list | None

List of channel sizes per temporal block (default: [64, 64, 128, 128, 256, 256])

None
kernel_size int

Convolution kernel size (default: 7)

7
dropout_rate float

Dropout probability (default: 0.2)

0.2
Example

model = TCN(input_channels=1, output_size=4)
x = torch.randn(32, 1, 3000)
output = model(x)
print(output.shape)

features = model.extract_features(x)
print(features.shape)  # (32, 256) with default num_channels

Source code in deepecgkit/models/tcn.py
@register_model(
    name="tcn",
    description="Temporal Convolutional Network with dilated causal convolutions",
)
class TCN(nn.Module):
    """
    Temporal Convolutional Network for ECG classification.

    A stack of dilated causal temporal blocks; dilation doubles with depth
    (2**level), so the receptive field grows exponentially while the network
    stays feed-forward. Causality makes it usable for streaming inference.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        num_channels: Channel width per temporal block
            (default: [64, 64, 128, 128, 256, 256])
        kernel_size: Convolution kernel size (default: 7)
        dropout_rate: Dropout probability (default: 0.2)

    Example:
        >>> model = TCN(input_channels=1, output_size=4)
        >>> logits = model(torch.randn(32, 1, 3000))
        >>> feats = model.extract_features(torch.randn(32, 1, 3000))  # (32, 256)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        num_channels: list | None = None,
        kernel_size: int = 7,
        dropout_rate: float = 0.2,
    ):
        super().__init__()

        channels = [64, 64, 128, 128, 256, 256] if num_channels is None else num_channels

        # Pair each block's input width with its output width; dilation = 2**level.
        in_widths = [input_channels] + list(channels[:-1])
        self.network = nn.Sequential(
            *(
                TemporalBlock(
                    in_channels=c_in,
                    out_channels=c_out,
                    kernel_size=kernel_size,
                    dilation=2**level,
                    dropout_rate=dropout_rate,
                )
                for level, (c_in, c_out) in enumerate(zip(in_widths, channels))
            )
        )
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self._feature_dim = channels[-1]
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self._feature_dim, output_size)

    @property
    def feature_dim(self) -> int:
        """Width of the vector returned by extract_features()."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return globally pooled (batch, feature_dim) embeddings."""
        pooled = self.global_pool(self.network(x))
        return torch.flatten(pooled, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, output_size)."""
        feats = self.dropout(self.extract_features(x))
        return self.classifier(feats)

Recurrent Networks

CRNN

Bases: Module

Convolutional Recurrent Neural Network for ECG classification.

Uses a CNN front-end for local feature extraction followed by a bidirectional LSTM for temporal aggregation, combining the strengths of both architectures.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
cnn_channels list | None

List of channel sizes for CNN stages (default: [32, 64, 128, 256])

None
lstm_hidden_size int

Size of LSTM hidden state (default: 128)

128
lstm_num_layers int

Number of LSTM layers (default: 2)

2
bidirectional bool

Use bidirectional LSTM (default: True)

True
dropout_rate float

Dropout probability (default: 0.3)

0.3
Example

model = CRNN(input_channels=1, output_size=4)
x = torch.randn(32, 1, 3000)
output = model(x)
print(output.shape)

features = model.extract_features(x)
print(features.shape)  # (32, 256)

Source code in deepecgkit/models/crnn.py
@register_model(
    name="crnn",
    description="CNN-LSTM hybrid for local feature extraction and temporal aggregation",
)
class CRNN(nn.Module):
    """
    Convolutional Recurrent Neural Network for ECG classification.

    Uses a CNN front-end for local feature extraction followed by a
    bidirectional LSTM for temporal aggregation, combining the strengths
    of both architectures.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        cnn_channels: List of channel sizes for CNN stages (default: [32, 64, 128, 256])
        lstm_hidden_size: Size of LSTM hidden state (default: 128)
        lstm_num_layers: Number of LSTM layers (default: 2)
        bidirectional: Use bidirectional LSTM (default: True)
        dropout_rate: Dropout probability (default: 0.3)

    Example:
        >>> model = CRNN(input_channels=1, output_size=4)
        >>> x = torch.randn(32, 1, 3000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 256)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        cnn_channels: list | None = None,
        lstm_hidden_size: int = 128,
        lstm_num_layers: int = 2,
        bidirectional: bool = True,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        if cnn_channels is None:
            cnn_channels = [32, 64, 128, 256]

        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_num_layers = lstm_num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        layers = []
        in_ch = input_channels
        kernel_sizes = [7, 5, 3, 3]
        for i, out_ch in enumerate(cnn_channels):
            ks = kernel_sizes[i] if i < len(kernel_sizes) else 3
            layers.extend(
                [
                    nn.Conv1d(in_ch, out_ch, kernel_size=ks, padding=ks // 2),
                    nn.BatchNorm1d(out_ch),
                    nn.ReLU(),
                    nn.MaxPool1d(kernel_size=2),
                    nn.Dropout(dropout_rate),
                ]
            )
            in_ch = out_ch

        self.cnn = nn.Sequential(*layers)

        self.lstm = nn.LSTM(
            input_size=cnn_channels[-1],
            hidden_size=lstm_hidden_size,
            num_layers=lstm_num_layers,
            batch_first=True,
            dropout=dropout_rate if lstm_num_layers > 1 else 0,
            bidirectional=bidirectional,
        )

        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = lstm_hidden_size * self.num_directions

        self.classifier = nn.Sequential(
            nn.Linear(self._feature_dim, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, output_size),
        )

    @property
    def feature_dim(self) -> int:
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        batch_size = x.size(0)
        x = self.cnn(x)
        x = x.transpose(1, 2)
        _, (h_n, _) = self.lstm(x)

        if self.bidirectional:
            h_n = h_n.view(self.lstm_num_layers, 2, batch_size, self.lstm_hidden_size)
            h_n = h_n[-1]
            h_n = torch.cat([h_n[0], h_n[1]], dim=1)
        else:
            h_n = h_n[-1]

        return h_n

    def forward(self, x):
        x = self.extract_features(x)
        x = self.dropout(x)
        return self.classifier(x)

GRUECG

Bases: Module

GRU-based model for ECG signal classification.

A recurrent neural network using GRU layers with concat pooling (adaptive avg + adaptive max + last hidden state) for ECG classification. Based on the RNN architecture from the PTB-XL benchmark.

Reference

https://github.com/helme/ecg_ptbxl_benchmarking

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 12 for 12-lead ECG)

12
output_size int

Number of output classes

5
hidden_size int

Size of GRU hidden state (default: 256)

256
num_layers int

Number of GRU layers (default: 2)

2
dropout_rate float

Dropout probability (default: 0.3)

0.3
bidirectional bool

Use bidirectional GRU (default: False)

False
Example

model = GRUECG(input_channels=12, output_size=5)
x = torch.randn(32, 12, 1000)
output = model(x)
print(output.shape)

features = model.extract_features(x)
print(features.shape)  # (32, 768) with hidden_size=256, unidirectional

Source code in deepecgkit/models/gru_model.py
@register_model(
    name="gru",
    description="GRU-based model for sequential ECG analysis",
)
class GRUECG(nn.Module):
    """
    GRU-based model for ECG signal classification.

    A (optionally bidirectional) GRU reads the signal along time; its per-step
    outputs are summarized by concat pooling (average pool + max pool + last
    hidden state) before a BatchNorm/Dropout/Linear head. Based on the RNN
    architecture from the PTB-XL benchmark.

    Reference:
        https://github.com/helme/ecg_ptbxl_benchmarking

    Args:
        input_channels: Number of input channels (default: 12 for 12-lead ECG)
        output_size: Number of output classes
        hidden_size: GRU hidden state size (default: 256)
        num_layers: Number of stacked GRU layers (default: 2)
        dropout_rate: Dropout probability (default: 0.3)
        bidirectional: Whether the GRU is bidirectional (default: False)

    Example:
        >>> model = GRUECG(input_channels=12, output_size=5)
        >>> logits = model(torch.randn(32, 12, 1000))
        >>> feats = model.extract_features(torch.randn(32, 12, 1000))  # (32, 768)
    """

    def __init__(
        self,
        input_channels: int = 12,
        output_size: int = 5,
        hidden_size: int = 256,
        num_layers: int = 2,
        dropout_rate: float = 0.3,
        bidirectional: bool = False,
    ):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        # Inter-layer dropout only applies when the GRU is stacked.
        self.gru = nn.GRU(
            input_size=input_channels,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout_rate if num_layers > 1 else 0,
            bidirectional=bidirectional,
        )

        # Concat pooling triples the per-step width (avg + max + last).
        per_step = hidden_size * self.num_directions
        self._feature_dim = per_step * 3

        self.head = nn.Sequential(
            nn.BatchNorm1d(self._feature_dim),
            nn.Dropout(dropout_rate / 2),
            nn.Linear(self._feature_dim, output_size),
        )

    @property
    def feature_dim(self) -> int:
        """Width of the vector returned by extract_features()."""
        return self._feature_dim

    def _concat_pool(self, rnn_output: torch.Tensor) -> torch.Tensor:
        """Concatenate [avg-pool, max-pool, last hidden] over the time axis."""
        per_channel = rnn_output.transpose(1, 2)  # -> (batch, features, time)
        avg_pool = torch.mean(per_channel, dim=2)
        max_pool, _ = torch.max(per_channel, dim=2)

        if self.bidirectional:
            # Forward direction finishes at t = T-1, backward at t = 0.
            last = torch.cat(
                (rnn_output[:, -1, : self.hidden_size], rnn_output[:, 0, self.hidden_size :]),
                dim=1,
            )
        else:
            last = rnn_output[:, -1, :]

        return torch.cat((avg_pool, max_pool, last), dim=1)

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Run the GRU over (batch, channels, time) input and concat-pool."""
        output, _ = self.gru(x.transpose(1, 2))
        return self._concat_pool(output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, output_size)."""
        return self.head(self.extract_features(x))

LSTMECG

Bases: Module

LSTM-based model for ECG signal classification.

A recurrent neural network using bidirectional LSTM layers for temporal pattern recognition in ECG signals.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
hidden_size int

Size of LSTM hidden state (default: 128)

128
num_layers int

Number of LSTM layers (default: 2)

2
dropout_rate float

Dropout probability (default: 0.3)

0.3
bidirectional bool

Use bidirectional LSTM (default: True)

True
Example

model = LSTMECG(input_channels=1, output_size=4)
x = torch.randn(32, 1, 3000)
output = model(x)
print(output.shape)

features = model.extract_features(x)
print(features.shape)  # (32, 256) with bidirectional=True

Source code in deepecgkit/models/lstm_model.py
@register_model(
    name="lstm",
    description="LSTM-based model for sequential ECG analysis",
)
class LSTMECG(nn.Module):
    """
    LSTM-based model for ECG signal classification.

    A (by default bidirectional) stacked LSTM reads the signal along time;
    the top layer's final hidden state(s) are classified by a small MLP head.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        hidden_size: LSTM hidden state size (default: 128)
        num_layers: Number of stacked LSTM layers (default: 2)
        dropout_rate: Dropout probability (default: 0.3)
        bidirectional: Whether the LSTM is bidirectional (default: True)

    Example:
        >>> model = LSTMECG(input_channels=1, output_size=4)
        >>> logits = model(torch.randn(32, 1, 3000))
        >>> feats = model.extract_features(torch.randn(32, 1, 3000))  # (32, 256)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        hidden_size: int = 128,
        num_layers: int = 2,
        dropout_rate: float = 0.3,
        bidirectional: bool = True,
    ):
        super().__init__()

        self.input_channels = input_channels
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        # Inter-layer dropout only applies when the LSTM is stacked.
        self.lstm = nn.LSTM(
            input_size=input_channels,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout_rate if num_layers > 1 else 0,
            bidirectional=bidirectional,
        )

        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = hidden_size * self.num_directions

        self.classifier = nn.Sequential(
            nn.Linear(self._feature_dim, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, output_size),
        )

    @property
    def feature_dim(self) -> int:
        """Width of the vector returned by extract_features()."""
        return self._feature_dim

    def _extract_lstm_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return the top layer's final hidden state(s) as (batch, feature_dim)."""
        batch = x.size(0)
        _, (h_n, _) = self.lstm(x.transpose(1, 2))  # LSTM wants (batch, time, channels)

        if not self.bidirectional:
            return h_n[-1]

        # Separate layers/directions, keep the top layer, join both directions.
        top = h_n.view(self.num_layers, 2, batch, self.hidden_size)[-1]
        return torch.cat((top[0], top[1]), dim=1)

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Public alias for the LSTM feature extractor."""
        return self._extract_lstm_features(x)

    def forward(self, x):
        """Return class logits of shape (batch, output_size)."""
        feats = self.dropout(self.extract_features(x))
        return self.classifier(feats)

Transformers & Attention

TransformerECG

Bases: Module

Transformer-based model for ECG signal classification.

A transformer architecture that uses self-attention mechanisms to capture long-range dependencies in ECG signals.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
d_model int

Dimension of the model (default: 128)

128
nhead int

Number of attention heads (default: 8)

8
num_encoder_layers int

Number of transformer encoder layers (default: 4)

4
dim_feedforward int

Dimension of feedforward network (default: 512)

512
dropout_rate float

Dropout probability (default: 0.1)

0.1
max_len int

Maximum sequence length (default: 5000)

5000
Example

model = TransformerECG(input_channels=1, output_size=4)
x = torch.randn(32, 1, 3000)
output = model(x)
print(output.shape)

features = model.extract_features(x)
print(features.shape)  # (32, 128) with d_model=128

Source code in deepecgkit/models/transformer_ecg.py
@register_model(
    name="transformer",
    description="Transformer-based ECG classifier",
)
class TransformerECG(nn.Module):
    """
    Transformer encoder for ECG signal classification.

    Projects each time step to d_model, adds positional encodings, runs a
    stack of self-attention encoder layers, and average-pools over time
    before an MLP classification head.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        d_model: Model (token) dimension (default: 128)
        nhead: Number of attention heads (default: 8)
        num_encoder_layers: Number of encoder layers (default: 4)
        dim_feedforward: Feedforward dimension inside each layer (default: 512)
        dropout_rate: Dropout probability (default: 0.1)
        max_len: Maximum supported sequence length (default: 5000)

    Example:
        >>> model = TransformerECG(input_channels=1, output_size=4)
        >>> logits = model(torch.randn(32, 1, 3000))
        >>> feats = model.extract_features(torch.randn(32, 1, 3000))  # (32, 128)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        d_model: int = 128,
        nhead: int = 8,
        num_encoder_layers: int = 4,
        dim_feedforward: int = 512,
        dropout_rate: float = 0.1,
        max_len: int = 5000,
    ):
        super().__init__()

        self.d_model = d_model

        self.input_projection = nn.Linear(input_channels, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len, dropout_rate)

        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout_rate,
                batch_first=True,
            ),
            num_layers=num_encoder_layers,
        )

        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self._feature_dim = d_model

        head_width = dim_feedforward // 2
        self.classifier = nn.Sequential(
            nn.Linear(d_model, head_width),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(head_width, output_size),
        )

    @property
    def feature_dim(self) -> int:
        """Width of the vector returned by extract_features()."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Encode (batch, channels, time) input to (batch, d_model) embeddings."""
        tokens = self.input_projection(x.transpose(1, 2))  # -> (batch, time, d_model)
        encoded = self.transformer_encoder(self.positional_encoding(tokens))
        pooled = self.global_pool(encoded.transpose(1, 2))  # pool over time
        return pooled.squeeze(-1)

    def forward(self, x):
        """Return class logits of shape (batch, output_size)."""
        return self.classifier(self.extract_features(x))

Medformer

Bases: Module

Medformer: Multi-Granularity Patching Transformer for medical time series.

Uses multiple patch sizes to capture fine, medium, and coarse temporal patterns, with intra-granularity self-attention and inter-granularity cross-attention for information fusion.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
d_model int

Transformer model dimension (default: 128)

128
patch_sizes tuple[int, ...]

Tuple of patch sizes for different granularities (default: (10, 25, 50))

(10, 25, 50)
num_encoder_layers int

Number of encoder layers (default: 2)

2
nhead int

Number of attention heads (default: 8)

8
dim_feedforward int

Feedforward dimension (default: 256)

256
dropout_rate float

Dropout probability (default: 0.1)

0.1
max_patches int

Maximum number of patches per granularity (default: 500)

500
Example

model = Medformer(input_channels=1, output_size=4)
x = torch.randn(32, 1, 3000)
output = model(x)
print(output.shape)

features = model.extract_features(x)
print(features.shape)  # (32, 384)

Source code in deepecgkit/models/medformer.py
@register_model(
    name="medformer",
    description="Multi-granularity patching Transformer for medical time series (NeurIPS 2024)",
)
class Medformer(nn.Module):
    """
    Medformer: multi-granularity patching Transformer for medical time series.

    The signal is patch-embedded at several patch sizes (granularities). Each
    granularity runs its own self-attention blocks, and an inter-granularity
    attention module fuses information across them after every layer. The
    per-granularity token means are concatenated into the final feature vector.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        d_model: Transformer model dimension (default: 128)
        patch_sizes: Patch size per granularity (default: (10, 25, 50))
        num_encoder_layers: Number of encoder layers (default: 2)
        nhead: Number of attention heads (default: 8)
        dim_feedforward: Feedforward dimension (default: 256)
        dropout_rate: Dropout probability (default: 0.1)
        max_patches: Maximum patches per granularity (default: 500)

    Example:
        >>> model = Medformer(input_channels=1, output_size=4)
        >>> logits = model(torch.randn(32, 1, 3000))
        >>> feats = model.extract_features(torch.randn(32, 1, 3000))  # (32, 384)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        d_model: int = 128,
        patch_sizes: tuple[int, ...] = (10, 25, 50),
        num_encoder_layers: int = 2,
        nhead: int = 8,
        dim_feedforward: int = 256,
        dropout_rate: float = 0.1,
        max_patches: int = 500,
    ):
        super().__init__()

        self.patch_sizes = patch_sizes
        self.d_model = d_model
        n_gran = len(patch_sizes)

        # One patch embedder and one learned positional table per granularity.
        self.patch_embeddings = nn.ModuleList(
            PatchEmbedding1D(input_channels, d_model, ps) for ps in patch_sizes
        )
        self.pos_embeddings = nn.ParameterList(
            nn.Parameter(torch.randn(1, max_patches, d_model) * 0.02) for _ in patch_sizes
        )

        self.cross_channel_attn = CrossChannelAttention(
            d_model,
            nhead=min(4, nhead),
            dropout=dropout_rate,
        )

        # Per encoder layer: one intra-granularity block per patch size,
        # followed by one inter-granularity fusion module.
        self.intra_blocks = nn.ModuleList()
        self.inter_blocks = nn.ModuleList()
        for _ in range(num_encoder_layers):
            self.intra_blocks.append(
                nn.ModuleList(
                    IntraGranularityBlock(d_model, nhead, dim_feedforward, dropout_rate)
                    for _ in patch_sizes
                )
            )
            self.inter_blocks.append(
                InterGranularityAttention(d_model, n_gran, min(4, nhead), dropout_rate)
            )

        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = d_model * n_gran

        self.classifier = nn.Linear(self._feature_dim, output_size)

    @property
    def feature_dim(self) -> int:
        """Width of the vector returned by extract_features()."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return concatenated per-granularity mean-pooled token embeddings."""
        # Embed once per granularity; add the positional table truncated to the
        # actual patch count. NOTE(review): assumes each granularity produces
        # at most max_patches tokens — confirm input lengths upstream.
        feats = []
        for embed, pos in zip(self.patch_embeddings, self.pos_embeddings):
            tokens = embed(x)
            feats.append(tokens + pos[:, : tokens.size(1)])

        # Alternate per-granularity self-attention with cross-granularity fusion.
        for intra_layer, fuse in zip(self.intra_blocks, self.inter_blocks):
            feats = [block(f) for block, f in zip(intra_layer, feats)]
            feats = fuse(feats)

        return torch.cat([f.mean(dim=1) for f in feats], dim=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, output_size)."""
        feats = self.dropout(self.extract_features(x))
        return self.classifier(feats)

Hybrid & State Space

ECGDualNet

Bases: Module

Dual-path ECG classification network.

Runs a CNN-LSTM branch and a Transformer branch in parallel, then fuses their outputs via concatenation and a fusion head.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
cnn_channels int

CNN output channels in CNN-LSTM branch (default: 128)

128
lstm_hidden int

LSTM hidden size in CNN-LSTM branch (default: 128)

128
lstm_layers int

Number of LSTM layers (default: 1)

1
d_model int

Transformer model dimension (default: 128)

128
nhead int

Number of attention heads (default: 8)

8
transformer_layers int

Number of transformer encoder layers (default: 2)

2
dim_feedforward int

Transformer feedforward dimension (default: 256)

256
dropout_rate float

Dropout probability (default: 0.3)

0.3
Example

model = ECGDualNet(input_channels=1, output_size=4)
x = torch.randn(32, 1, 3000)
output = model(x)
print(output.shape)

features = model.extract_features(x)
print(features.shape)  # (32, 384)

Source code in deepecgkit/models/ecg_dualnet.py
@register_model(
    name="dualnet",
    description="Dual-path architecture with CNN-LSTM and Transformer branches for ECG",
)
class ECGDualNet(nn.Module):
    """
    Dual-path ECG classification network.

    Feeds the same input through a CNN-LSTM branch and a Transformer branch
    in parallel, concatenates the two branch embeddings, and classifies the
    fused vector with an MLP head.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        cnn_channels: CNN output channels in the CNN-LSTM branch (default: 128)
        lstm_hidden: LSTM hidden size in the CNN-LSTM branch (default: 128)
        lstm_layers: Number of LSTM layers (default: 1)
        d_model: Transformer model dimension (default: 128)
        nhead: Number of attention heads (default: 8)
        transformer_layers: Number of transformer encoder layers (default: 2)
        dim_feedforward: Transformer feedforward dimension (default: 256)
        dropout_rate: Dropout probability (default: 0.3)

    Example:
        >>> model = ECGDualNet(input_channels=1, output_size=4)
        >>> logits = model(torch.randn(32, 1, 3000))
        >>> feats = model.extract_features(torch.randn(32, 1, 3000))  # (32, 384)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        cnn_channels: int = 128,
        lstm_hidden: int = 128,
        lstm_layers: int = 1,
        d_model: int = 128,
        nhead: int = 8,
        transformer_layers: int = 2,
        dim_feedforward: int = 256,
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        self.cnn_lstm_branch = CNNLSTMBranch(
            input_channels=input_channels,
            cnn_channels=cnn_channels,
            lstm_hidden=lstm_hidden,
            lstm_layers=lstm_layers,
            dropout_rate=dropout_rate,
        )

        self.transformer_branch = TransformerBranch(
            input_channels=input_channels,
            d_model=d_model,
            nhead=nhead,
            num_layers=transformer_layers,
            dim_feedforward=dim_feedforward,
            dropout_rate=dropout_rate,
        )

        # Fused feature width = sum of the two branch embedding widths.
        self._feature_dim = self.cnn_lstm_branch.output_dim + self.transformer_branch.output_dim

        self.classifier = nn.Sequential(
            nn.Linear(self._feature_dim, 128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, output_size),
        )

    @property
    def feature_dim(self) -> int:
        """Width of the vector returned by extract_features()."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return the concatenated embeddings of both branches."""
        branch_a = self.cnn_lstm_branch(x)
        branch_b = self.transformer_branch(x)
        return torch.cat((branch_a, branch_b), dim=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, output_size)."""
        return self.classifier(self.extract_features(x))

Mamba1D

Bases: Module

Bidirectional Mamba model for ECG classification.

Uses selective state space models with linear complexity as an alternative to Transformer self-attention. The SSM is implemented from scratch in pure PyTorch with no external dependencies.

Parameters:

Name Type Description Default
input_channels int

Number of input channels (default: 1 for single-lead ECG)

1
output_size int

Number of output classes

4
d_model int

Model dimension (default: 128)

128
d_state int

State space dimension (default: 16)

16
d_conv int

Local convolution width (default: 4)

4
expansion_factor int

Inner dimension expansion (default: 2)

2
num_layers int

Number of bidirectional Mamba layers (default: 4)

4
patch_size int

Patch size for input tokenization (default: 50)

50
dropout_rate float

Dropout probability (default: 0.1)

0.1
max_patches int

Maximum number of patches (default: 500)

500
Example

model = Mamba1D(input_channels=1, output_size=4)
x = torch.randn(32, 1, 3000)
output = model(x)
print(output.shape)

features = model.extract_features(x)
print(features.shape)  # (32, 128)

Source code in deepecgkit/models/mamba1d.py
@register_model(
    name="mamba",
    description="Bidirectional Mamba state space model with linear complexity for ECG",
)
class Mamba1D(nn.Module):
    """
    Bidirectional Mamba model for ECG classification.

    Uses selective state space models with linear complexity as an
    alternative to Transformer self-attention. The SSM is implemented
    from scratch in pure PyTorch with no external dependencies.

    Args:
        input_channels: Number of input channels (default: 1 for single-lead ECG)
        output_size: Number of output classes
        d_model: Model dimension (default: 128)
        d_state: State space dimension (default: 16)
        d_conv: Local convolution width (default: 4)
        expansion_factor: Inner dimension expansion (default: 2)
        num_layers: Number of bidirectional Mamba layers (default: 4)
        patch_size: Patch size for input tokenization (default: 50)
        dropout_rate: Dropout probability (default: 0.1)
        max_patches: Maximum number of patches (default: 500)

    Raises:
        ValueError: From ``extract_features``/``forward`` when the input is
            long enough to produce more than ``max_patches`` patches.

    Note:
        The patch embedding uses a non-overlapping strided convolution, so
        trailing samples that do not fill a whole patch are silently dropped.

    Example:
        >>> model = Mamba1D(input_channels=1, output_size=4)
        >>> x = torch.randn(32, 1, 3000)
        >>> output = model(x)
        >>> print(output.shape)

        >>> features = model.extract_features(x)
        >>> print(features.shape)  # (32, 128)
    """

    def __init__(
        self,
        input_channels: int = 1,
        output_size: int = 4,
        d_model: int = 128,
        d_state: int = 16,
        d_conv: int = 4,
        expansion_factor: int = 2,
        num_layers: int = 4,
        patch_size: int = 50,
        dropout_rate: float = 0.1,
        max_patches: int = 500,
    ):
        super().__init__()

        # Non-overlapping patch tokenizer: kernel_size == stride, so each
        # patch of `patch_size` samples maps to one d_model-dim token.
        self.patch_embed = nn.Conv1d(
            input_channels,
            d_model,
            kernel_size=patch_size,
            stride=patch_size,
        )
        # Learned positional embeddings with small-scale init (std 0.02),
        # sized for at most `max_patches` tokens.
        self.pos_embedding = nn.Parameter(torch.randn(1, max_patches, d_model) * 0.02)

        self.layers = nn.ModuleList(
            [
                BidirectionalMamba(d_model, d_state, d_conv, expansion_factor)
                for _ in range(num_layers)
            ]
        )

        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout_rate)
        self._feature_dim = d_model

        self.classifier = nn.Linear(d_model, output_size)

    @property
    def feature_dim(self) -> int:
        """Width of the pooled feature vector returned by ``extract_features``."""
        return self._feature_dim

    def extract_features(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` into a pooled ``(batch, d_model)`` feature vector.

        Args:
            x: Input of shape ``(batch, input_channels, length)``.

        Returns:
            Mean-pooled, layer-normalized features of shape ``(batch, d_model)``.

        Raises:
            ValueError: If ``length // patch_size`` exceeds the ``max_patches``
                the positional embedding was built for.
        """
        x = self.patch_embed(x)  # (batch, d_model, num_patches)
        x = x.transpose(1, 2)  # (batch, num_patches, d_model)
        num_patches = x.size(1)
        max_patches = self.pos_embedding.size(1)
        # Fail with a clear message instead of an opaque broadcasting error
        # when the input is longer than the positional table supports.
        if num_patches > max_patches:
            raise ValueError(
                f"Input yields {num_patches} patches but the model was built "
                f"with max_patches={max_patches}; increase max_patches or "
                f"shorten the input."
            )
        x = x + self.pos_embedding[:, :num_patches]

        for layer in self.layers:
            x = layer(x)

        x = self.norm(x)
        x = x.mean(dim=1)  # mean-pool over the patch (sequence) axis
        return x

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class logits of shape ``(batch, output_size)`` for ``x``."""
        x = self.extract_features(x)
        x = self.dropout(x)
        x = self.classifier(x)
        return x