COSMICConfig

Configuration system for COSMIC.

Class Definition

@dataclass
class COSMICConfig:
    """
    Main configuration container for COSMIC.

    Configuration options can be set via:
    - Constructor arguments
    - YAML file loading (see from_yaml)
    - Environment variables (see from_env for the supported variables)
    """

Attributes

| Attribute | Type | Default | Description |
|---|---|---|---|
| dcs | DCSConfig | See below | DCS weights |
| structure | StructureConfig | See below | Structure analysis |
| embedding | EmbeddingConfig | See below | Embedding model |
| llm | LLMConfig | See below | LLM verification |
| reference | ReferenceConfig | See below | Reference linking |
| fusion | FusionConfig | See below | Boundary fusion |
| chunk_constraints | ChunkConstraints | See below | Size constraints |

Sub-Configurations

DCSConfig

Discourse Coherence Score weights.

@dataclass
class DCSConfig:
    alpha: float = 0.4      # Topical coherence weight
    beta: float = 0.35      # Coreference density weight
    gamma: float = 0.25     # Discourse marker weight
    threshold: float = 0.5  # Boundary detection threshold

ChunkConstraints

Chunk size limits.

@dataclass
class ChunkConstraints:
    min_tokens: int = 100   # Minimum chunk size
    max_tokens: int = 2000  # Maximum chunk size
    target_tokens: int = 500  # Target size

LLMConfig

LLM verification settings.

@dataclass
class LLMConfig:
    enabled: bool = True
    provider: str = "auto"       # "openai", "ollama", "auto"
    base_url: str = ""
    model_name: str = ""
    api_key: str = ""
    confidence_threshold: float = 0.8
    timeout_seconds: int = 30

EmbeddingConfig

Embedding model settings.

@dataclass
class EmbeddingConfig:
    model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    device: str = "cuda"
    batch_size: int = 64
    cache_size: int = 10000
    normalize: bool = True

FusionConfig

Boundary fusion weights.

@dataclass
class FusionConfig:
    structural_weight: float = 0.6
    semantic_weight: float = 0.4
    acceptance_threshold: float = 0.5

Class Methods

from_yaml

@classmethod
def from_yaml(cls, path: Path) -> COSMICConfig:
    """
    Load configuration from YAML file.

    Args:
        path: Path to YAML configuration file.

    Returns:
        COSMICConfig instance.

    Example:
        config = COSMICConfig.from_yaml(Path("configs/custom.yaml"))
    """

from_env

@classmethod
def from_env(cls) -> COSMICConfig:
    """
    Load configuration from environment variables.

    Environment variables:
        COSMIC_LLM_PROVIDER
        COSMIC_LLM_URL
        COSMIC_LLM_MODEL
        COSMIC_LLM_API_KEY
        COSMIC_EMBEDDING_DEVICE
        OLLAMA_HOST
        COSMIC_OLLAMA_MODEL

    Returns:
        COSMICConfig with environment overrides.
    """

Usage Examples

Default Configuration

from cosmic import COSMICConfig

config = COSMICConfig()  # All defaults

Custom DCS Weights

from cosmic import COSMICConfig
from cosmic.core.config import DCSConfig

config = COSMICConfig(
    dcs=DCSConfig(
        alpha=0.5,   # More weight on topical coherence
        beta=0.3,
        gamma=0.2,
        threshold=0.45,  # Lower threshold = more boundaries
    )
)

Custom Chunk Sizes

from cosmic.core.config import ChunkConstraints

config = COSMICConfig(
    chunk_constraints=ChunkConstraints(
        min_tokens=50,
        max_tokens=1024,
        target_tokens=400,
    )
)

Configure LLM

from cosmic.core.config import LLMConfig

# OpenAI
config = COSMICConfig(
    llm=LLMConfig(
        enabled=True,
        provider="openai",
        base_url="https://api.openai.com/v1",
        model_name="gpt-4",
        api_key="sk-...",
    )
)

# Ollama
config = COSMICConfig(
    llm=LLMConfig(
        enabled=True,
        provider="ollama",
        base_url="http://localhost:11434/v1",
        model_name="gemma3:latest",
    )
)

# Disabled
config = COSMICConfig(
    llm=LLMConfig(enabled=False)
)

From YAML File

from pathlib import Path

config = COSMICConfig.from_yaml(Path("configs/production.yaml"))

YAML Format

# configs/custom.yaml

dcs:
  alpha: 0.4
  beta: 0.35
  gamma: 0.25
  threshold: 0.5

chunk_constraints:
  min_tokens: 100
  max_tokens: 2000
  target_tokens: 500

embedding:
  model_name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
  device: cuda
  batch_size: 64

llm:
  enabled: true
  provider: ollama
  model_name: gemma3:latest

fusion:
  structural_weight: 0.6
  semantic_weight: 0.4
  acceptance_threshold: 0.5

Environment Variables

# .env file
COSMIC_LLM_PROVIDER=ollama
COSMIC_LLM_URL=http://localhost:11434/v1
COSMIC_EMBEDDING_DEVICE=cuda

from cosmic import COSMICConfig

# Loads from environment
config = COSMICConfig.from_env()

Validation

Configuration is validated on creation:

# Raises ValueError: DCS weights must sum to 1.0
config = COSMICConfig(
    dcs=DCSConfig(alpha=0.5, beta=0.5, gamma=0.5)
)

# Raises ValueError: min_tokens must be less than max_tokens
config = COSMICConfig(
    chunk_constraints=ChunkConstraints(min_tokens=1000, max_tokens=500)
)

Merging Configurations

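from dataclasses import replace
from pathlib import Path

from cosmic import COSMICConfig

base_config = COSMICConfig.from_yaml(Path("configs/base.yaml"))

# Override specific settings. The nested replace() keeps the other fields of
# base_config.dcs (e.g. threshold) as loaded from the YAML file; passing a
# fresh DCSConfig(...) instead would reset them to their defaults.
custom_config = replace(
    base_config,
    dcs=replace(base_config.dcs, alpha=0.5, beta=0.3, gamma=0.2),
)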