COSMICConfig¶

Configuration system for COSMIC.

Class Definition¶

@dataclass
class COSMICConfig:
    """
    Main configuration container for COSMIC.

    Configuration options can be set via:
    - Constructor arguments
    - YAML file loading (see `from_yaml`)
    - Environment variables (see `from_env`; only the variables listed there)
    """

Attributes¶

Attribute	Type	Default	Description
`dcs`	`DCSConfig`	See below	DCS weights
`structure`	`StructureConfig`	See below	Structure analysis
`embedding`	`EmbeddingConfig`	See below	Embedding model
`llm`	`LLMConfig`	See below	LLM verification
`reference`	`ReferenceConfig`	See below	Reference linking
`fusion`	`FusionConfig`	See below	Boundary fusion
`chunk_constraints`	`ChunkConstraints`	See below	Size constraints

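Sub-Configurations¶

Note: `StructureConfig` and `ReferenceConfig` appear in the attribute table above but are not described in this section; consult the library source for their fields and defaults.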

DCSConfig¶

Discourse Coherence Score weights.

@dataclass
class DCSConfig:
    alpha: float = 0.4      # Topical coherence weight
    beta: float = 0.35      # Coreference density weight
    gamma: float = 0.25     # Discourse marker weight
    threshold: float = 0.5  # Boundary detection threshold

ChunkConstraints¶

Chunk size limits.

@dataclass
class ChunkConstraints:
    min_tokens: int = 100   # Minimum chunk size
    max_tokens: int = 2000  # Maximum chunk size
    target_tokens: int = 500  # Target size

LLMConfig¶

LLM verification settings.

@dataclass
class LLMConfig:
    enabled: bool = True
    provider: str = "auto"       # "openai", "ollama", "auto"
    base_url: str = ""
    model_name: str = ""
    api_key: str = ""
    confidence_threshold: float = 0.8
    timeout_seconds: int = 30

EmbeddingConfig¶

Embedding model settings.

@dataclass
class EmbeddingConfig:
    model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    device: str = "cuda"
    batch_size: int = 64
    cache_size: int = 10000
    normalize: bool = True

FusionConfig¶

Boundary fusion weights.

@dataclass
class FusionConfig:
    structural_weight: float = 0.6
    semantic_weight: float = 0.4
    acceptance_threshold: float = 0.5

Class Methods¶

from_yaml¶

@classmethod
def from_yaml(cls, path: Path) -> COSMICConfig:
    """
    Load configuration from YAML file.

    Args:
        path: Path to YAML configuration file.

    Returns:
        COSMICConfig instance.

    Example:
        config = COSMICConfig.from_yaml(Path("configs/custom.yaml"))
    """

from_env¶

@classmethod
def from_env(cls) -> COSMICConfig:
    """
    Load configuration from environment variables.

    Environment variables:
        COSMIC_LLM_PROVIDER
        COSMIC_LLM_URL
        COSMIC_LLM_MODEL
        COSMIC_LLM_API_KEY
        COSMIC_EMBEDDING_DEVICE
        OLLAMA_HOST
        COSMIC_OLLAMA_MODEL

    Returns:
        COSMICConfig with environment overrides.
    """

Usage Examples¶

Default Configuration¶

from cosmic import COSMICConfig

config = COSMICConfig()  # All defaults

Custom DCS Weights¶

from cosmic import COSMICConfig
from cosmic.core.config import DCSConfig

config = COSMICConfig(
    dcs=DCSConfig(
        alpha=0.5,   # More weight on topical coherence
        beta=0.3,
        gamma=0.2,
        threshold=0.45,  # Lower threshold = more boundaries
    )
)

Custom Chunk Sizes¶

from cosmic import COSMICConfig
from cosmic.core.config import ChunkConstraints

config = COSMICConfig(
    chunk_constraints=ChunkConstraints(
        min_tokens=50,
        max_tokens=1024,
        target_tokens=400,
    )
)

Configure LLM¶

from cosmic import COSMICConfig
from cosmic.core.config import LLMConfig

# OpenAI
config = COSMICConfig(
    llm=LLMConfig(
        enabled=True,
        provider="openai",
        base_url="https://api.openai.com/v1",
        model_name="gpt-4",
        api_key="sk-...",
    )
)

# Ollama
config = COSMICConfig(
    llm=LLMConfig(
        enabled=True,
        provider="ollama",
        base_url="http://localhost:11434/v1",
        model_name="gemma3:latest",
    )
)

# Disabled
config = COSMICConfig(
    llm=LLMConfig(enabled=False)
)

From YAML File¶

from pathlib import Path

from cosmic import COSMICConfig

config = COSMICConfig.from_yaml(Path("configs/production.yaml"))

YAML Format¶

# configs/custom.yaml

dcs:
  alpha: 0.4
  beta: 0.35
  gamma: 0.25
  threshold: 0.5

chunk_constraints:
  min_tokens: 100
  max_tokens: 2000
  target_tokens: 500

embedding:
  model_name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
  device: cuda
  batch_size: 64

llm:
  enabled: true
  provider: ollama
  model_name: gemma3:latest

fusion:
  structural_weight: 0.6
  semantic_weight: 0.4
  acceptance_threshold: 0.5

Environment Variables¶

# .env file
COSMIC_LLM_PROVIDER=ollama
COSMIC_LLM_URL=http://localhost:11434/v1
COSMIC_EMBEDDING_DEVICE=cuda

from cosmic import COSMICConfig

# Loads from environment
config = COSMICConfig.from_env()

Validation¶

Configuration is validated on creation:

from cosmic import COSMICConfig
from cosmic.core.config import ChunkConstraints, DCSConfig

# Raises ValueError: DCS weights must sum to 1.0
config = COSMICConfig(
    dcs=DCSConfig(alpha=0.5, beta=0.5, gamma=0.5)
)

# Raises ValueError: min_tokens must be less than max_tokens
config = COSMICConfig(
    chunk_constraints=ChunkConstraints(min_tokens=1000, max_tokens=500)
)

Merging Configurations¶

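from dataclasses import replace
from pathlib import Path

from cosmic import COSMICConfig
from cosmic.core.config import DCSConfig

base_config = COSMICConfig.from_yaml(Path("configs/base.yaml"))

# Override specific settings. Note that `replace` swaps in the whole `dcs`
# object, so any DCS field not passed here (e.g. `threshold`) takes its
# DCSConfig default rather than the value loaded from base.yaml.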
custom_config = replace(
    base_config,
    dcs=DCSConfig(alpha=0.5, beta=0.3, gamma=0.2),
)