Skip to content

Synthesizing Interactions

Module for generating multi-turn dialogues between a student and tutor agent using large language models.

This class wraps backend-specific model interfaces and orchestrates the simulation of conversations between two agents. It supports customizable educational modes and sampling behavior and ensures reproducibility via global seeding. Outputs are returned as structured pandas DataFrames.

Attributes:

Name Type Description
backend str

Backend to use for inference. Options are "hf" (Hugging Face) or "mlx" (MLX).

model_id str

The identifier of the model to use, e.g., "gpt2" (Hugging Face) or "Qwen2.5-7B-Instruct-1M-4bit" (MLX).

sampling_params Optional[dict]

Sampling hyperparameters such as temperature, top_p, or top_k.

Methods:

Name Description
simulate_dialogue

Simulates a dialogue and returns it as a pandas DataFrame.

Source code in src/educhateval/core.py
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
class DialogueSimulator:
    """
    Module for generating multi-turn dialogues between a student and tutor agent using large language models.

    This class wraps backend-specific model interfaces and orchestrates the simulation of conversations between two agents.
    It supports customizable educational modes and sampling behavior and ensures reproducibility via global seeding.
    Outputs are returned as structured pandas DataFrames.

    Attributes:
        backend (str): Backend to use for inference. Options are "hf" (Hugging Face) or "mlx" (MLX).
        model_id (str): The identifier of the model to use, e.g., "gpt2" (Hugging Face) or "Qwen2.5-7B-Instruct-1M-4bit" (MLX).
        sampling_params (Optional[dict]): Sampling hyperparameters such as temperature, top_p, or top_k.

    Methods:
        simulate_dialogue(...): Simulates a dialogue and returns it as a pandas DataFrame.
    """

    def __init__(
        self,
        backend: str = "mlx",
        model_id: str = "mlx-community/Qwen2.5-7B-Instruct-1M-4bit",
        sampling_params: Optional[dict] = None,
    ):
        # Instantiate the backend-specific chat wrapper. Note the key name
        # difference in defaults: HF uses "temperature", MLX uses "temp".
        if backend == "hf":
            self.model = ChatHF(
                model_id=model_id,
                sampling_params=sampling_params
                or {"temperature": 0.9, "top_p": 0.9, "top_k": 50},
            )
        elif backend == "mlx":
            self.model = ChatMLX(
                model_id=model_id,
                sampling_params=sampling_params
                or {"temp": 0.9, "top_p": 0.9, "top_k": 40},
            )
        else:
            raise ValueError("Unsupported backend")

        # Eagerly load model weights so simulate_dialogue can be called immediately.
        self.model.load()

    def simulate_dialogue(
        self,
        mode: str = "general_task_solving",
        turns: int = 5,
        seed_message_input: str = "Hi, I'm a student seeking assistance with my studies.",
        log_dir: Optional[Path] = None,
        save_csv_path: Optional[Path] = None,
        seed: int = 42,
        custom_prompt_file: Optional[Path] = None,
        system_prompts: Optional[dict] = None,
    ) -> pd.DataFrame:
        """
        Simulates a multi-turn dialogue using either built-in or custom prompts.

        Prompt resolution order: an explicit `system_prompts` dict wins; otherwise
        `custom_prompt_file` is loaded (YAML with a top-level `conversation_types`
        mapping keyed by mode); otherwise hardcoded defaults are used.

        Args:
            mode: Mode key to select prompt pair (student/tutor).
            turns: Number of back-and-forth turns to simulate.
            seed_message_input: First message from the student.
            log_dir: Directory to save raw log (optional).
            save_csv_path: Path to save structured DataFrame (optional).
            seed: Random seed for reproducibility.
            custom_prompt_file: Optional path to custom YAML defining prompt modes.
            system_prompts: Optional dict of system prompts (e.g., "student"/"tutor" keys);
                overrides file-based prompt loading.

        Returns:
            pd.DataFrame: Structured DataFrame of the conversation.

        Raises:
            ValueError: If both prompt sources are given, the YAML cannot be read,
                or the file lacks the required structure or mode.
        """
        set_seed(seed)

        # Validate input source: the two prompt sources are mutually exclusive.
        if system_prompts is not None and custom_prompt_file is not None:
            raise ValueError("Provide only one of `system_prompts` or `custom_prompt_file`, not both.")

        # Load prompts from file if needed
        if system_prompts is None:
            if custom_prompt_file:
                import yaml
                try:
                    with open(custom_prompt_file, "r") as f:
                        custom_prompts = yaml.safe_load(f)
                    print(f" Loaded custom prompts from: {custom_prompt_file}")
                except Exception as e:
                    # Chain the original parse/IO error for debuggability.
                    raise ValueError(f"Failed to load YAML from {custom_prompt_file}: {e}") from e

                # safe_load returns None for an empty file (and may return non-dict
                # values); guard before membership tests to avoid a raw TypeError.
                if not isinstance(custom_prompts, dict) or "conversation_types" not in custom_prompts:
                    raise ValueError(f"Missing 'conversation_types' in custom prompt file: {custom_prompt_file}")

                if mode not in custom_prompts["conversation_types"]:
                    raise ValueError(f"Mode '{mode}' not found in custom prompt file: {custom_prompt_file}")

                system_prompts = custom_prompts["conversation_types"][mode]

            else:
                # Use built-in fallback
                print("Using default hardcoded prompts.")
                system_prompts = {
                    "student": "You are a student asking for help with a task.",
                    "tutor": "You are a helpful tutor guiding the student step by step.",
                }

        # Simulate conversation with the resolved prompts.
        df = simulate_conversation(
            model=self.model,
            turns=turns,
            seed_message_input=seed_message_input,
            log_dir=log_dir,
            save_csv_path=save_csv_path,
            system_prompts=system_prompts,
            custom_prompt_file=None,  # already used, no need to pass again
            mode=mode,
        )

        print("\nFull dialogue stored in DataFrame. Use the returned object or view as `df`.")
        return df

simulate_dialogue(mode='general_task_solving', turns=5, seed_message_input="Hi, I'm a student seeking assistance with my studies.", log_dir=None, save_csv_path=None, seed=42, custom_prompt_file=None, system_prompts=None)

Simulates a multi-turn dialogue using either built-in or custom prompts.

Parameters:

Name Type Description Default
mode str

Mode key to select prompt pair (student/tutor).

'general_task_solving'
turns int

Number of back-and-forth turns to simulate.

5
seed_message_input str

First message from the student.

"Hi, I'm a student seeking assistance with my studies."
log_dir Optional[Path]

Directory to save raw log (optional).

None
save_csv_path Optional[Path]

Path to save structured DataFrame (optional).

None
seed int

Random seed for reproducibility.

42
custom_prompt_file Optional[Path]

Optional path to custom YAML defining prompt modes.

None
system_prompts Optional[dict]

Optional dictionary of custom system prompts (e.g., "student"/"tutor" keys); overrides file-based prompt loading.

None

Returns:

Type Description
DataFrame

pd.DataFrame: Structured DataFrame of the conversation.

Source code in src/educhateval/core.py
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
def simulate_dialogue(
    self,
    mode: str = "general_task_solving",
    turns: int = 5,
    seed_message_input: str = "Hi, I'm a student seeking assistance with my studies.",
    log_dir: Optional[Path] = None,
    save_csv_path: Optional[Path] = None,
    seed: int = 42,
    custom_prompt_file: Optional[Path] = None,
    system_prompts: Optional[dict] = None,
) -> pd.DataFrame:
    """
    Simulates a multi-turn dialogue using either built-in or custom prompts.

    Prompt resolution order: an explicit `system_prompts` dict wins; otherwise
    `custom_prompt_file` is loaded (YAML with a top-level `conversation_types`
    mapping keyed by mode); otherwise hardcoded defaults are used.

    Args:
        mode: Mode key to select prompt pair (student/tutor).
        turns: Number of back-and-forth turns to simulate.
        seed_message_input: First message from the student.
        log_dir: Directory to save raw log (optional).
        save_csv_path: Path to save structured DataFrame (optional).
        seed: Random seed for reproducibility.
        custom_prompt_file: Optional path to custom YAML defining prompt modes.
        system_prompts: Optional dict of system prompts (e.g., "student"/"tutor" keys);
            overrides file-based prompt loading.

    Returns:
        pd.DataFrame: Structured DataFrame of the conversation.

    Raises:
        ValueError: If both prompt sources are given, the YAML cannot be read,
            or the file lacks the required structure or mode.
    """
    set_seed(seed)

    # Validate input source: the two prompt sources are mutually exclusive.
    if system_prompts is not None and custom_prompt_file is not None:
        raise ValueError("Provide only one of `system_prompts` or `custom_prompt_file`, not both.")

    # Load prompts from file if needed
    if system_prompts is None:
        if custom_prompt_file:
            import yaml
            try:
                with open(custom_prompt_file, "r") as f:
                    custom_prompts = yaml.safe_load(f)
                print(f" Loaded custom prompts from: {custom_prompt_file}")
            except Exception as e:
                # Chain the original parse/IO error for debuggability.
                raise ValueError(f"Failed to load YAML from {custom_prompt_file}: {e}") from e

            # safe_load returns None for an empty file (and may return non-dict
            # values); guard before membership tests to avoid a raw TypeError.
            if not isinstance(custom_prompts, dict) or "conversation_types" not in custom_prompts:
                raise ValueError(f"Missing 'conversation_types' in custom prompt file: {custom_prompt_file}")

            if mode not in custom_prompts["conversation_types"]:
                raise ValueError(f"Mode '{mode}' not found in custom prompt file: {custom_prompt_file}")

            system_prompts = custom_prompts["conversation_types"][mode]

        else:
            # Use built-in fallback
            print("Using default hardcoded prompts.")
            system_prompts = {
                "student": "You are a student asking for help with a task.",
                "tutor": "You are a helpful tutor guiding the student step by step.",
            }

    # Simulate conversation with the resolved prompts.
    df = simulate_conversation(
        model=self.model,
        turns=turns,
        seed_message_input=seed_message_input,
        log_dir=log_dir,
        save_csv_path=save_csv_path,
        system_prompts=system_prompts,
        custom_prompt_file=None,  # already used, no need to pass again
        mode=mode,
    )

    print("\nFull dialogue stored in DataFrame. Use the returned object or view as `df`.")
    return df