main: improved parameter descriptions

README: added new 'question' command parameters
chat: changed default glob to '*.msg' in all ChatDB functions
2023-10-20 08:57:24 +02:00 · 2023-10-20 08:57:24 +02:00 · 2023-10-20 08:24:58 +02:00
17 changed files with 120 additions and 1089 deletions
@@ -65,7 +65,7 @@ cmm question [-t OTAGS]... [-k ATAGS]... [-x XTAGS]... [-o OUTTAGS]... [-A AI_ID
 * `-O, --overwrite`: Overwrite existing messages when repeating them
 * `-s, --source-text FILE`: Add content of a file to the query
 * `-S, --source-code FILE`: Add source code file content to the chat history
-* `-l, --location {cache,db,all}`: Use given location when building the chat history (default: 'db')
+* `-l, --location {disk,cache,db,all}`: Use given location when building the chat history (default: 'db')
 * `-g, --glob GLOB`: Filter message files using the given glob pattern
 #### Hist
@@ -85,8 +85,6 @@ cmm hist [--print | --convert FORMAT] [-t OTAGS]... [-k ATAGS]... [-x XTAGS]...
 * `-S, --source-code-only`: Only print embedded source code
 * `-A, --answer SUBSTRING`: Filter for answer substring
 * `-Q, --question SUBSTRING`: Filter for question substring
 * `-l, --location {cache,db,all}`: Use given location when building the chat history (default: 'db')
 * `-g, --glob GLOB`: Filter message files using the given glob pattern
 #### Tags
@@ -2,8 +2,7 @@
 Implements the OpenAI client classes and functions.
 """
 import openai
-import tiktoken
+from typing import Optional, Union
 from typing import Optional, Union, Generator
 from ..tags import Tag
 from ..message import Message, Answer
 from ..chat import Chat
@@ -13,52 +12,6 @@ from ..configuration import OpenAIConfig
 ChatType = list[dict[str, str]]
 class OpenAIAnswer:
    """
    One answer (choice) of a streamed OpenAI chat completion.

    All answers of a request share the same 'response' iterator and the
    same 'streams' dictionary: whichever answer pulls the next chunk
    distributes the received pieces to the 'data' lists of the answers
    they belong to (looked up via the choice index).
    """
    def __init__(self,
                 idx: int,
                 streams: dict[int, 'OpenAIAnswer'],
                 response: openai.ChatCompletion,
                 tokens: Tokens,
                 encoding: tiktoken.core.Encoding) -> None:
        """
        * 'idx': index of the choice this answer represents
        * 'streams': maps choice indices to their 'OpenAIAnswer' objects
        * 'response': chunk iterator, consumed with 'next()'
        * 'tokens': token counter, updated for every received piece
        * 'encoding': used to count the tokens of each received piece
        """
        self.idx = idx
        self.streams = streams
        self.response = response
        # index of the next entry in 'data' that 'stream()' will yield
        self.position: int = 0
        self.encoding = encoding
        # pieces received so far for this answer
        self.data: list[str] = []
        # set as soon as the response iterator is exhausted
        self.finished: bool = False
        self.tokens = tokens
    def stream(self) -> Generator[str, None, None]:
        """
        Yield the pieces of this answer one by one. Chunks are pulled
        from the response until one contains a choice for this answer
        (or the response is exhausted); the generator ends when no
        unread piece is left after such a pull.
        """
        while True:
            if not self.next():
                # chunk only contained choices of other answers
                continue
            if len(self.data) <= self.position:
                break
            yield self.data[self.position]
            self.position += 1
    def next(self) -> bool:
        """
        Fetch the next chunk of the response and append its pieces to
        the answers they belong to. Returns 'False' if the chunk did not
        contain a choice for this answer, 'True' otherwise (also when
        the response is already exhausted).
        """
        if self.finished:
            return True
        try:
            chunk = next(self.response)
        except StopIteration:
            self.finished = True
        # 'chunk' is only bound if the iterator was not exhausted
        if not self.finished:
            found_choice = False
            for choice in chunk['choices']:
                # choices with a 'finish_reason' add no content
                # NOTE(review): assumes every other 'delta' has a 'content' key - confirm
                if not choice['finish_reason']:
                    self.streams[choice['index']].data.append(choice['delta']['content'])
                    self.tokens.completion += len(self.encoding.encode(choice['delta']['content']))
                    self.tokens.total = self.tokens.prompt + self.tokens.completion
                if choice['index'] == self.idx:
                    found_choice = True
            if not found_choice:
                return False
        return True
 class OpenAI(AI):
    """
    The OpenAI AI client.
@@ -68,7 +21,7 @@ class OpenAI(AI):
        self.ID = config.ID
        self.name = config.name
        self.config = config
-        openai.api_key = self.config.api_key
+        openai.api_key = config.api_key
    def request(self,
                question: Message,
@@ -80,9 +33,7 @@ class OpenAI(AI):
        chat history. The nr. of requested answers corresponds to the
        nr. of messages in the 'AIResponse'.
        """
-        self.encoding = tiktoken.encoding_for_model(self.config.model)
+        oai_chat = self.openai_chat(chat, self.config.system, question)
        oai_chat, prompt_tokens = self.openai_chat(chat, self.config.system, question)
        tokens: Tokens = Tokens(prompt_tokens, 0, prompt_tokens)
        response = openai.ChatCompletion.create(
            model=self.config.model,
            messages=oai_chat,
@@ -90,24 +41,22 @@ class OpenAI(AI):
            max_tokens=self.config.max_tokens,
            top_p=self.config.top_p,
            n=num_answers,
            stream=True,
            frequency_penalty=self.config.frequency_penalty,
            presence_penalty=self.config.presence_penalty)
-        streams: dict[int, OpenAIAnswer] = {}
+        question.answer = Answer(response['choices'][0]['message']['content'])
        for n in range(num_answers):
            streams[n] = OpenAIAnswer(n, streams, response, tokens, self.encoding)
        question.answer = Answer(streams[0].stream())
        question.tags = set(otags) if otags is not None else None
        question.ai = self.ID
        question.model = self.config.model
        answers: list[Message] = [question]
-        for idx in range(1, num_answers):
+        for choice in response['choices'][1:]:  # type: ignore
            answers.append(Message(question=question.question,
-                                   answer=Answer(streams[idx].stream()),
+                                   answer=Answer(choice['message']['content']),
                                   tags=otags,
                                   ai=self.ID,
                                   model=self.config.model))
-        return AIResponse(answers, tokens)
+        return AIResponse(answers, Tokens(response['usage']['prompt_tokens'],
                                          response['usage']['completion_tokens'],
                                          response['usage']['total_tokens']))
    def models(self) -> list[str]:
        """
@@ -134,26 +83,24 @@ class OpenAI(AI):
            print('\nNot ready: ' + ', '.join(not_ready))
    def openai_chat(self, chat: Chat, system: str,
-                    question: Optional[Message] = None) -> tuple[ChatType, int]:
+                    question: Optional[Message] = None) -> ChatType:
        """
        Create a chat history with system message in OpenAI format.
        Optionally append a new question.
        """
        oai_chat: ChatType = []
        prompt_tokens: int = 0
-        def append(role: str, content: str) -> int:
+        def append(role: str, content: str) -> None:
            oai_chat.append({'role': role, 'content': content.replace("''", "'")})
            return len(self.encoding.encode(', '.join(['role:', oai_chat[-1]['role'], 'content:', oai_chat[-1]['content']])))
-        prompt_tokens += append('system', system)
+        append('system', system)
        for message in chat.messages:
            if message.answer:
-                prompt_tokens += append('user', message.question)
+                append('user', message.question)
-                prompt_tokens += append('assistant', str(message.answer))
+                append('assistant', message.answer)
        if question:
-            prompt_tokens += append('user', question.question)
+            append('user', question.question)
-        return oai_chat, prompt_tokens
+        return oai_chat
    def tokens(self, data: Union[Message, Chat]) -> int:
        raise NotImplementedError
@@ -323,10 +323,7 @@ class ChatDB(Chat):
        * 'glob': if specified, files will be filtered using 'path.glob()'
        * 'mfilter': use with 'Message.from_file()' to filter messages
          when reading them.
        * 'loc': read messages from given location instead of 'db_path'
        """
        if loc == msg_location.MEM:
            raise ChatError(f"Can't build ChatDB from message location '{loc}'")
        messages: list[Message] = []
        if loc in [msg_location.DB, msg_location.DISK, msg_location.ALL]:
            messages.extend(read_dir(db_path, glob, mfilter))
@@ -1,69 +0,0 @@
 """
 Contains shared functions for the various CMM subcommands.
 """
 import argparse
 from pathlib import Path
 from ..message import Message, MessageError, source_code
 def read_text_file(file: Path) -> str:
    """
    Return the content of the given file without leading
    and trailing whitespace.
    """
    with open(file) as handle:
        return handle.read().strip()
 def add_file_as_text(question_parts: list[str], file: str) -> None:
    """
    Add the given file as plain text to the question part list.
    If the file is a Message, add the answer. Nothing is added
    if there is no content (empty file or Message without answer).
    """
    file_path = Path(file)
    # start with empty content: a Message without an answer used to
    # leave 'content' unbound and triggered an 'UnboundLocalError' below
    content: str = ''
    try:
        message = Message.from_file(file_path)
        if message and message.answer:
            content = message.answer
    except MessageError:
        # not a message file -> read it as plain text
        content = read_text_file(file_path)
    if len(content) > 0:
        question_parts.append(content)
 def add_file_as_code(question_parts: list[str], file: str) -> None:
    """
    Add all source code from the given file. If no code segments can be extracted,
    the whole content is added as source code segment. If the file is a Message,
    extract the source code from the answer; a Message without an answer adds nothing.
    """
    file_path = Path(file)
    content: str
    try:
        message = Message.from_file(file_path)
    except MessageError:
        # not a message file -> treat it as a plain source file
        with open(file) as r:
            content = r.read().strip()
    else:
        if not (message and message.answer):
            # 'content' used to stay unbound in this case, which made
            # the code below fail with an 'UnboundLocalError'
            return
        content = message.answer
    # extract and add source code
    code_parts = source_code(content, include_delims=True)
    if len(code_parts) > 0:
        question_parts += code_parts
    else:
        question_parts.append(f"```\n{content}\n```")
 def invert_input_tag_args(args: argparse.Namespace) -> None:
    """
    Inverts the meaning of the INPUT tag arguments ('or_tags' and
    'and_tags') for this command:
    * no tags specified on the CLI ('None') -> no tags are selected
    * empty tags specified on the CLI -> all tags are selected ('None')
    """
    for attr in ('or_tags', 'and_tags'):
        tags = getattr(args, attr)
        if tags is None:
            setattr(args, attr, set())
        elif len(tags) == 0:
            setattr(args, attr, None)
@@ -1,95 +0,0 @@
 import sys
 import argparse
 from pathlib import Path
 from pydoc import pager
 from ..configuration import Config
 from ..glossary import Glossary
 class GlossaryCmdError(Exception):
    """Error raised by the functions of the 'glossary' command."""
 def print_paged(text: str) -> None:
    """
    Display the given text using the 'pydoc' pager.
    """
    pager(text)
 def get_glossary_file_path(name: str, config: Config) -> Path:
    """
    Build the absolute file path for the glossary with the given name,
    located in the configured glossaries directory and carrying the
    glossary file suffix. Raises a 'GlossaryCmdError' if no glossaries
    directory is configured.
    """
    glossary_dir = config.glossaries
    if not glossary_dir:
        raise GlossaryCmdError("Can't create glossary name without a glossary directory")
    glossary_file = Path(glossary_dir, name).with_suffix(Glossary.file_suffix)
    return glossary_file.absolute()
 def list_glossaries(args: argparse.Namespace, config: Config) -> None:
    """
    Print a short description of every glossary found in the
    'glossaries' directory (sorted by file path). Raises a
    'GlossaryCmdError' if that directory is not configured.
    """
    glossary_dir = config.glossaries
    if not glossary_dir:
        raise GlossaryCmdError("Glossaries directory missing in the configuration file")
    glossary_files = sorted(Path(glossary_dir).glob(f'*{Glossary.file_suffix}'))
    for glossary_file in glossary_files:
        glossary = Glossary.from_file(glossary_file)
        print(glossary.to_str())
 def print_glossary(args: argparse.Namespace, config: Config) -> None:
    """
    Print an existing glossary (including its entries) using the pager.
    The glossary is read from 'args.file' if given, otherwise from the
    glossaries directory using 'args.name'.
    """
    # sanity checks
    if args.name is None:
        raise GlossaryCmdError("Missing glossary name")
    if config.glossaries is None and args.file is None:
        raise GlossaryCmdError("Glossaries directory missing in the configuration file")
    # an explicitly given file wins over the glossaries directory
    if args.file:
        glossary_file = Path(args.file)
    else:
        glossary_file = get_glossary_file_path(args.name, config)
    if not glossary_file.exists():
        raise GlossaryCmdError(f"Glossary '{glossary_file}' does not exist")
    # read and show the glossary
    glossary = Glossary.from_file(glossary_file)
    print_paged(glossary.to_str(with_entries=True))
 def create_glossary(args: argparse.Namespace, config: Config) -> None:
    """
    Create a new (empty) glossary from the CLI arguments and write it
    either to 'args.file' or to the glossaries directory. Raises a
    'GlossaryCmdError' if a mandatory argument is missing or if the
    glossary file already exists.
    """
    # sanity checks (the order defines which error is reported first)
    mandatory = (('name', "Missing glossary name"),
                 ('source_lang', "Missing source language"),
                 ('target_lang', "Missing target language"))
    for attr, complaint in mandatory:
        if getattr(args, attr) is None:
            raise GlossaryCmdError(complaint)
    if config.glossaries is None and args.file is None:
        raise GlossaryCmdError("Glossaries directory missing in the configuration file")
    # an explicitly given file wins over the glossaries directory
    if args.file:
        glossary_file = Path(args.file)
    else:
        glossary_file = get_glossary_file_path(args.name, config)
    if glossary_file.exists():
        raise GlossaryCmdError(f"Glossary '{glossary_file}' already exists")
    glossary = Glossary(name=args.name,
                        source_lang=args.source_lang,
                        target_lang=args.target_lang,
                        desc=args.description,
                        file_path=glossary_file)
    glossary.to_file()
    print(f"Successfully created new glossary '{glossary_file}'.")
 def glossary_cmd(args: argparse.Namespace, config: Config) -> None:
    """
    Handler for the 'glossary' command: dispatches to the function that
    creates, lists or prints glossaries. A 'GlossaryCmdError' is printed
    and terminates the program with exit code 1.
    """
    handler = None
    if args.create:
        handler = create_glossary
    elif args.list:
        handler = list_glossaries
    elif args.print:
        handler = print_glossary
    if handler is None:
        # none of the modes was selected
        return
    try:
        handler(args, config)
    except GlossaryCmdError as err:
        print(f"Error: {err}")
        sys.exit(1)
@@ -3,10 +3,9 @@ import argparse
 from pathlib import Path
 from itertools import zip_longest
 from copy import deepcopy
 from .common import invert_input_tag_args, add_file_as_code, add_file_as_text
 from ..configuration import Config
 from ..chat import ChatDB, msg_location
-from ..message import Message, MessageFilter, Question
+from ..message import Message, MessageFilter, MessageError, Question, source_code
 from ..ai_factory import create_ai
 from ..ai import AI, AIResponse
@@ -15,6 +14,47 @@ class QuestionCmdError(Exception):
    pass
 def add_file_as_text(question_parts: list[str], file: str) -> None:
    """
    Add the given file as plain text to the question part list.
    If the file is a Message, add the answer. Nothing is added
    if there is no content (empty file or Message without answer).
    """
    file_path = Path(file)
    # start with empty content: a Message without an answer used to
    # leave 'content' unbound and triggered an 'UnboundLocalError' below
    content: str = ''
    try:
        message = Message.from_file(file_path)
        if message and message.answer:
            content = message.answer
    except MessageError:
        # not a message file -> read it as plain text
        with open(file) as r:
            content = r.read().strip()
    if len(content) > 0:
        question_parts.append(content)
 def add_file_as_code(question_parts: list[str], file: str) -> None:
    """
    Add all source code from the given file. If no code segments can be extracted,
    the whole content is added as source code segment. If the file is a Message,
    extract the source code from the answer; a Message without an answer adds nothing.
    """
    file_path = Path(file)
    content: str
    try:
        message = Message.from_file(file_path)
    except MessageError:
        # not a message file -> treat it as a plain source file
        with open(file) as r:
            content = r.read().strip()
    else:
        if not (message and message.answer):
            # 'content' used to stay unbound in this case, which made
            # the code below fail with an 'UnboundLocalError'
            return
        content = message.answer
    # extract and add source code
    code_parts = source_code(content, include_delims=True)
    if len(code_parts) > 0:
        question_parts += code_parts
    else:
        question_parts.append(f"```\n{content}\n```")
 def create_msg_args(msg: Message, args: argparse.Namespace) -> argparse.Namespace:
    """
    Takes an existing message and CLI arguments, and returns modified args based
@@ -61,7 +101,7 @@ def create_message(chat: ChatDB, args: argparse.Namespace) -> Message:
        if code_file is not None and len(code_file) > 0:
            add_file_as_code(question_parts, code_file)
-    full_question = '\n\n'.join([str(s) for s in question_parts])
+    full_question = '\n\n'.join(question_parts)
    message = Message(question=Question(full_question),
                      tags=args.output_tags,
@@ -89,16 +129,13 @@ def make_request(ai: AI, chat: ChatDB, message: Message, args: argparse.Namespac
                                      args.output_tags)
    # only write the response messages to the cache,
    # don't add them to the internal list
    chat.cache_write(response.messages)
    for idx, msg in enumerate(response.messages):
-        print(f"=== ANSWER {idx+1} ===", flush=True)
+        print(f"=== ANSWER {idx+1} ===")
-        if msg.answer:
+        print(msg.answer)
            for piece in msg.answer:
                print(piece, end='', flush=True)
            print()
    if response.tokens:
        print("===============")
        print(response.tokens)
    chat.cache_write(response.messages)
 def repeat_messages(messages: list[Message], chat: ChatDB, args: argparse.Namespace, config: Config) -> None:
@@ -123,6 +160,22 @@ def repeat_messages(messages: list[Message], chat: ChatDB, args: argparse.Namesp
            make_request(ai, chat, message, msg_args)
 def invert_input_tag_args(args: argparse.Namespace) -> None:
    """
    Inverts the meaning of the INPUT tag arguments ('or_tags' and
    'and_tags') for this command:
    * no tags specified on the CLI ('None') -> no tags are selected
    * empty tags specified on the CLI -> all tags are selected ('None')
    """
    for attr in ('or_tags', 'and_tags'):
        tags = getattr(args, attr)
        if tags is None:
            setattr(args, attr, set())
        elif len(tags) == 0:
            setattr(args, attr, None)
 def question_cmd(args: argparse.Namespace, config: Config) -> None:
    """
    Handler for the 'question' command.
@@ -1,105 +0,0 @@
 import argparse
 import mimetypes
 from pathlib import Path
 from .common import invert_input_tag_args, read_text_file
 from ..configuration import Config
 from ..message import MessageFilter, Message, Question
 from ..chat import ChatDB, msg_location
 class TranslationCmdError(Exception):
    """Error raised by the functions of the 'translation' command."""
 text_separator: str = 'TEXT:'
 def assert_document_type_supported_openai(document_file: Path) -> None:
    """
    Make sure that the given document can be handled by the AI 'OpenAI'.
    Raises a 'TranslationCmdError' unless the MIME type guessed from the
    file name is 'text/plain'.
    """
    # 'guess_type()' returns a '(type, encoding)' tuple; comparing the
    # whole tuple to a string never matched, so every document was rejected
    doctype, _ = mimetypes.guess_type(document_file)
    if doctype != 'text/plain':
        raise TranslationCmdError("AI 'OpenAI' only supports document type 'text/plain'")
 def translation_prompt_openai(source_lang: str, target_lang: str) -> str:
    """
    Build the instruction that tells GPT to translate the text following
    the text separator line from the source to the target language.
    """
    instruction = f"Translate the text below the line {text_separator} from {source_lang} to {target_lang}."
    return instruction
 def create_message_openai(chat: ChatDB, args: argparse.Namespace) -> Message:
    """
    Build a new translation message from the CLI arguments, write it to
    the cache directory and return it.
    The question consists of the following parts (in this order):
    1. Translation prompt (tells GPT to do a translation)
    2. Glossary (if specified as an argument, not implemented yet)
    3. User provided prompt enhancements
    4. Translation separator
    5. User provided text to be translated
    The text to be translated is selected as follows:
    - if a document is given in the arguments, its content is translated
      and all text arguments are used as prompt enhancements
    - otherwise the last text argument is translated and the preceding
      ones are used as prompt enhancements
    Raises a 'TranslationCmdError' if neither '--create' nor '--ask'
    provides any text.
    """
    if args.create is not None:
        text_args = args.create
    elif args.ask is not None:
        text_args = args.ask
    else:
        raise TranslationCmdError("No input text found")
    # split the input into prompt enhancements and text to be translated
    if args.input_document is None:
        user_text = text_args[-1]
        prompt_parts = text_args[:-1]
    else:
        document = Path(args.input_document)
        assert_document_type_supported_openai(document)
        user_text = read_text_file(Path(args.input_document))
        prompt_parts = text_args
    user_prompt = '\n\n'.join(map(str, prompt_parts))
    # build full question string
    # FIXME: add glossaries if given
    sections = [translation_prompt_openai(args.source_lang, args.target_lang),
                user_prompt,
                text_separator,
                user_text]
    question_text = '\n\n'.join(sections)
    # create the message; it is only written to the cache
    # and not added to the internal list of the chat
    message = Message(question=Question(question_text),
                      tags=args.output_tags,
                      ai=args.AI,
                      model=args.model)
    chat.cache_write([message])
    return message
 def translation_cmd(args: argparse.Namespace, config: Config) -> None:
    """
    Handler for the 'translation' command. Reads the chat history
    selected by the tag, glob and location arguments and, for '--ask'
    and '--create', creates and stores a new translation message.
    Depending on the AI, the whole process may be significantly
    different (e.g. DeepL vs OpenAI).
    """
    invert_input_tag_args(args)
    tag_filter = MessageFilter(tags_or=args.or_tags,
                               tags_and=args.and_tags,
                               tags_not=args.exclude_tags)
    chat = ChatDB.from_dir(cache_path=Path(config.cache),
                           db_path=Path(config.db),
                           mfilter=tag_filter,
                           glob=args.glob,
                           loc=msg_location(args.location))
    # a new translation is created and stored immediately
    # FIXME: check AI type
    is_new_translation = args.ask or args.create
    if is_new_translation:
        create_message_openai(chat, args)
    if args.create:
        return
@@ -118,7 +118,6 @@ class Config:
    # a default configuration
    cache: str = '.'
    db: str = './db/'
    glossaries: str | None = './glossaries/'
    ais: dict[str, AIConfig] = field(default_factory=create_default_ai_configs)
    @classmethod
@@ -136,8 +135,7 @@ class Config:
        return cls(
            cache=str(source['cache']) if 'cache' in source else '.',
            db=str(source['db']),
-            ais=ais,
+            ais=ais
            glossaries=str(source['glossaries']) if 'glossaries' in source else None
        )
    @classmethod
@@ -150,8 +148,6 @@ class Config:
    @classmethod
    def from_file(cls: Type[ConfigInst], path: str) -> ConfigInst:
        if not Path(path).exists():
            raise ConfigError(f"Configuration file '{path}' not found. Use 'cmm config --create' to create one.")
        with open(path, 'r') as f:
            source = yaml.load(f, Loader=yaml.FullLoader)
        return cls.from_dict(source)
@@ -1,165 +0,0 @@
 """
 Module implementing glossaries for translations.
 """
 import yaml
 import tempfile
 import shutil
 import csv
 from pathlib import Path
 from dataclasses import dataclass, field
 from typing import Type, TypeVar, ClassVar
 GlossaryInst = TypeVar('GlossaryInst', bound='Glossary')
 class GlossaryError(Exception):
    """Error raised when reading, writing or importing a glossary fails."""
 def str_presenter(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode:
    """
    YAML representer for strings: strings spanning more than one line
    are dumped using the multiline ('|') style, all others are dumped
    with the default style.
    """
    str_tag = 'tag:yaml.org,2002:str'
    is_multiline = len(data.splitlines()) > 1
    if not is_multiline:
        return dumper.represent_scalar(str_tag, data)
    return dumper.represent_scalar(str_tag, data, style='|')
@dataclass
class Glossary:
    """
    A glossary consists of the following parameters:
        - Name (freely selectable)
        - Path (full file path, suffix is automatically generated)
        - Source language
        - Target language
        - Description (optional)
        - Entries (pairs of source lang and target lang terms)
        - ID (automatically generated / modified, required by DeepL)
    """
    name: str
    source_lang: str
    target_lang: str
    file_path: Path | None = None
    desc: str | None = None
    entries: dict[str, str] = field(default_factory=dict)
    ID: str | None = None
    file_suffix: ClassVar[str] = '.glo'

    def __post_init__(self) -> None:
        # FIXME: check for valid languages
        pass

    @classmethod
    def from_file(cls: Type[GlossaryInst], file_path: Path) -> GlossaryInst:
        """
        Create a glossary from the given file. Raises a 'GlossaryError'
        if the file does not exist, has the wrong suffix or does not
        contain a valid glossary.
        """
        if not file_path.exists():
            raise GlossaryError(f"Glossary file '{file_path}' does not exist")
        if file_path.suffix != cls.file_suffix:
            raise GlossaryError(f"File type '{file_path.suffix}' is not supported")
        with open(file_path, "r") as fd:
            try:
                # use BaseLoader so every entry is read as a string
                # - disables automatic conversions
                # - makes it possible to omit quoting for YAML keywords in entries (e. g. 'yes')
                # - also correctly reads quoted entries
                data = yaml.load(fd, Loader=yaml.BaseLoader)
                return cls(name=data['Name'],
                           source_lang=data['SourceLang'],
                           target_lang=data['TargetLang'],
                           file_path=file_path,
                           desc=data['Description'],
                           entries=data['Entries'],
                           # 'to_file()' writes a missing ID as the string 'None'
                           ID=data['ID'] if data['ID'] != 'None' else None)
            except Exception as err:
                # keep the original error as the cause to ease debugging
                raise GlossaryError(f"'{file_path}' does not contain a valid glossary") from err

    def to_file(self, file_path: Path | None = None) -> None:
        """
        Write the glossary to the given file (or to 'self.file_path' if
        no file is given). A missing suffix is added automatically, any
        other suffix than the glossary suffix raises a 'GlossaryError'.
        """
        if file_path:
            self.file_path = file_path
        if not self.file_path:
            raise GlossaryError("Got no valid path to write glossary")
        # check / add valid suffix
        if not self.file_path.suffix:
            self.file_path = self.file_path.with_suffix(self.file_suffix)
        elif self.file_path.suffix != self.file_suffix:
            raise GlossaryError(f"File suffix '{self.file_path.suffix}' is not supported")
        data = {'Name': self.name,
                'Description': str(self.desc),
                'ID': str(self.ID),
                'SourceLang': self.source_lang,
                'TargetLang': self.target_lang,
                'Entries': self.entries}
        # write the YAML to a temporary file in the target directory
        # first and move it to the final location afterwards
        temp_file_path: Path | None = None
        try:
            with tempfile.NamedTemporaryFile(dir=self.file_path.parent,
                                             prefix=self.file_path.name,
                                             mode="w",
                                             delete=False) as temp_fd:
                temp_file_path = Path(temp_fd.name)
                yaml.dump(data, temp_fd, sort_keys=False)
            shutil.move(temp_file_path, self.file_path)
        except BaseException:
            # don't leave a stale temporary file behind if writing failed
            if temp_file_path is not None:
                temp_file_path.unlink(missing_ok=True)
            raise

    def export_csv(self, dictionary: dict[str, str], file_path: Path) -> None:
        """
        Export the 'entries' of this glossary to a file in CSV format (compatible with DeepL).
        The 'dictionary' parameter is not used, it is only kept for compatibility.
        """
        with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
            writer.writerows(self.entries.items())

    def export_tsv(self, entries: dict[str, str], file_path: Path) -> None:
        """
        Export the 'entries' of this glossary to a file in TSV format (compatible with DeepL).
        The 'entries' parameter is not used, it is only kept for compatibility.
        """
        with open(file_path, 'w', encoding='utf-8') as file:
            for source_entry, target_entry in self.entries.items():
                file.write(f"{source_entry}\t{target_entry}\n")

    def import_csv(self, file_path: Path) -> None:
        """
        Import the entries from the given CSV file to those of the current glossary.
        Existing entries are overwritten. Rows with less than two columns are skipped.
        """
        try:
            with open(file_path, mode='r', encoding='utf-8') as csvfile:
                reader = csv.reader(csvfile, delimiter=',', quotechar='"')
                self.entries = {rows[0]: rows[1] for rows in reader if len(rows) >= 2}
        except Exception as e:
            raise GlossaryError(f"Error importing CSV: {e}") from e

    def import_tsv(self, file_path: Path) -> None:
        """
        Import the entries from the given TSV file to those of the current glossary.
        Existing entries are overwritten. Lines that don't consist of exactly two
        tab separated parts are skipped.
        """
        try:
            with open(file_path, mode='r', encoding='utf-8') as tsvfile:
                self.entries = {}
                for line in tsvfile:
                    parts = line.strip().split('\t')
                    if len(parts) == 2:
                        self.entries[parts[0]] = parts[1]
        except Exception as e:
            raise GlossaryError(f"Error importing TSV: {e}") from e

    def to_str(self, with_entries: bool = False) -> str:
        """
        Return the current glossary as a string. The entries are either
        listed one per line ('with_entries') or only counted.
        """
        output: list[str] = [f'{self.name} (ID: {self.ID}):']
        # a missing description is stored as the string 'None' in glossary files
        if self.desc and self.desc != 'None':
            output.append('- ' + self.desc)
        output.append(f'- Languages: {self.source_lang} -> {self.target_lang}')
        if with_entries:
            output.append('- Entries:')
            output.extend(f'  {source} : {target}' for source, target in self.entries.items())
        else:
            output.append(f'- Entries: {len(self.entries)}')
        return '\n'.join(output)
@@ -3,20 +3,17 @@
 # vim: set fileencoding=utf-8 :
 import sys
 import os
 import argcomplete
 import argparse
 from pathlib import Path
 from typing import Any
-from .configuration import Config, default_config_file, ConfigError
+from .configuration import Config, default_config_file
 from .message import Message
 from .commands.question import question_cmd
 from .commands.tags import tags_cmd
 from .commands.config import config_cmd
 from .commands.hist import hist_cmd
 from .commands.print import print_cmd
 from .commands.translation import translation_cmd
 from .commands.glossary import glossary_cmd
 from .chat import msg_location
@@ -55,7 +52,7 @@ def create_parser() -> argparse.ArgumentParser:
    ai_parser = argparse.ArgumentParser(add_help=False)
    ai_parser.add_argument('-A', '--AI', help='AI ID to use', metavar='AI_ID')
    ai_parser.add_argument('-M', '--model', help='Model to use', metavar='MODEL')
-    ai_parser.add_argument('-N', '--num-answers', help='Number of answers to request', type=int, default=1)
+    ai_parser.add_argument('-n', '--num-answers', help='Number of answers to request', type=int, default=1)
    ai_parser.add_argument('-m', '--max-tokens', help='Max. nr. of tokens', type=int)
    ai_parser.add_argument('-T', '--temperature', help='Temperature value', type=float)
@@ -70,10 +67,10 @@ def create_parser() -> argparse.ArgumentParser:
    question_group.add_argument('-r', '--repeat', nargs='*', help='Repeat a question', metavar='MESSAGE')
    question_group.add_argument('-p', '--process', nargs='*', help='Process existing questions', metavar='MESSAGE')
    question_cmd_parser.add_argument('-l', '--location',
-                                     choices=[x.value for x in msg_location if x not in [msg_location.MEM, msg_location.DISK]],
+                                     choices=[x.value for x in msg_location],
                                     default='db',
-                                     help='Use given location when building the chat history (default: \'db\')')
+                                     help='Select message location, default is \'db\'')
-    question_cmd_parser.add_argument('-g', '--glob', help='Filter message files using the given glob pattern')
+    question_cmd_parser.add_argument('-g', '--glob', help='Glob for message file names')
    question_cmd_parser.add_argument('-O', '--overwrite', help='Overwrite existing messages when repeating them',
                                     action='store_true')
    question_cmd_parser.add_argument('-s', '--source-text', nargs='+', help='Add content of a file to the query', metavar='FILE')
@@ -97,7 +94,7 @@ def create_parser() -> argparse.ArgumentParser:
    hist_cmd_parser.add_argument('-d', '--tight', help='Print without message separators', action='store_true')
    hist_cmd_parser.add_argument('-P', '--no-paging', help='Print without paging', action='store_true')
    hist_cmd_parser.add_argument('-l', '--location',
-                                 choices=[x.value for x in msg_location if x not in [msg_location.MEM, msg_location.DISK]],
+                                 choices=[x.value for x in msg_location],
                                 default='db',
                                 help='Use given location when building the chat history (default: \'db\')')
    hist_cmd_parser.add_argument('-g', '--glob', help='Filter message files using the given glob pattern')
@@ -105,7 +102,7 @@ def create_parser() -> argparse.ArgumentParser:
    # 'tags' command parser
    tags_cmd_parser = cmdparser.add_parser('tags',
                                           help="Manage tags.",
-                                           aliases=['T'])
+                                           aliases=['t'])
    tags_cmd_parser.set_defaults(func=tags_cmd)
    tags_group = tags_cmd_parser.add_mutually_exclusive_group(required=True)
    tags_group.add_argument('-l', '--list', help="List all tags and their frequency",
@@ -139,80 +136,10 @@ def create_parser() -> argparse.ArgumentParser:
    print_cmd_modes.add_argument('-a', '--answer', help='Only print the answer', action='store_true')
    print_cmd_modes.add_argument('-S', '--only-source-code', help='Only print embedded source code', action='store_true')
    # 'translation' command parser
    translation_cmd_parser = cmdparser.add_parser('translation', parents=[ai_parser, tag_parser],
                                                  help="Ask, create and repeat translations.",
                                                  aliases=['t'])
    translation_cmd_parser.set_defaults(func=translation_cmd)
    translation_group = translation_cmd_parser.add_mutually_exclusive_group(required=True)
    translation_group.add_argument('-a', '--ask', nargs='+', help='Ask to translate the given text', metavar='TEXT')
    translation_group.add_argument('-c', '--create', nargs='+', help='Create a translation', metavar='TEXT')
    translation_group.add_argument('-r', '--repeat', nargs='*', help='Repeat a translation', metavar='MESSAGE')
    translation_cmd_parser.add_argument('-l', '--source-lang', help="Source language", metavar="LANGUAGE", required=True)
    translation_cmd_parser.add_argument('-L', '--target-lang', help="Target language", metavar="LANGUAGE", required=True)
    translation_cmd_parser.add_argument('-G', '--glossaries', nargs='+', help="List of glossary names", metavar="GLOSSARY")
    translation_cmd_parser.add_argument('-d', '--input-document', help="Document to translate", metavar="FILE")
    translation_cmd_parser.add_argument('-D', '--output-document', help="Path for the translated document", metavar="FILE")
    # 'glossary' command parser
    glossary_cmd_parser = cmdparser.add_parser('glossary', parents=[ai_parser],
                                               help="Manage glossaries.",
                                               aliases=['g'])
    glossary_cmd_parser.set_defaults(func=glossary_cmd)
    glossary_group = glossary_cmd_parser.add_mutually_exclusive_group(required=True)
    glossary_group.add_argument('-c', '--create', help='Create a glossary', action='store_true')
    glossary_cmd_parser.add_argument('-n', '--name', help="Glossary name (not ID)", metavar="NAME")
    glossary_cmd_parser.add_argument('-l', '--source-lang', help="Source language", metavar="LANGUAGE")
    glossary_cmd_parser.add_argument('-L', '--target-lang', help="Target language", metavar="LANGUAGE")
    glossary_cmd_parser.add_argument('-f', '--file', help='File path of the goven glossary', metavar='GLOSSARY_FILE')
    glossary_cmd_parser.add_argument('-D', '--description', help="Glossary description", metavar="DESCRIPTION")
    glossary_group.add_argument('-i', '--list', help='List existing glossaries', action='store_true')
    glossary_group.add_argument('-p', '--print', help='Print an existing glossary', action='store_true')
    argcomplete.autocomplete(parser)
    return parser
 def create_directories(config: Config) -> None:  # noqa: 11
    """
    Create the directories in the given configuration if they don't exist.

    The cache and DB directories are always checked, the glossaries directory
    only if it is set in the configuration. For each missing directory the
    user is asked for confirmation. If the user declines, or if creating the
    directory fails, a message is printed and the program exits with status 1.
    """
    def make_dir(path: Path) -> None:
        try:
            os.makedirs(path.absolute())
        except (OSError, ValueError) as e:
            print(f"Creating directory '{path.absolute()}' failed with: {e}")
            sys.exit(1)

    def ensure_dir(location: str, label: str, refusal: str) -> None:
        # Shared prompt-and-create logic for all configured directories
        path = Path(location)
        if path.exists():
            return
        answer = input(f"{label} directory '{path}' does not exist. Create it? [y/n]")
        # Tolerate surrounding whitespace in the reply (e.g. 'y ')
        if answer.strip().lower() in ['y', 'yes']:
            make_dir(path)
        else:
            print(refusal)
            sys.exit(1)

    # Cache
    ensure_dir(config.cache, "Cache",
               "Can't continue without a valid cache directory!")
    # DB
    ensure_dir(config.db, "DB",
               "Can't continue without a valid DB directory!")
    # Glossaries (optional)
    if config.glossaries:
        ensure_dir(config.glossaries, "Glossaries",
                   "Can't continue without a valid glossaries directory. Create it or remove it from the configuration.")
 def main() -> int:
    parser = create_parser()
    args = parser.parse_args()
@@ -221,12 +148,7 @@ def main() -> int:
    if command.func == config_cmd:
        command.func(command)
    else:
-        try:
+        config = Config.from_file(args.config)
            config = Config.from_file(args.config)
        except ConfigError as err:
            print(f"{err}")
            return 1
        create_directories(config)
        command.func(command, config)
    return 0
@@ -5,9 +5,7 @@ import pathlib
 import yaml
 import tempfile
 import shutil
 import io
 from typing import Type, TypeVar, ClassVar, Optional, Any, Union, Final, Literal, Iterable, Tuple
 from typing import Generator, Iterator
 from typing import get_args as typing_get_args
 from dataclasses import dataclass, asdict, field
 from .tags import Tag, TagLine, TagError, match_tags, rename_tags
@@ -51,7 +49,7 @@ def source_code(text: str, include_delims: bool = False) -> list[str]:
    code_lines: list[str] = []
    in_code_block = False
-    for line in str(text).split('\n'):
+    for line in text.split('\n'):
        if line.strip().startswith('```'):
            if include_delims:
                code_lines.append(line)
@@ -144,100 +142,30 @@ class Answer(str):
    txt_header: ClassVar[str] = '==== ANSWER ===='
    yaml_key: ClassVar[str] = 'answer'
-    def __init__(self, data: Union[str, Generator[str, None, None]]) -> None:
+    def __new__(cls: Type[AnswerInst], string: str) -> AnswerInst:
        # Indicator of whether all of data has been processed
        self.is_exhausted: bool = False
        # Initialize data
        self.iterator: Iterator[str] = self._init_data(data)
        # Set up the buffer to hold the 'Answer' content
        self.buffer: io.StringIO = io.StringIO()
    def _init_data(self, data: Union[str, Generator[str, None, None]]) -> Iterator[str]:
        """
-        Process input data (either a string or a string generator)
+        Make sure the answer string does not contain the header as a whole line.
        """
-        if isinstance(data, str):
+        if cls.txt_header in string.split('\n'):
-            yield data
+            raise MessageError(f"Answer '{string}' contains the header '{cls.txt_header}'")
-        else:
+        instance = super().__new__(cls, string)
-            yield from data
+        return instance
    def __str__(self) -> str:
        """
        Output all content when converted into a string
        """
        # Ensure all data has been processed
        for _ in self:
            pass
        # Return the 'Answer' content
        return self.buffer.getvalue()
    def __repr__(self) -> str:
        return repr(str(self))
    def __iter__(self) -> Generator[str, None, None]:
        """
        Allows the object to be iterable
        """
        # Generate content if not all data has been processed
        if not self.is_exhausted:
            yield from self.generator_iter()
        else:
            yield self.buffer.getvalue()
    def generator_iter(self) -> Generator[str, None, None]:
        """
        Main generator method to process data
        """
        for piece in self.iterator:
            # Write to buffer and yield piece for the iterator
            self.buffer.write(piece)
            yield piece
        self.is_exhausted = True  # Set the flag that all data has been processed
        # If the header occurs in the 'Answer' content, raise an error
        if f'\n{self.txt_header}' in self.buffer.getvalue() or self.buffer.getvalue().startswith(self.txt_header):
            raise MessageError(f"Answer {repr(self.buffer.getvalue())} contains the header {repr(Answer.txt_header)}")
    def __eq__(self, other: object) -> bool:
        """
        Comparing the object to a string or another object
        """
        if isinstance(other, str):
            return str(self) == other  # Compare the string value of this object to the other string
        # Default behavior for comparing non-string objects
        return super().__eq__(other)
    def __hash__(self) -> int:
        """
        Generate a hash for the object based on its string representation.
        """
        return hash(str(self))
    def __format__(self, format_spec: str) -> str:
        """
        Return a formatted version of the string as per the format specification.
        """
        return str(self).__format__(format_spec)
    @classmethod
    def from_list(cls: Type[AnswerInst], strings: list[str]) -> AnswerInst:
        """
-        Build Answer from a list of strings. Make sure strings do not contain the header.
+        Build Question from a list of strings. Make sure strings do not contain the header.
        """
-        def _gen() -> Generator[str, None, None]:
+        if cls.txt_header in strings:
-            if len(strings) > 0:
+            raise MessageError(f"Question contains the header '{cls.txt_header}'")
-                yield strings[0]
+        instance = super().__new__(cls, '\n'.join(strings).strip())
-                for s in strings[1:]:
+        return instance
                    yield '\n'
                    yield s
        return cls(_gen())
    def source_code(self, include_delims: bool = False) -> list[str]:
        """
        Extract and return all source code sections.
        """
-        return source_code(str(self), include_delims)
+        return source_code(self, include_delims)
 class Question(str):
@@ -513,7 +441,7 @@ class Message():
        output.append(self.question)
        if self.answer:
            output.append(Answer.txt_header)
-            output.append(str(self.answer))
+            output.append(self.answer)
        return '\n'.join(output)
    def to_file(self, file_path: Optional[pathlib.Path]=None, mformat: MessageFormat = message_default_format) -> None:  # noqa: 11
@@ -563,7 +491,7 @@ class Message():
                temp_fd.write(f'{ModelLine.from_model(self.model)}\n')
            temp_fd.write(f'{Question.txt_header}\n{self.question}\n')
            if self.answer:
-                temp_fd.write(f'{Answer.txt_header}\n{str(self.answer)}\n')
+                temp_fd.write(f'{Answer.txt_header}\n{self.answer}\n')
        shutil.move(temp_file_path, file_path)
    def __to_file_yaml(self, file_path: pathlib.Path) -> None:
@@ -632,7 +560,7 @@ class Message():
           or (mfilter.ai and (not self.ai or mfilter.ai != self.ai))  # noqa: W503
           or (mfilter.model and (not self.model or mfilter.model != self.model))  # noqa: W503
           or (mfilter.question_contains and mfilter.question_contains not in self.question)  # noqa: W503
-           or (mfilter.answer_contains and (not self.answer or mfilter.answer_contains not in str(self.answer)))  # noqa: W503
+           or (mfilter.answer_contains and (not self.answer or mfilter.answer_contains not in self.answer))  # noqa: W503
           or (mfilter.answer_state == 'available' and not self.answer)  # noqa: W503
           or (mfilter.ai_state == 'available' and not self.ai)  # noqa: W503
           or (mfilter.model_state == 'available' and not self.model)  # noqa: W503
@@ -2,4 +2,3 @@ openai
 PyYAML
 argcomplete
 pytest
 tiktoken
@@ -16,37 +16,26 @@ class OpenAITest(unittest.TestCase):
        openai = OpenAI(config)
        # Set up the mock response from openai.ChatCompletion.create
-        mock_chunk1 = {
+        mock_response = {
            'choices': [
                {
-                    'index': 0,
+                    'message': {
                    'delta': {
                        'content': 'Answer 1'
-                    },
+                    }
                    'finish_reason': None
                },
                {
-                    'index': 1,
+                    'message': {
                    'delta': {
                        'content': 'Answer 2'
-                    },
+                    }
                    'finish_reason': None
                }
            ],
            'usage': {
                'prompt_tokens': 10,
                'completion_tokens': 20,
                'total_tokens': 30
            }
        }
-        mock_chunk2 = {
+        mock_create.return_value = mock_response
            'choices': [
                {
                    'index': 0,
                    'finish_reason': 'stop'
                },
                {
                    'index': 1,
                    'finish_reason': 'stop'
                }
            ],
        }
        mock_create.return_value = iter([mock_chunk1, mock_chunk2])
        # Create test data
        question = Message(Question('Question'))
@@ -68,9 +57,9 @@ class OpenAITest(unittest.TestCase):
        self.assertIsNotNone(response.tokens)
        self.assertIsInstance(response.tokens, Tokens)
        assert response.tokens
-        self.assertEqual(response.tokens.prompt, 53)
+        self.assertEqual(response.tokens.prompt, 10)
-        self.assertEqual(response.tokens.completion, 6)
+        self.assertEqual(response.tokens.completion, 20)
-        self.assertEqual(response.tokens.total, 59)
+        self.assertEqual(response.tokens.total, 30)
        # Assert the mock call to openai.ChatCompletion.create
        mock_create.assert_called_once_with(
@@ -87,7 +76,6 @@ class OpenAITest(unittest.TestCase):
            max_tokens=config.max_tokens,
            top_p=config.top_p,
            n=2,
            stream=True,
            frequency_penalty=config.frequency_penalty,
            presence_penalty=config.presence_penalty
        )
@@ -71,13 +71,11 @@ class TestConfig(unittest.TestCase):
                    'frequency_penalty': 0.7,
                    'presence_penalty': 0.2
                }
-            },
+            }
            'glossaries': './glossaries/'
        }
        config = Config.from_dict(source_dict)
        self.assertEqual(config.cache, '.')
        self.assertEqual(config.db, './test_db/')
        self.assertEqual(config.glossaries, './glossaries/')
        self.assertEqual(len(config.ais), 1)
        self.assertEqual(config.ais['myopenai'].name, 'openai')
        self.assertEqual(cast(OpenAIConfig, config.ais['myopenai']).system, 'Custom system')
@@ -107,7 +105,6 @@ class TestConfig(unittest.TestCase):
                    'frequency_penalty': 0.7,
                    'presence_penalty': 0.2
                }
                # omit glossaries, since it's optional
            }
        }
        with open(self.test_file.name, 'w') as f:
@@ -116,8 +113,6 @@ class TestConfig(unittest.TestCase):
        self.assertIsInstance(config, Config)
        self.assertEqual(config.cache, './test_cache/')
        self.assertEqual(config.db, './test_db/')
        # missing 'glossaries' should result in 'None'
        self.assertEqual(config.glossaries, None)
        self.assertEqual(len(config.ais), 1)
        self.assertIsInstance(config.ais['default'], AIConfig)
        self.assertEqual(cast(OpenAIConfig, config.ais['default']).system, 'Custom system')
@@ -1,209 +0,0 @@
 import unittest
 import tempfile
 from pathlib import Path
 from chatmastermind.glossary import Glossary, GlossaryError
 glossary_suffix: str = Glossary.file_suffix
 class TestGlossary(unittest.TestCase):
    """
    Tests for creating glossaries from files, writing them back,
    importing / exporting CSV and TSV, and converting them to strings.
    """

    def _write_temp_yaml(self, content: str) -> Path:
        """
        Write 'content' to a temporary glossary file and return its path.
        The file is removed after the test, even if an assertion fails.
        """
        with tempfile.NamedTemporaryFile('w', delete=False, suffix=glossary_suffix) as yaml_file:
            yaml_file.write(content)
        yaml_file_path = Path(yaml_file.name)
        self.addCleanup(yaml_file_path.unlink, missing_ok=True)
        return yaml_file_path

    def _check_sample_glossary(self, glossary: Glossary) -> None:
        """
        Check all fields of the 'Sample' glossary used by the YAML tests.
        """
        self.assertEqual(glossary.name, "Sample")
        self.assertEqual(glossary.desc, "A brief description")
        self.assertEqual(glossary.ID, "123")
        self.assertEqual(glossary.source_lang, "en")
        self.assertEqual(glossary.target_lang, "es")
        self.assertEqual(glossary.entries, {"hello": "hola",
                                            "goodbye": "adiós",
                                            "yes": "sí",
                                            "I'm going home": "me voy a casa"})

    def test_from_file_yaml_unquoted(self) -> None:
        """
        Test glossary creation from YAML with unquoted entries.
        """
        yaml_file_path = self._write_temp_yaml("Name: Sample\n"
                                               "Description: A brief description\n"
                                               "ID: '123'\n"
                                               "SourceLang: en\n"
                                               "TargetLang: es\n"
                                               "Entries:\n"
                                               "  hello: hola\n"
                                               "  goodbye: adiós\n"
                                               # 'yes' is a YAML keyword and would normally be quoted
                                               "  yes: sí\n"
                                               "  I'm going home: me voy a casa\n")
        # create and check valid glossary
        self._check_sample_glossary(Glossary.from_file(yaml_file_path))

    def test_from_file_yaml_quoted(self) -> None:
        """
        Test glossary creation from YAML with quoted entries.
        """
        yaml_file_path = self._write_temp_yaml("Name: Sample\n"
                                               "Description: A brief description\n"
                                               "ID: '123'\n"
                                               "SourceLang: en\n"
                                               "TargetLang: es\n"
                                               "Entries:\n"
                                               "  'hello': 'hola'\n"
                                               "  'goodbye': 'adiós'\n"
                                               "  'yes': 'sí'\n"
                                               "  \"I'm going home\": 'me voy a casa'\n")
        # create and check valid glossary
        self._check_sample_glossary(Glossary.from_file(yaml_file_path))

    def test_to_file_writes_yaml(self) -> None:
        """
        Test that 'to_file()' writes all glossary fields as YAML.
        """
        glossary = Glossary(name="Test",
                            desc="Test description",
                            ID="666",
                            source_lang="en",
                            target_lang="fr",
                            entries={"yes": "oui"})

        with tempfile.NamedTemporaryFile('w', suffix=glossary_suffix) as tmp_file:
            file_path = Path(tmp_file.name)
            glossary.to_file(file_path)
            # read and check valid YAML
            with open(file_path, 'r') as file:
                content = file.read()
        for expected in ("Name: Test",
                         "Description: Test description",
                         "ID: '666'",
                         "SourceLang: en",
                         "TargetLang: fr",
                         "Entries",
                         # 'yes' is a YAML keyword and therefore quoted
                         "'yes': oui"):
            self.assertIn(expected, content)

    def test_write_read_glossary(self) -> None:
        """
        Test that a glossary written to a file is read back unchanged.
        """
        # -> use 'yes' in order to test if the YAML quoting is correctly removed when reading the file
        glossary_write = Glossary(name="Test", source_lang="en", target_lang="fr", entries={"yes": "oui"})

        with tempfile.NamedTemporaryFile('w', suffix=glossary_suffix) as tmp_file:
            file_path = Path(tmp_file.name)
            glossary_write.to_file(file_path)
            # create new instance from glossary file
            glossary_read = Glossary.from_file(file_path)
        self.assertEqual(glossary_write.name, glossary_read.name)
        self.assertEqual(glossary_write.source_lang, glossary_read.source_lang)
        self.assertEqual(glossary_write.target_lang, glossary_read.target_lang)
        self.assertDictEqual(glossary_write.entries, glossary_read.entries)

    def test_import_export_csv(self) -> None:
        """
        Test that entries exported to CSV are imported unchanged.
        """
        glossary = Glossary(name="Test", source_lang="en", target_lang="fr", entries={})

        # First export to CSV
        with tempfile.NamedTemporaryFile('w', suffix=glossary_suffix) as csvfile:
            csv_file_path = Path(csvfile.name)
            glossary.entries = {"hello": "salut", "goodbye": "au revoir"}
            glossary.export_csv(glossary.entries, csv_file_path)
            # Now import CSV
            glossary.import_csv(csv_file_path)
        self.assertEqual(glossary.entries, {"hello": "salut", "goodbye": "au revoir"})

    def test_import_export_tsv(self) -> None:
        """
        Test that entries exported to TSV are imported unchanged.
        """
        glossary = Glossary(name="Test", source_lang="en", target_lang="fr", entries={})

        # First export to TSV
        with tempfile.NamedTemporaryFile('w', suffix=glossary_suffix) as tsvfile:
            tsv_file_path = Path(tsvfile.name)
            glossary.entries = {"hello": "salut", "goodbye": "au revoir"}
            glossary.export_tsv(glossary.entries, tsv_file_path)
            # Now import TSV
            glossary.import_tsv(tsv_file_path)
        self.assertEqual(glossary.entries, {"hello": "salut", "goodbye": "au revoir"})

    def test_to_file_wrong_suffix(self) -> None:
        """
        Test for exception if suffix is wrong.
        """
        glossary = Glossary(name="Test", source_lang="en", target_lang="fr", entries={"yes": "oui"})
        with tempfile.NamedTemporaryFile('w', suffix='.wrong') as tmp_file:
            file_path = Path(tmp_file.name)
            with self.assertRaises(GlossaryError) as err:
                glossary.to_file(file_path)
        # The message check must be placed after the 'assertRaises' block,
        # inside the block it is never reached once the exception is raised.
        # NOTE(review): this check was unreachable before, confirm the expected text.
        self.assertEqual(str(err.exception), "File suffix '.wrong' is not supported")

    def test_to_file_auto_suffix(self) -> None:
        """
        Test if suffix is auto-generated if omitted.
        """
        glossary = Glossary(name="Test", source_lang="en", target_lang="fr", entries={"yes": "oui"})
        with tempfile.NamedTemporaryFile('w', suffix='') as tmp_file:
            file_path = Path(tmp_file.name)
            glossary.to_file(file_path)
            assert glossary.file_path is not None
            # remove glossary file (differs from 'tmp_file' because of the added suffix),
            # also if the following check fails
            self.addCleanup(glossary.file_path.unlink, missing_ok=True)
            self.assertEqual(glossary.file_path.suffix, glossary_suffix)

    def test_to_str_with_id(self) -> None:
        """
        Test the string representation of a glossary with an ID.
        """
        glossary_with_id = Glossary(name="TestGlossary", source_lang="en", target_lang="fr",
                                    desc="A simple test glossary", ID="1001", entries={"one": "un"})
        glossary_str = glossary_with_id.to_str()
        self.assertIn("TestGlossary (ID: 1001):", glossary_str)
        self.assertIn("- A simple test glossary", glossary_str)
        self.assertIn("- Languages: en -> fr", glossary_str)
        self.assertIn("- Entries: 1", glossary_str)

    def test_to_str_with_id_and_entries(self) -> None:
        """
        Test the string representation of a glossary with an ID, including its entries.
        """
        glossary_with_entries = Glossary(name="TestGlossaryWithEntries", source_lang="en", target_lang="fr",
                                         desc="Another test glossary", ID="2002",
                                         entries={"hello": "salut", "goodbye": "au revoir"})
        glossary_str_with_entries = glossary_with_entries.to_str(with_entries=True)
        self.assertIn("TestGlossaryWithEntries (ID: 2002):", glossary_str_with_entries)
        self.assertIn("- Entries:", glossary_str_with_entries)
        self.assertIn("  hello : salut", glossary_str_with_entries)
        self.assertIn("  goodbye : au revoir", glossary_str_with_entries)

    def test_to_str_without_id(self) -> None:
        """
        Test the string representation of a glossary without an ID.
        """
        glossary_without_id = Glossary(name="TestGlossaryNoID", source_lang="en", target_lang="fr",
                                       desc="A test glossary without an ID", ID=None, entries={"yes": "oui"})
        glossary_str_no_id = glossary_without_id.to_str()
        self.assertIn("TestGlossaryNoID (ID: None):", glossary_str_no_id)
        self.assertIn("- A test glossary without an ID", glossary_str_no_id)
        self.assertIn("- Languages: en -> fr", glossary_str_no_id)
        self.assertIn("- Entries: 1", glossary_str_no_id)

    def test_to_str_without_id_and_no_entries(self) -> None:
        """
        Test the string representation of a glossary without an ID and without entries.
        """
        glossary_no_id_no_entries = Glossary(name="EmptyGlossary", source_lang="en", target_lang="fr",
                                             desc="An empty test glossary", ID=None, entries={})
        glossary_str_no_id_no_entries = glossary_no_id_no_entries.to_str()
        self.assertIn("EmptyGlossary (ID: None):", glossary_str_no_id_no_entries)
        self.assertIn("- An empty test glossary", glossary_str_no_id_no_entries)
        self.assertIn("- Languages: en -> fr", glossary_str_no_id_no_entries)
        self.assertIn("- Entries: 0", glossary_str_no_id_no_entries)

    def test_to_str_no_description(self) -> None:
        """
        Test the string representation of a glossary without a description.
        """
        glossary_with_id = Glossary(name="TestGlossary", source_lang="en", target_lang="fr",
                                    ID="1001", entries={"one": "un"})
        glossary_str = glossary_with_id.to_str()
        expected_str = """TestGlossary (ID: 1001):
 - Languages: en -> fr
 - Entries: 1"""
        self.assertEqual(expected_str, glossary_str)
@@ -1,149 +0,0 @@
 import unittest
 import argparse
 import tempfile
 import io
 from contextlib import redirect_stdout
 from chatmastermind.configuration import Config
 from chatmastermind.commands.glossary import (
    Glossary,
    GlossaryCmdError,
    glossary_cmd,
    get_glossary_file_path,
    create_glossary,
    print_glossary,
    list_glossaries
 )
 class TestGlossaryCmdNoGlossaries(unittest.TestCase):
    """
    Tests for the 'glossary' command that start without any existing glossary.

    The error message checks are placed after the 'assertRaises' blocks:
    inside such a block, code following the raising call is never executed.
    NOTE(review): these message checks were unreachable before (and had the
    'assertIn' arguments swapped), confirm the expected message fragments.
    """

    def setUp(self) -> None:
        # create DB and cache
        self.db_dir = tempfile.TemporaryDirectory()
        self.cache_dir = tempfile.TemporaryDirectory()
        self.glossaries_dir = tempfile.TemporaryDirectory()
        # make sure the temporary directories are removed after each test
        for tmp_dir in (self.db_dir, self.cache_dir, self.glossaries_dir):
            self.addCleanup(tmp_dir.cleanup)
        # create configuration
        self.config = Config()
        self.config.cache = self.cache_dir.name
        self.config.db = self.db_dir.name
        self.config.glossaries = self.glossaries_dir.name
        # create a mock argparse.Namespace
        self.args = argparse.Namespace(
            create=True,
            list=False,
            print=False,
            name='new_glossary',
            file=None,
            source_lang='en',
            target_lang='de',
            description=False,
        )

    def test_glossary_create_no_glossaries_err(self) -> None:
        self.config.glossaries = None
        with self.assertRaises(GlossaryCmdError) as err:
            create_glossary(self.args, self.config)
        self.assertIn("glossaries directory missing", str(err.exception).lower())

    def test_glossary_create_no_name_err(self) -> None:
        self.args.name = None
        with self.assertRaises(GlossaryCmdError) as err:
            create_glossary(self.args, self.config)
        self.assertIn("missing glossary name", str(err.exception).lower())

    def test_glossary_create_no_source_lang_err(self) -> None:
        self.args.source_lang = None
        with self.assertRaises(GlossaryCmdError) as err:
            create_glossary(self.args, self.config)
        self.assertIn("missing source language", str(err.exception).lower())

    def test_glossary_create_no_target_lang_err(self) -> None:
        self.args.target_lang = None
        with self.assertRaises(GlossaryCmdError) as err:
            create_glossary(self.args, self.config)
        self.assertIn("missing target language", str(err.exception).lower())

    def test_glossary_print_no_name_err(self) -> None:
        self.args.name = None
        with self.assertRaises(GlossaryCmdError) as err:
            print_glossary(self.args, self.config)
        self.assertIn("missing glossary name", str(err.exception).lower())

    def test_glossary_list_no_glossaries_err(self) -> None:
        self.config.glossaries = None
        with self.assertRaises(GlossaryCmdError) as err:
            list_glossaries(self.args, self.config)
        self.assertIn("glossaries directory missing", str(err.exception).lower())

    def test_glossary_create(self) -> None:
        """
        Test that the 'create' mode writes a readable glossary file.
        """
        self.args.create = True
        self.args.list = False
        self.args.print = False
        glossary_cmd(self.args, self.config)
        expected_path = get_glossary_file_path(self.args.name, self.config)
        glo = Glossary.from_file(expected_path)
        self.assertEqual(glo.name, self.args.name)
        expected_path.unlink()

    def test_glossary_create_twice_err(self) -> None:
        """
        Test that creating a glossary with an existing name fails.
        """
        self.args.create = True
        self.args.list = False
        self.args.print = False
        glossary_cmd(self.args, self.config)
        expected_path = get_glossary_file_path(self.args.name, self.config)
        glo = Glossary.from_file(expected_path)
        self.assertEqual(glo.name, self.args.name)
        # create glossary with the same name again
        with self.assertRaises(GlossaryCmdError) as err:
            create_glossary(self.args, self.config)
        self.assertIn("already exists", str(err.exception).lower())
        expected_path.unlink()
 class TestGlossaryCmdWithGlossaries(unittest.TestCase):
    """
    Tests for the 'glossary' command with two glossaries
    ('Glossary1' and 'Glossary2') created in 'setUp()'.
    """

    def setUp(self) -> None:
        # create DB and cache
        self.db_dir = tempfile.TemporaryDirectory()
        self.cache_dir = tempfile.TemporaryDirectory()
        self.glossaries_dir = tempfile.TemporaryDirectory()
        # make sure the temporary directories (and the glossaries created
        # below) are removed after each test
        for tmp_dir in (self.db_dir, self.cache_dir, self.glossaries_dir):
            self.addCleanup(tmp_dir.cleanup)
        # create configuration
        self.config = Config()
        self.config.cache = self.cache_dir.name
        self.config.db = self.db_dir.name
        self.config.glossaries = self.glossaries_dir.name
        # create a mock argparse.Namespace
        self.args = argparse.Namespace(
            create=True,
            list=False,
            print=False,
            name='Glossary1',
            file=None,
            source_lang='en',
            target_lang='de',
            description=False,
        )
        # create Glossary1
        glossary_cmd(self.args, self.config)
        self.Glossary1_path = get_glossary_file_path('Glossary1', self.config)
        # create Glossary2
        self.args.name = 'Glossary2'
        glossary_cmd(self.args, self.config)
        self.Glossary2_path = get_glossary_file_path('Glossary2', self.config)

    def test_glossaries_exist(self) -> None:
        """
        Test if the default glossaries created in setUp exist.
        """
        for name, path in (('Glossary1', self.Glossary1_path),
                           ('Glossary2', self.Glossary2_path)):
            glo = Glossary.from_file(path)
            self.assertEqual(glo.name, name)

    def test_glossaries_list(self) -> None:
        """
        Test if the 'list' mode prints the names of both glossaries.
        """
        self.args.create = False
        self.args.list = True
        with redirect_stdout(io.StringIO()) as list_output:
            glossary_cmd(self.args, self.config)
        listing = list_output.getvalue()
        self.assertIn('Glossary1', listing)
        self.assertIn('Glossary2', listing)
@@ -91,7 +91,7 @@ class QuestionTestCase(unittest.TestCase):
 class AnswerTestCase(unittest.TestCase):
    def test_answer_with_header(self) -> None:
        with self.assertRaises(MessageError):
-            str(Answer(f"{Answer.txt_header}\nno"))
+            Answer(f"{Answer.txt_header}\nno")
    def test_answer_with_legal_header(self) -> None:
        answer = Answer(f"This is a line contaning '{Answer.txt_header}'\nIt is what it is.")
Author	SHA1	Message	Date
juk0de	d2a3ab0adb	main: improved parameter descriptions	2023-10-20 08:57:24 +02:00
juk0de	fbc48d20b2	README: added new 'question' command parameters	2023-10-20 08:57:24 +02:00
juk0de	480987774d	chat: changed default glob to '*.msg' in all ChatDB functions	2023-10-20 08:24:58 +02:00