translation: added check for valid document format when using OpenAI

translation: specified / implemented the question format for OpenAI based translations
translation: some small required refactoring
2023-11-08 20:11:15 +01:00 · 2023-11-08 19:38:37 +01:00 · 2023-11-08 18:20:22 +01:00 · 2023-11-07 17:35:44 +01:00
6 changed files with 242 additions and 92 deletions
@@ -47,12 +47,12 @@ class OpenAIAnswer:
            self.finished = True
        if not self.finished:
            found_choice = False
-            for choice in chunk.choices:
+            for choice in chunk['choices']:
-                if not choice.finish_reason:
+                if not choice['finish_reason']:
-                    self.streams[choice.index].data.append(choice.delta.content)
+                    self.streams[choice['index']].data.append(choice['delta']['content'])
-                    self.tokens.completion += len(self.encoding.encode(choice.delta.content))
+                    self.tokens.completion += len(self.encoding.encode(choice['delta']['content']))
                    self.tokens.total = self.tokens.prompt + self.tokens.completion
-                if choice.index == self.idx:
+                if choice['index'] == self.idx:
                    found_choice = True
            if not found_choice:
                return False
@@ -68,10 +68,6 @@ class OpenAI(AI):
        self.ID = config.ID
        self.name = config.name
        self.config = config
        self.client = openai.OpenAI(api_key=self.config.api_key)
    def _completions(self, *args, **kw):  # type: ignore
        return self.client.chat.completions.create(*args, **kw)
    def request(self,
                question: Message,
@@ -84,9 +80,10 @@ class OpenAI(AI):
        nr. of messages in the 'AIResponse'.
        """
        self.encoding = tiktoken.encoding_for_model(self.config.model)
        openai.api_key = self.config.api_key
        oai_chat, prompt_tokens = self.openai_chat(chat, self.config.system, question)
        tokens: Tokens = Tokens(prompt_tokens, 0, prompt_tokens)
-        response = self._completions(
+        response = openai.ChatCompletion.create(
            model=self.config.model,
            messages=oai_chat,
            temperature=self.config.temperature,
@@ -117,8 +114,9 @@ class OpenAI(AI):
        Return all models supported by this AI.
        """
        ret = []
-        for engine in sorted(self.client.models.list().data, key=lambda x: x.id):
+        for engine in sorted(openai.Engine.list()['data'], key=lambda x: x['id']):
-            ret.append(engine.id)
+            if engine['ready']:
                ret.append(engine['id'])
        ret.sort()
        return ret
@@ -126,8 +124,14 @@ class OpenAI(AI):
        """
        Print all models supported by the current AI.
        """
-        for model in self.models():
+        not_ready = []
-            print(model)
+        for engine in sorted(openai.Engine.list()['data'], key=lambda x: x['id']):
            if engine['ready']:
                print(engine['id'])
            else:
                not_ready.append(engine['id'])
        if len(not_ready) > 0:
            print('\nNot ready: ' + ', '.join(not_ready))
    def openai_chat(self, chat: Chat, system: str,
                    question: Optional[Message] = None) -> tuple[ChatType, int]:
@@ -0,0 +1,69 @@
 """
 Contains shared functions for the various CMM subcommands.
 """
 import argparse
 from pathlib import Path
 from ..message import Message, MessageError, source_code
 def read_text_file(file: Path) -> str:
    """
    Read the given text file and return its content without
    leading and trailing whitespace.
    """
    with open(file) as handle:
        return handle.read().strip()
 def add_file_as_text(question_parts: list[str], file: str) -> None:
    """
    Add the given file as plain text to the question part list.
    If the file is a Message, add the answer. Nothing is added if the
    Message has no answer or if the resulting text is empty.

    question_parts: list the text is appended to (modified in place)
    file: path of a Message file or of a plain text file
    """
    file_path = Path(file)
    content: str
    try:
        message = Message.from_file(file_path)
    except MessageError:
        # not readable as a Message -> use the raw file content
        content = read_text_file(file_path)
    else:
        # a Message without an answer contributes nothing; returning here
        # avoids using 'content' before it has been assigned
        if not (message and message.answer):
            return
        content = message.answer
    if len(content) > 0:
        question_parts.append(content)
 def add_file_as_code(question_parts: list[str], file: str) -> None:
    """
    Add all source code from the given file. If no code segments can be extracted,
    the whole content is added as source code segment. If the file is a Message,
    extract the source code from the answer. Nothing is added if the Message
    has no answer.

    question_parts: list the code segments are appended to (modified in place)
    file: path of a Message file or of a plain text file
    """
    file_path = Path(file)
    content: str
    try:
        message = Message.from_file(file_path)
    except MessageError:
        # not readable as a Message -> use the raw file content
        content = read_text_file(file_path)
    else:
        # a Message without an answer contributes nothing; returning here
        # avoids using 'content' before it has been assigned
        if not (message and message.answer):
            return
        content = message.answer
    # extract and add source code
    code_parts = source_code(content, include_delims=True)
    if len(code_parts) > 0:
        question_parts += code_parts
    else:
        # no delimited code found -> wrap the whole content in a code fence
        question_parts.append(f"```\n{content}\n```")
 def invert_input_tag_args(args: argparse.Namespace) -> None:
    """
    Invert the meaning of the INPUT tag arguments (modifies 'args' in place):
    * no tags specified on the CLI (None) -> no tags are selected (empty set)
    * empty tags specified on the CLI -> all tags are selected (None)
    Non-empty tag arguments are left untouched.
    """
    for name in ('or_tags', 'and_tags'):
        given = getattr(args, name)
        if given is None:
            setattr(args, name, set())
        elif len(given) == 0:
            setattr(args, name, None)
@@ -3,9 +3,10 @@ import argparse
 from pathlib import Path
 from itertools import zip_longest
 from copy import deepcopy
 from .common import invert_input_tag_args, add_file_as_code, add_file_as_text
 from ..configuration import Config
 from ..chat import ChatDB, msg_location
-from ..message import Message, MessageFilter, MessageError, Question, source_code
+from ..message import Message, MessageFilter, Question
 from ..ai_factory import create_ai
 from ..ai import AI, AIResponse
@@ -14,47 +15,6 @@ class QuestionCmdError(Exception):
    pass
 def add_file_as_text(question_parts: list[str], file: str) -> None:
    """
    Add the given file as plain text to the question part list.
    If the file is a Message, add the answer.
    """
    file_path = Path(file)
    content: str
    try:
        message = Message.from_file(file_path)
        if message and message.answer:
            content = message.answer
    except MessageError:
        with open(file) as r:
            content = r.read().strip()
    if len(content) > 0:
        question_parts.append(content)
 def add_file_as_code(question_parts: list[str], file: str) -> None:
    """
    Add all source code from the given file. If no code segments can be extracted,
    the whole content is added as source code segment. If the file is a Message,
    extract the source code from the answer.
    """
    file_path = Path(file)
    content: str
    try:
        message = Message.from_file(file_path)
        if message and message.answer:
            content = message.answer
    except MessageError:
        with open(file) as r:
            content = r.read().strip()
    # extract and add source code
    code_parts = source_code(content, include_delims=True)
    if len(code_parts) > 0:
        question_parts += code_parts
    else:
        question_parts.append(f"```\n{content}\n```")
 def create_msg_args(msg: Message, args: argparse.Namespace) -> argparse.Namespace:
    """
    Takes an existing message and CLI arguments, and returns modified args based
@@ -163,22 +123,6 @@ def repeat_messages(messages: list[Message], chat: ChatDB, args: argparse.Namesp
            make_request(ai, chat, message, msg_args)
 def invert_input_tag_args(args: argparse.Namespace) -> None:
    """
    Changes the semantics of the INPUT tags for this command:
    * not tags specified on the CLI -> no tags are selected
    * empty tags specified on the CLI -> all tags are selected
    """
    if args.or_tags is None:
        args.or_tags = set()
    elif len(args.or_tags) == 0:
        args.or_tags = None
    if args.and_tags is None:
        args.and_tags = set()
    elif len(args.and_tags) == 0:
        args.and_tags = None
 def question_cmd(args: argparse.Namespace, config: Config) -> None:
    """
    Handler for the 'question' command.
@@ -0,0 +1,105 @@
 import argparse
 import mimetypes
 from pathlib import Path
 from .common import invert_input_tag_args, read_text_file
 from ..configuration import Config
 from ..message import MessageFilter, Message, Question
 from ..chat import ChatDB, msg_location
 class TranslationCmdError(Exception):
    """
    Error raised by the 'translation' command.
    """
 # Separator line placed between the translation prompt and the text to be
 # translated; the prompt itself refers to this line.
 text_separator: str = 'TEXT:'
 def assert_document_type_supported_openai(document_file: Path) -> None:
    """
    Check that the given document can be translated using OpenAI.
    The document type is guessed from the file name.
    Raises a 'TranslationCmdError' if the type is not 'text/plain'.
    """
    # 'guess_type()' returns a tuple '(type, encoding)', not a plain string,
    # so only the first element must be compared with the MIME type
    doctype, _ = mimetypes.guess_type(document_file)
    if doctype != 'text/plain':
        raise TranslationCmdError("AI 'OpenAI' only supports document type 'text/plain'")
 def translation_prompt_openai(source_lang: str, target_lang: str) -> str:
    """
    Build the instruction that tells GPT to translate the text following
    the separator line from the source language to the target language.
    """
    prompt = f"Translate the text below the line {text_separator} from {source_lang} to {target_lang}."
    return prompt
 def create_message_openai(chat: ChatDB, args: argparse.Namespace) -> Message:
    """
    Build a new translation message from the CLI arguments, write it to the
    cache directory and return it.
    Layout of the question:
    1. Translation prompt (tells GPT to do a translation)
    2. Glossary (if specified as an argument, not implemented yet)
    3. User provided prompt enhancements
    4. Translation separator
    5. User provided text to be translated
    The text to be translated is determined as follows:
    - if a document is provided in the arguments, its content is translated
      and all text arguments are used as prompt enhancements
    - if no document is provided, the last text argument is translated and
      the preceding ones are used as prompt enhancements
    Raises a 'TranslationCmdError' if neither '--create' nor '--ask' was given.
    """
    # '--create' takes precedence over '--ask'
    inputs = args.create
    if inputs is None:
        inputs = args.ask
    if inputs is None:
        raise TranslationCmdError("No input text found")
    # split the input into prompt enhancements and the text to be translated
    document = args.input_document
    if document is None:
        user_text = inputs[-1]
        prompt_items = inputs[:-1]
    else:
        assert_document_type_supported_openai(Path(document))
        user_text = read_text_file(Path(document))
        prompt_items = inputs
    user_prompt = '\n\n'.join(str(item) for item in prompt_items)
    # assemble the full question
    # FIXME: add glossaries if given
    sections = [translation_prompt_openai(args.source_lang, args.target_lang),
                user_prompt,
                text_separator,
                user_text]
    question = Question('\n\n'.join(sections))
    message = Message(question=question,
                      tags=args.output_tags,
                      ai=args.AI,
                      model=args.model)
    # the message only goes to the cache, it is not added
    # to the internal message list of the chat
    chat.cache_write([message])
    return message
 def translation_cmd(args: argparse.Namespace, config: Config) -> None:
    """
    Entry point of the 'translation' command. Sets up the chat database
    using the (inverted) input tag filter and, for '--ask' / '--create',
    creates the translation message and stores it in the cache. The exact
    process may differ significantly between AIs (e.g. DeepL vs OpenAI).
    """
    invert_input_tag_args(args)
    tag_filter = MessageFilter(tags_or=args.or_tags,
                               tags_and=args.and_tags,
                               tags_not=args.exclude_tags)
    cache_dir = Path(config.cache)
    db_dir = Path(config.db)
    chat = ChatDB.from_dir(cache_path=cache_dir,
                           db_path=db_dir,
                           mfilter=tag_filter,
                           glob=args.glob,
                           loc=msg_location(args.location))
    # a new translation is created and stored right away
    # FIXME: check AI type
    is_new_translation = args.ask or args.create
    if is_new_translation:
        create_message_openai(chat, args)
    # '--create' only stores the message, nothing else is left to do
    if args.create:
        return
@@ -14,6 +14,7 @@ from .commands.tags import tags_cmd
 from .commands.config import config_cmd
 from .commands.hist import hist_cmd
 from .commands.print import print_cmd
 from .commands.translation import translation_cmd
 from .chat import msg_location
@@ -102,7 +103,7 @@ def create_parser() -> argparse.ArgumentParser:
    # 'tags' command parser
    tags_cmd_parser = cmdparser.add_parser('tags',
                                           help="Manage tags.",
-                                           aliases=['t'])
+                                           aliases=['T'])
    tags_cmd_parser.set_defaults(func=tags_cmd)
    tags_group = tags_cmd_parser.add_mutually_exclusive_group(required=True)
    tags_group.add_argument('-l', '--list', help="List all tags and their frequency",
@@ -136,6 +137,21 @@ def create_parser() -> argparse.ArgumentParser:
    print_cmd_modes.add_argument('-a', '--answer', help='Only print the answer', action='store_true')
    print_cmd_modes.add_argument('-S', '--only-source-code', help='Only print embedded source code', action='store_true')
    # 'translation' command parser
    translation_cmd_parser = cmdparser.add_parser('translation', parents=[ai_parser, tag_parser],
                                                  help="ask, create and repeat translations.",
                                                  aliases=['t'])
    translation_cmd_parser.set_defaults(func=translation_cmd)
    translation_group = translation_cmd_parser.add_mutually_exclusive_group(required=True)
    translation_group.add_argument('-a', '--ask', nargs='+', help='Ask to translate the given text', metavar='TEXT')
    translation_group.add_argument('-c', '--create', nargs='+', help='Create a translation', metavar='TEXT')
    translation_group.add_argument('-r', '--repeat', nargs='*', help='Repeat a translation', metavar='MESSAGE')
    translation_cmd_parser.add_argument('-S', '--source-lang', help="Source language", metavar="LANGUAGE", required=True)
    translation_cmd_parser.add_argument('-T', '--target-lang', help="Target language", metavar="LANGUAGE", required=True)
    translation_cmd_parser.add_argument('-G', '--glossaries', nargs='+', help="List of glossaries", metavar="GLOSSARY")
    translation_cmd_parser.add_argument('-d', '--input-document', help="Document to translate", metavar="FILE")
    translation_cmd_parser.add_argument('-D', '--output-document', help="Path for the translated document", metavar="FILE")
    argcomplete.autocomplete(parser)
    return parser
@@ -9,31 +9,43 @@ from chatmastermind.configuration import OpenAIConfig
 class OpenAITest(unittest.TestCase):
-    @mock.patch('chatmastermind.ais.openai.OpenAI._completions')
+    @mock.patch('openai.ChatCompletion.create')
    def test_request(self, mock_create: mock.MagicMock) -> None:
        # Create a test instance of OpenAI
        config = OpenAIConfig()
        openai = OpenAI(config)
        # Set up the mock response from openai.ChatCompletion.create
-        class mock_obj:
+        mock_chunk1 = {
-            pass
+            'choices': [
-        mock_chunk1 = mock_obj()
+                {
-        mock_chunk1.choices = [mock_obj(), mock_obj()]  # type: ignore
+                    'index': 0,
-        mock_chunk1.choices[0].index = 0  # type: ignore
+                    'delta': {
-        mock_chunk1.choices[0].delta = mock_obj()  # type: ignore
+                        'content': 'Answer 1'
-        mock_chunk1.choices[0].delta.content = 'Answer 1'  # type: ignore
+                    },
-        mock_chunk1.choices[0].finish_reason = None  # type: ignore
+                    'finish_reason': None
-        mock_chunk1.choices[1].index = 1  # type: ignore
+                },
-        mock_chunk1.choices[1].delta = mock_obj()  # type: ignore
+                {
-        mock_chunk1.choices[1].delta.content = 'Answer 2'  # type: ignore
+                    'index': 1,
-        mock_chunk1.choices[1].finish_reason = None  # type: ignore
+                    'delta': {
-        mock_chunk2 = mock_obj()
+                        'content': 'Answer 2'
-        mock_chunk2.choices = [mock_obj(), mock_obj()]  # type: ignore
+                    },
-        mock_chunk2.choices[0].index = 0  # type: ignore
+                    'finish_reason': None
-        mock_chunk2.choices[0].finish_reason = 'stop'  # type: ignore
+                }
-        mock_chunk2.choices[1].index = 1  # type: ignore
+            ],
-        mock_chunk2.choices[1].finish_reason = 'stop'  # type: ignore
+        }
        mock_chunk2 = {
            'choices': [
                {
                    'index': 0,
                    'finish_reason': 'stop'
                },
                {
                    'index': 1,
                    'finish_reason': 'stop'
                }
            ],
        }
        mock_create.return_value = iter([mock_chunk1, mock_chunk2])
        # Create test data
Author	SHA1	Message	Date
juk0de	6672e5ee3a	translation: added check for valid document format when using OpenAI	2023-11-08 20:11:15 +01:00
juk0de	75314cd777	translation: specified / implemented the question format for OpenAI based translations	2023-11-08 19:38:37 +01:00
juk0de	44f7ebe365	translation: some small required refactoring	2023-11-08 18:20:22 +01:00
juk0de	7b8ee56230	added new command 'translation'	2023-11-07 17:35:44 +01:00