Source code for wdoc.utils.interact

"""
Code related to the prompt (in the sense of "directly ask the user a question")
"""

import json
import time
from pathlib import Path
from textwrap import dedent

from beartype.typing import Any, Tuple
from prompt_toolkit import PromptSession
from prompt_toolkit.completion import Completer, Completion
from prompt_toolkit.formatted_text import FormattedText
from loguru import logger

from wdoc.utils.logger import md_printer
from wdoc.utils.misc import cache_dir
from wdoc.utils.env import is_out_piped


[docs] def get_toolbar_text(settings: dict) -> Any: """Parse settings for display in the prompt toolbar. Args: settings (dict): Dictionary containing the current settings Returns: Any: Formatted text suitable for display in the toolbar """ out = [] keys = sorted(list(settings.keys())) for k in keys: if k == "task": continue v = settings[k] out.append(f"{k.replace('_', ' - ').title()}: {v}") out = ["class:toolbar"] + [" - ".join(out)] return FormattedText([tuple(out)])
[docs] class SettingsCompleter(Completer): def __init__( self, wdocCliSettings, wdocHistoryPrompts, wdocHistoryWords, *args, **kwargs ): super().__init__(*args, **kwargs) self.wdocCliSettings = wdocCliSettings self.wdocHistoryPrompts = wdocHistoryPrompts self.wdocHistoryWords = wdocHistoryWords
[docs] def get_completions(self, document, complete_event): text = document.text_before_cursor if not text.strip(): yield Completion("/debug", start_position=-len(text)) yield Completion("/settings", start_position=-len(text)) yield Completion("/help", start_position=-len(text)) elif text.startswith("/"): if "/debug".startswith(text): yield Completion("/debug", start_position=-len(text)) if "/help".startswith(text): yield Completion("/help", start_position=-len(text)) if "/settings ".startswith(text) or "/settings " in text: settings = sorted(list(self.wdocCliSettings.keys())) for setting in settings: if setting == "task": continue compl = f"/settings {setting}={self.wdocCliSettings[setting]}" if compl.startswith(text): yield Completion(compl, start_position=-len(text)) else: # words autocompletion if " " in text and not text.endswith(" "): last_word = text.split(" ")[-1] word_cnt = 0 for word in self.wdocHistoryWords: if word_cnt >= 3: break if word.lower().startswith(last_word.lower()): yield Completion(word, start_position=-len(last_word)) word_cnt += 1 # entire prompt autocompletion for hist in self.wdocHistoryPrompts: if hist.lower().startswith(text.lower()): yield Completion(hist, start_position=-len(text))
[docs] def show_help() -> None: """Display CLI help information. This function displays the CLI help information by formatting and showing the docstring from the ask_user function. Returns: None """ md_printer(dedent(ask_user.__doc__).strip())
[docs] def ask_user(settings: dict) -> Tuple[str, dict]: """ ## Command line manual * **Available Commands:** * /help or ? * /debug * /settings (syntax: '/settings top_k=5') * **Settings keys and values:** * top_k: int > 0 * multiline: boolean *retriever: a string containing '_' separated retriever from the following list: * 'default' to use regular embedding search * 'knn' to use KNN * 'svm' to use SVM * 'multiquery' to use Hypothetical Document Embedding search * 'parent' to use parent retriever To use several '/settings retriever=knn_svm_default' * relevancy: float, from set [-1:+1] * **Tips:** * Each LLM used has a nickname: use it to adress specific instructions. The nicknames are "Summarizer", "Evaluator", "Answerer" and "Combiner". * In multiline mode, use ctrl+D to send the text (sometimes multiple times). * For more information run 'wdoc --help' * History is saved and shared across all runs * If you use '>>>>' once in the middle of your text, the left part will be used as a query find the documents and the right part will be the question to answer. For example: 'tuberculosis among medical students in the 20th century >>>> what are the statistics about epidemiology of tuberculosis among medical students in the 20th century?'. This is not always useful but in some cases depending on documents and retriever it can be needed to avoid having to set top_k too high. """ assert not is_out_piped, ( "Trying to load the CLI even though we are in a piped script. Crashing." ) md_printer("# wdoc Prompt") # loading history from files prev_questions = [] pp_file = cache_dir / "query_history.json" if pp_file.exists(): pp_list = json.loads(pp_file.read_text()) assert isinstance(pp_list, list), "Invalid cache type" for i, pp in enumerate(pp_list): assert isinstance(pp, dict), "Invalid item in cache" assert "prompt" in pp, "Invalid item in cache" for pp in pp_list: if "timestamp" not in pp: pp["timestamp"] = 0 if "task" not in pp: pp["task"] = "query" if pp not in prev_questions: prev_questions.append(pp) prev_questions = sorted( prev_questions, key=lambda x: x["timestamp"], ) prompts = [x["prompt"] for x in prev_questions if x["task"] == settings["task"]] words = [w for w in " ".join(prompts).split(" ") if len(w) > 2 and w.isalpha()] completer = SettingsCompleter( wdocCliSettings=settings, wdocHistoryPrompts=prompts, wdocHistoryWords=words ) while True: if settings["multiline"]: logger.info("Multiline mode enabled. Use ctrl+D to send.") session = PromptSession( bottom_toolbar=lambda: get_toolbar_text(settings), completer=completer, ) try: user_input = session.prompt( "> ", # completer=autocomplete, vi_mode=True, multiline=settings["multiline"], ) except KeyboardInterrupt: logger.warning("Quitting.") raise SystemExit() except EOFError: if settings["multiline"]: pass else: logger.warning("Quitting.") raise SystemExit() user_input = user_input.strip() # quit if user_input.strip() in ["quit", "Q", "q"]: logger.info("Quitting.") raise SystemExit() elif user_input == "/debug": logger.info("Entering debug mode.") breakpoint() logger.info("Going back to the prompt.") continue elif user_input in ["/help", "?"]: show_help() continue # handle settings if user_input.startswith("/settings "): if "=" not in user_input: logger.warning("Invalid settings syntax: missing '='") show_help() continue input_sett = user_input.split(" ") if not input_sett[0] == "/settings": logger.warning( "Invalid settings syntax: does not start with '/settings '" ) show_help() continue if not len(input_sett) == 2: logger.warning("Invalid settings syntax: too many spaces") show_help() continue input_sett = input_sett[1] input_sett = input_sett.split("=") if not len(input_sett) == 2: logger.warning("Invalid settings syntax: expected one '=' symbol") show_help() continue sett_k, sett_v = input_sett if sett_k not in settings.keys(): logger.warning( "Invalid settings: '{sett_k}' is not a valid setting key" ) show_help() continue if settings[sett_k] == sett_v: logger.warning( "Invalid settings: '{sett_k}' is already has value '{sett_v}'" ) show_help() continue try: if sett_k == "top_k": assert int(sett_v) > 0, f"Can't set top_k to <= 0 ({sett_v})" elif sett_k == "relevancy": assert float(sett_v) >= -1 and float(sett_v) <= 1, ( f"Can't set relevancy to < -1 or > +1 ({sett_v})" ) sett_v = float(sett_v) elif sett_k == "retriever": assert all( retriev in ["default", "multiquery", "knn", "svm", "parent"] for retriev in sett_v.split("_") ), f"Invalid retriever value: {sett_v}" elif sett_k == "multiline": if sett_v.lower() == "true": sett_v = True elif sett_v.lower() == "false": sett_v = False sett_v = bool(sett_v) settings[sett_k] = type(settings[sett_k])(sett_v) except Exception as err: logger.warning( f"Error: can't set '{sett_k}' to '{sett_v}' because it " f"can't keep the type '{type(settings[sett_k])}'\n" f"Error: '{err}'" ) show_help() continue logger.info(f"Set {sett_k}={sett_v}") continue elif "/settings" in user_input: logger.warning("Detected '/settings' but not at the start, retrying.") show_help() continue break md_printer("### Done prompting") # saving new history to file if len([x for x in prev_questions if x["prompt"].strip() == user_input]) == 0: prev_questions.append( { "prompt": user_input, "timestamp": int(time.time()), "task": settings["task"], } ) prev_questions = sorted( prev_questions, key=lambda x: x["timestamp"], ) temp_file = Path(str(pp_file.resolve().absolute()) + ".temp") json.dump(prev_questions, temp_file.open("w"), indent=4) assert temp_file.exists() pp_file.unlink(missing_ok=True) temp_file.rename(pp_file) if not user_input.strip(): logger.exception("Query cannot be empty, retrying") return ask_user(settings) return user_input, settings