Source code for wdoc.utils.env

"""
Sets the default value for environment variables, parse the actual values,
wdoc.
Also set some variables useful to access globally like is_linux for example.
"""

import platform
from textwrap import indent, dedent
from pathlib import Path
import os
import sys
from dataclasses import MISSING, dataclass, asdict, field

from loguru import logger

from beartype import BeartypeConf, beartype
from beartype.door import is_bearable
from beartype.typing import Literal, Optional, Union

try:
    from wdoc.utils.errors import FrozenAttributeCantBeSet
except ImportError:  # for debugging purposes
    from errors import FrozenAttributeCantBeSet

# must create it because we can't import it from typechecker.py
warn_typecheck = beartype(conf=BeartypeConf(violation_type=UserWarning))

is_linux = platform.system() == "Linux"

# if wdoc is executed in a pytest scenario, some things related to piping must be modified
_pytest_check = os.environ.get("PYTEST_IS_TESTING_WDOC", "false")
pytest_ongoing = True if _pytest_check == "true" else False

# useful to know if we should use tqdm or not (it can cause broken pipe errors
# otherwise) and modify the formatting output.ArithmeticError
is_input_piped = not sys.stdin.isatty()
# Also useful to modify the loglevel
is_out_piped = not sys.stdout.isatty() if not pytest_ongoing else False


[docs] @dataclass class EnvDataclass: """ This dataclass holds the env variables used by wdoc. It is frozen when env.py is done. This allows modification of env values to dynamically affect wdoc without having to restart the python execution or reimporting wdoc. """ # stores the name of env variable that starts by WDOC_ but are not expected by EnvDataclass, to warn user only once __warned_unexpected__: list = field(default_factory=list) __frozen__: bool = False WDOC_DUMMY_ENV_VAR: bool = False # used to test the __frozen__ mechanism WDOC_DEBUG: bool = False WDOC_VERBOSE: bool = False WDOC_TYPECHECKING: Literal["disabled", "warn", "crash"] = "warn" WDOC_NO_MODELNAME_MATCHING: bool = True WDOC_ALLOW_NO_PRICE: bool = False WDOC_OPEN_ANKI: bool = False WDOC_STRICT_DOCDICT: Union[bool, Literal["strip"]] = False WDOC_MAX_LOADER_TIMEOUT: int = -1 WDOC_MAX_PDF_LOADER_TIMEOUT: int = -1 # disabled as it can make the parsing slower WDOC_PRIVATE_MODE: bool = False WDOC_DEBUGGER: bool = False WDOC_EXPIRE_CACHE_DAYS: int = 0 WDOC_EMPTY_LOADER: bool = False WDOC_BEHAVIOR_EXCL_INCL_USELESS: Literal["warn", "crash"] = "warn" # by default use lazy loading if using --help argument WDOC_IMPORT_TYPE: Literal["native", "lazy", "thread", "both"] = ( "native" if " --help" not in " ".join(sys.argv) else "lazy" ) WDOC_LOADER_LAZY_LOADING: bool = True WDOC_MOD_FAISS_SCORE_FN: bool = True WDOC_FAISS_COMPRESSION: bool = True WDOC_FAISS_BINARY: bool = False WDOC_LLM_MAX_CONCURRENCY: int = 1 WDOC_LLM_REQUEST_TIMEOUT: int = 600 WDOC_SEMANTIC_BATCH_MAX_TOKEN_SIZE: int = 2000 WDOC_MAX_CHUNK_SIZE: int = 16_000 WDOC_MAX_EMBED_CONTEXT: int = 7_000 WDOC_INTERMEDIATE_ANSWER_MAX_TOKENS: int = 4000 WDOC_DEFAULT_MODEL: str = "openrouter/deepseek/deepseek-v4-pro" WDOC_DEFAULT_EMBED_MODEL: str = "openai/text-embedding-3-small" WDOC_DEFAULT_EMBED_DIMENSION: Optional[int] = None WDOC_EMBED_TESTING: bool = True WDOC_DISABLE_EMBEDDINGS_CACHE: bool = False WDOC_DEFAULT_QUERY_EVAL_MODEL: str = "openrouter/deepseek/deepseek-v4-flash" WDOC_LANGFUSE_PUBLIC_KEY: Optional[str] = None WDOC_LANGFUSE_SECRET_KEY: Optional[str] = None WDOC_LANGFUSE_HOST: Optional[str] = None WDOC_LITELLM_TAGS: Optional[str] = None WDOC_LITELLM_USER: str = "wdoc_llm" WDOC_APPLY_ASYNCIO_PATCH: bool = False WDOC_CONTINUE_ON_INVALID_EVAL: bool = True WDOC_WHISPER_PARALLEL_SPLITS: bool = True WDOC_WHISPER_ENDPOINT: Optional[str] = "" WDOC_WHISPER_API_KEY: Optional[str] = "" WDOC_WHISPER_MODEL: str = "whisper-1" WDOC_IN_DOCKER: bool = False @warn_typecheck def __parse__(self, val: str) -> Optional[Union[bool, int, str]]: """ Parse a string value from environment variables into appropriate Python types. This method converts string values to their corresponding Python types: - "true" (case-insensitive) → True (boolean) - "false" (case-insensitive) → False (boolean) - String of digits → int - "none" (case-insensitive) or empty string → None - Any other string remains a string Args: val: The string value to parse Returns: The parsed value as bool, int, None, or string """ if val.lower() == "true": return True elif val.lower() == "false": return False elif val.isdigit(): return int(val) elif val.lower() == "none" or val == "": return None else: return val def __check_unexpected_vars__(self) -> None: """ Look for env variables that start by WDOC_ but are not defined in EnvDataclass. This would indicate an error in env handling. The message is only printed once per var. """ for k in os.environ.keys(): if not k.lower().startswith("wdoc_"): continue if ( k.upper() not in [key.upper() for key in env.__dataclass_fields__.keys()] and k.upper() not in self.__warned_unexpected__ ): self.__warned_unexpected__.append(k.upper()) logger.debug( f"Unexpected key env variable starting by 'wdoc_': {k}. This might be a typo in your configuration!" ) def __setattr__(self, name, value): """ Controls attribute assignment for the EnvDataclass. This method enforces the frozen state of the class once it's been frozen: - Prevents attempts to unfreeze the instance - Allows normal attribute setting only when not frozen - Raises an exception when trying to set attributes on a frozen instance Args: name: The attribute name being set value: The value to assign to the attribute Raises: Exception: If attempting to unfreeze a frozen instance FrozenAttributeCantBeSet: If attempting to set any attribute on a frozen instance """ # dont allow unfreezing if name == "__frozen__" and self.__frozen__ is True and value is False: raise Exception("Cannot unfreeze the frozen EnvDataclass instance") # allow setting variable values only until frozen if self.__frozen__ is not True: return super().__setattr__(name, value) raise FrozenAttributeCantBeSet(name, value) def __getattribute__(self, name): """ Controls attribute access for the EnvDataclass. This method implements a dynamic environment variable synchronization system: - For special attributes, returns them directly - For normal attributes in a non-frozen state, returns the current value - For attributes in a frozen state, checks the environment variables for runtime changes - Enforces type safety for all values - Has special handling for attributes containing 'private' for security Args: name: The attribute name being accessed Returns: The attribute value, possibly updated from environment variables Raises: Exception: If there's an error getting the attribute from the class AttributeError: If trying to access a security-sensitive attribute that has changed AssertionError: If a value doesn't conform to its expected type """ # non WDOC env can be gotten right away if name in [ "__dataclass_fields__", "__frozen__", "__parse__", "__warned_unexpected__", "__check_unexpected_vars__", "__doc__", "__class__", ]: return super().__getattribute__(name) elif name.startswith("__") and name.ends_with("__"): logger.debug(f"Unexpected attribute of EnvDataclass was accessed: '{name}'") return super().__getattribute__(name) self.__check_unexpected_vars__() # get the current value stored in the dataclass try: cur_val = super().__getattribute__(name) except Exception as e: raise Exception( f"Error when getting attribute {name} from EnvClass: {e}" ) from e # check that the stored value is of appropriate type assert is_bearable(cur_val, self.__dataclass_fields__[name].type), cur_val if self.__frozen__ is not True: return cur_val # get the value from the env env_val = os.environ.get(name, MISSING) # if missing from env, if the value is not the default that means # it has been deleted so we should return the default value. But if # it's the default we can return it safely. Special case if the attribute # contains 'private', don't allow modifying it at runtime out of paranoia. if env_val is MISSING: default = self.__dataclass_fields__[name].default if cur_val == default: return default if "private" in name.lower(): raise AttributeError( f"Error when accessible env variable '{name}': its env variable counterpart is missing but its name contains 'private' so out of an abundance of caution we crash." ) else: logger.warning( f"Env variable '{name}' is missing but the stored value ('{cur_val}' ) is different than the default ('{default}'). We are then setting the wdoc attribute to its default value." ) os.environ[name] = default return default env_val = self.__parse__(env_val) # if unchanged, we can return it if cur_val == env_val: return cur_val # the env variable has changed logger.warning( f"Env variable '{name}' changed between initialization and now: env value is '{env_val}' and already loaded variable is '{cur_val}'. Returning the env value" ) if "private" in name.lower(): raise AttributeError( f"Quitting out of an abundance of caution: env vaiable '{name}' contains 'private' in its name so it's to important to allow changing it at runtime." ) # check that it has the appropriate type assert is_bearable(env_val, self.__dataclass_fields__[name].type), env_val # if we were no using the freezing mechanism we could store it like # that but let's not # super().__setattr__(name, env_val) return env_val
# sanity check for the default values of the dataclass itself for k, v in EnvDataclass.__dataclass_fields__.items(): if k.startswith("WDOC_"): assert is_bearable(v.default, v.type), v # add the actual documentation of each env var to the __doc__ of EnvDataclass help_content = (Path(__file__).parent / Path("../docs/help.md")).read_text() env_list = [ e for e in dir(EnvDataclass) if e.startswith("WDOC_") and e != "WDOC_DUMMY_ENV_VAR" ] # check that it's properly documented to begin with for e in env_list: if f"* `{e}`" not in help_content: logger.error( f"The env variable '{e}' seems to be missing from the help.md page" ) help_sections = help_content.split("# Environment variables") assert len(help_sections) == 2 doc = EnvDataclass.__doc__ indentation = len(doc.splitlines(keepends=True)[0].rstrip()) - len( doc.splitlines(keepends=True)[0].strip() ) EnvDataclass.__doc__ = dedent(EnvDataclass.__doc__) EnvDataclass.__doc__ += ( f"\n\n## Documentation of each environment variables:\n{help_sections[1]}" ) EnvDataclass.__doc__ = indent(EnvDataclass.__doc__, indentation * " ") env = EnvDataclass() # check that the freezing works as expected try: env.WDOC_DUMMY_ENV_VAR = False except FrozenAttributeCantBeSet as e: raise Exception( f"Something is wrong with the freezing of EnvDataclass: '{e}'" ) from e if " --help" in " ".join(sys.argv): # just notify the user logger.debug("--help so using lazy loading by default") # if --debug -d --verbose or -v are in the command line: we set WDOC_DEBUG and WDOC_VERBOSE accordingly
[docs] def check_kwargs(arg: str, abbrv: str = None) -> bool: # match argv tokens exactly so values containing e.g. " debug" don't trigger tokens = sys.argv[1:] long_forms = {arg, f"--{arg}"} if any(t in long_forms or t.startswith(f"--{arg}=") for t in tokens): return True if abbrv: short_forms = {f"-{abbrv}", f"--{abbrv}"} if any(t in short_forms or t.startswith(f"-{abbrv}=") for t in tokens): return True return False
if check_kwargs("debug", "d"): logger.debug("Found 'debug' arg, setting WDOC_DEBUG and WDOC_VERBOSE to true") os.environ["WDOC_DEBUG"] = "true" os.environ["WDOC_VERBOSE"] = "true" elif check_kwargs("verbose", "v"): logger.debug("Found 'verbose' arg, setting WDOC_VERBOSE to true") os.environ["WDOC_VERBOSE"] = "true" # store the env variable instead of the default values but check their types for k in os.environ.keys(): if not k.lower().startswith("wdoc_"): continue v = env.__parse__(os.environ[k]) if k.upper() not in [k.upper() for k in env.__dataclass_fields__.keys()]: if k.upper() not in env.__warned_unexpected__: env.__warned_unexpected__.append(k.upper()) logger.debug( f"Unexpected key env variable starting by 'wdoc_': {k}. This might be a typo in your configuration!" ) else: assert is_bearable(v, env.__dataclass_fields__[k].type), ( f"Unexpected type of env variable '{k}': '{type(v)}' but expected '{env.__dataclass_fields__[k].type}'" ) v_stored = getattr(env, k) setattr(env, k, v) env.__frozen__ = True try: env.WDOC_DUMMY_ENV_VAR = False raise Exception("Something is wrong with the freezing of EnvDataclass") except FrozenAttributeCantBeSet: pass # sanity check for the stored values for k, v in asdict(env).items(): # if not k.startswith("WDOC_"): # continue assert is_bearable(v, env.__dataclass_fields__[k].type), v # Check for incompatible WDOC_DEBUGGER and WDOC_IN_DOCKER settings if env.WDOC_DEBUGGER and env.WDOC_IN_DOCKER: logger.warning( "Both WDOC_DEBUGGER and WDOC_IN_DOCKER are set to true. " "The debugger (pdb) is not compatible with Docker containers. " "WDOC_IN_DOCKER takes priority - the debugger will be disabled and errors will be logged instead." ) # If langfuse env variables are set AND WDOC_LANGFUSE_PUBLIC_KEY etc are set: we replace langfuse's env variable to make sure any underlyng lib use wdoc's instead for k in [ "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "LANGFUSE_HOST", ]: newk = "WDOC_" + k if newk in os.environ and os.environ[newk]: os.environ[k] = os.environ[newk] # even though that check is done at runtime we also check it at startup if env.WDOC_FAISS_BINARY: assert not env.WDOC_MOD_FAISS_SCORE_FN, ( "You can't use the env variable WDOC_MOD_FAISS_SCORE_FN=true and WDOC_FAISS_BINARY=true at the same time." ) assert env.WDOC_FAISS_COMPRESSION, ( "You can't use the env variable WDOC_FAISS_BINARY=true and WDOC_FAISS_COMPRESSION=false at the same time." )