working caching & better diff prompt

2024-09-22 16:14:36 +03:00 · 2024-07-31 01:11:37 -07:00
parent b86807b5f3
commit cc2aa7d3f8
7 changed files with 163 additions and 103 deletions
--- a/README.md
+++ b/README.md
@@ -129,6 +129,7 @@ You can then visualize your promtps by visiting the frontend on `http://localhos
 - [ ] Serialize lstrs in the JSON dumps in Python the same way as the DB serializes them for the frontend (\_\_lstr vs SerializedLStr) <- these are pydantic models and so we can reuse them
 - [ ] Handle failure to serialize.
 - [ ] Unify cattrs deserialization and serialization; it is broken right now.
+- [ ] Fix URL not changing on invocation click

 ## Tests

--- a/ell-studio/src/components/source/CodeHighlighter.js
+++ b/ell-studio/src/components/source/CodeHighlighter.js
@@ -13,6 +13,7 @@ export function CodeHighlighter({
  startingLineNumber = 1,
  customHooks = [], // New prop for custom hooks
  defaultRowPadding = 1, // New parameter for default row padding
+  offset: indentOffset = 35,
 }) {
  const { cleanedCode, hookRanges } = useMemo(() => {
    const hookRanges = customHooks.map(() => []);
@@ -60,8 +61,6 @@ export function CodeHighlighter({
    return { cleanedCode, hookRanges };
  }, [code, customHooks]);

-  console.log("hoohkhookRanges", hookRanges);
-
  const renderer = useCallback(
    ({ rows, stylesheet, useInlineStyles }) => {
      const rowsElements = rows.map((node, i) =>
@@ -75,13 +74,13 @@ export function CodeHighlighter({

      const rowTree = [];
      const activeHooks = customHooks.map(() => null);
-      const offset = 35;
+
      for (var i = 0; i < rowsElements.length; i++) {
        var currentElement = (
          <div
            style={{
-              paddingLeft: `${offset + defaultRowPadding}px`,
-              textIndent: `-${offset}px`,
+              paddingLeft: `${indentOffset + defaultRowPadding}px`,
+              textIndent: `-${indentOffset}px`,
            }}
            key={i}
          >
@@ -117,7 +116,6 @@ export function CodeHighlighter({
            activeHooks[hookIndex] = null;
          }
        }
-        console.log(i, currentElement);

        if (currentElement) {
          rowTree.push(currentElement);
@@ -127,13 +125,7 @@ export function CodeHighlighter({
      customHooks.forEach((hook, hookIndex) => {
        if (activeHooks[hookIndex] !== null) {
          const range = hookRanges[hookIndex][hookRanges[hookIndex].length - 1];
-          console.log(
-            "range",
-            range,
-            i,
-            hookRanges[hookIndex],
-            activeHooks[hookIndex]
-          );
+
          rowTree.push(
            hook.wrapper({
              children: activeHooks[hookIndex],
--- a/ell-studio/src/components/source/LMPSourceView.js
+++ b/ell-studio/src/components/source/LMPSourceView.js
@@ -7,21 +7,31 @@ import '../../styles/SourceCodeView.css';
 import { CodeSection } from './CodeSection';
 import { CodeHighlighter } from './CodeHighlighter';

-const BoundedVariableWrapper = ({ children, selectedInvocation,  content, merged_initial_bound_vars }) => {
+/**
+ * Boxed display of a bound variable's value, rendered as `name = <JSON value>`.
+ *
+ * The variable name is the text before the first `=` in `content`. Values are
+ * looked up in the selected invocation's `global_vars` / `free_vars` when an
+ * invocation is selected, otherwise in `initial_global_vars` /
+ * `initial_free_vars`. The label reports whether the name was found among the
+ * globals, the free variables, or neither ("unknown").
+ *
+ * NOTE(review): `children` is accepted but not rendered here.
+ */
+const BoundedVariableWrapper = ({ children, selectedInvocation, content, initial_global_vars, initial_free_vars }) => {
  const var_name = content.split('=')[0].trim();
-  const mergedInvocationBoundVars = useMemo(() => selectedInvocation ? { ...selectedInvocation.global_vars, ...selectedInvocation.free_vars } : merged_initial_bound_vars, [selectedInvocation, merged_initial_bound_vars]);
-  const value = mergedInvocationBoundVars?.[var_name];
-  const formattedValue = `${var_name} = ${JSON.stringify(value)}`;
+  // Fall back to empty maps so the membership checks below cannot throw when a
+  // variable map is missing (null/undefined).
+  const invocationVars = (selectedInvocation ? selectedInvocation.global_vars : initial_global_vars) ?? {};
+  const invocationFreeVars = (selectedInvocation ? selectedInvocation.free_vars : initial_free_vars) ?? {};
+
+  // Own-property check: `in` would also match inherited keys such as "constructor".
+  const hasVar = (vars) => Object.prototype.hasOwnProperty.call(vars, var_name);
+  const isGlobal = hasVar(invocationVars);
+  const isFree = hasVar(invocationFreeVars);
+  // Select by key presence rather than truthiness so falsy values (0, "", false, null) are kept.
+  const value = isGlobal ? invocationVars[var_name] : invocationFreeVars[var_name];
+  // JSON.stringify(undefined) returns undefined (not a string); guard before calling .replace.
+  const serializedValue = JSON.stringify(value) ?? 'undefined';
+  // Strip the JSON quotes around "<Object of type X>" placeholders so they read as markers, not strings.
+  const formattedValue = `${var_name} = ${serializedValue.replace(/"<Object of type ([^>]+)>"/g, '<Object of type $1>')}`;
+  
  return (
    <div className="relative rounded border border-gray-500 mt-2 py-2">
-      <span className="absolute -top-2 left-2 bg-gray-800  text-[0.6rem]  text-gray-400">
-        bound global {!selectedInvocation ?  'at definition' : `at invocation ${selectedInvocation.id}`}
+      <span className="absolute -top-2 left-2 bg-gray-800 text-[0.6rem] text-gray-400" style={{'backgroundColor': 'rgb(28, 31, 38)'}}>
+        {isGlobal ? 'mutable globalvar' : isFree ? 'freevar' : 'unknown'} {!selectedInvocation ? 'value at lmp definition' : `value at ${selectedInvocation.id}`}
      </span>
-       <div className='ml-5'>
-       <CodeHighlighter code={formattedValue} showLineNumbers={false} defaultRowPadding='' highlighterStyle={{
-          padding: '0px 0px 0px 20px'
-        }} />
-       </div>
+      <div className='ml-5'>
+        <CodeHighlighter 
+          code={formattedValue} 
+          showLineNumbers={false} 
+          defaultRowPadding='' 
+          highlighterStyle={{
+            padding: '0px 0px 0px 20px'
+          }} 
+        />
+      </div>
    </div>
  );
 };
@@ -31,10 +41,6 @@ const LMPSourceView = ({ lmp, showDependenciesInitial = false, selectedInvocatio
  const { dependencies, source, uses, initial_global_vars, initial_free_vars } = lmp;

  console.log(lmp)
-  const merged_initial_bound_vars = useMemo(() => {
-    return { ...initial_global_vars, ...initial_free_vars };
-  }, [initial_global_vars, initial_free_vars]);
-
  const [showDependencies, setShowDependencies] = useState(showDependenciesInitial);
  const [showSource, setShowSource] = useState(true);

@@ -44,23 +50,30 @@ const LMPSourceView = ({ lmp, showDependenciesInitial = false, selectedInvocatio
  const dependentLMPs = uses.length;

  const boundedVariableHooks = useMemo(() => {
+    // Hook descriptors ({ name, startTag, endTag, wrapper }) for the #<BV> and #<BmV>
+    // markers; presumably passed to CodeHighlighter as `customHooks` - confirm at the call site.
-    const wrapper = ({ children, key, content }) => (  
-      <BoundedVariableWrapper key={key} selectedInvocation={selectedInvocation} content={content} merged_initial_bound_vars={merged_initial_bound_vars} >
+    const mutableBVWrapper = ({ children, key, content }) => (  
+      <BoundedVariableWrapper 
+        key={key} 
+        selectedInvocation={selectedInvocation} 
+        content={content} 
+        initial_global_vars={initial_global_vars}
+        initial_free_vars={initial_free_vars}
+      >
        {children}
      </BoundedVariableWrapper>
-      );
+    );

    return [{
    name: 'boundedVariable',
    startTag: '#<BV>',
    endTag: '#</BV>',
-    wrapper
-    },
+    wrapper: ({children, key, content}) => {
+      return <>{children}</>
+    }},
    {
      name: 'boundedMutableVariable',
      startTag: '#<BmV>',
      endTag: '#</BmV>',
-      wrapper
+      wrapper: mutableBVWrapper
    }
  ];
-  }, [selectedInvocation]);
+    // The wrapper closes over the initial var maps as well, so they must be dependencies;
+    // otherwise a changed `lmp` keeps rendering the previous definition-time values.
+  }, [selectedInvocation, initial_global_vars, initial_free_vars]);
--- a/examples/bv.py
+++ b/examples/bv.py
@@ -6,18 +6,22 @@ from ell.stores.sql import SQLiteStore
 CODE_INSTURCTIONS = """

 Other Instructions:
- You only respond in code with no commentary (except in the and docstrings.) 
- Do not respond in markdown just write code. 
- It is extremely important that you don't start you code with ```python. """
+- You only respond in code without any commentary (except in the docstrings.) 
+- Don't respond in markdown just write code!
+- It is extremely important that you don't start you code with ```python <...> """

-test = ["asd"]*10
+class Tests:
+    pass
+test = Tests()

-def get_lmp():
-    z = 6
-    @ell.lm("gpt-4o", temperature=0.1, max_tokens=6)
+another_serializeable_global = ["asd"]
+
+
+def get_lmp(z = 10):
+    # Builds and returns an @ell.lm-decorated prompt function. Its system prompt
+    # interpolates the argument `z` (captured from this enclosing scope) and the
+    # module-level names CODE_INSTURCTIONS, `test` and `another_serializeable_global`.
+    @ell.lm("gpt-4o-mini", temperature=0.1, max_tokens=6)
    def write_a_complete_python_class(user_spec : str):
        return [ell.system(f"""You are an mid-tier python programmer capable of interpreting a user's spec and writing a python class to accomidate their request. You should document all your code, and you best practices.
-        {CODE_INSTURCTIONS} {z} {test[0]}
+        {CODE_INSTURCTIONS} {z} {test} {another_serializeable_global}
        """), ell.user(user_spec)]

    return write_a_complete_python_class
@@ -25,6 +29,8 @@ def get_lmp():
 if __name__ == "__main__":
    ell.config.verbose = True
    ell.set_store(SQLiteStore("sqlite_example"), autocommit=True)
-    test[0] = "modified at execution :O"
-    w = get_lmp()
+    # test[0] = "modified at execution :O"
+    w = get_lmp(z=13)
+    cls_Def = w("A class that represents a bank")
+    another_serializeable_global.append("new value during execution")
    cls_Def = w("A class that represents a bank")
--- a/src/ell/decorators/track.py
+++ b/src/ell/decorators/track.py
@@ -66,6 +66,7 @@ def track(fn: Callable) -> Callable:
            ))

            if len(cached_invocations) > 0:
+                # TODO THis is bad?
                results =  [SerializedLStr(**d).deserialize() for  d in cached_invocations[0]['results']]
                if len(results) == 1:
                    return results[0]
@@ -90,58 +91,15 @@ def track(fn: Callable) -> Callable:


        if not _has_serialized_lmp:
-            if not hasattr(func_to_track, "__ell_hash__")  and config.lazy_versioning:
+            if not hasattr(func_to_track, "__ell_hash__") and config.lazy_versioning:
                fn_closure, _ = ell.util.closure.lexically_closured_source(func_to_track)
-            # Compute commit messages if enabled
-            commit = None
-            lmps = config._store.get_lmps(name=_name)
-            version = 0
-            already_in_store =any(lmp['lmp_id'] == func_to_track.__ell_hash__ for lmp in lmps)
-            if not already_in_store :
-                # Do auto commitng and versioning if previous versions exist.
-                if len(lmps) > 0 :
-                    lmps.sort(key=lambda x: x['created_at'], reverse=True)
-                    latest_lmp = lmps[0]
+            
+            _serialize_lmp(func_to_track, _name, fn_closure, lmp, lm_kwargs)
+            _has_serialized_lmp = True


-                    version = (latest_lmp['version_number']) + 1
-                    if config.autocommit:
-                    # Get the latest lmp
-                    # sort by created at  
-                        from ell.util.differ import write_commit_message_for_diff
-                        commit = str(write_commit_message_for_diff(f"{latest_lmp['dependencies']}\n\n{latest_lmp['source']}", f"{fn_closure[1]}\n\n{fn_closure[0]}")[0])
-
-                config._store.write_lmp(
-                    lmp_id=func_to_track.__ell_hash__,
-                    name=_name,
-                    created_at=datetime.now(),
-                    source=fn_closure[0],
-                    dependencies=fn_closure[1],
-                    commit_message=(commit),
-                    global_vars={k: v for k, v in func_to_track.__ell_closure__[2].items() if ell.util.closure.is_immutable_variable(v)},
-                    free_vars={k: v for k, v in func_to_track.__ell_closure__[3].items() if ell.util.closure.is_immutable_variable(v)},
-                    is_lmp=lmp,
-                    lm_kwargs=(
-                        (lm_kwargs)
-                        if lm_kwargs
-                    else None
-                    ),
-                    version_number=version,
-                    uses=func_to_track.__ell_uses__,
-                )
-                _has_serialized_lmp = True
-
-
-        config._store.write_invocation(id=invocation_id,
-            lmp_id=func_to_track.__ell_hash__,  created_at=datetime.now(),
-            global_vars={k: v for k, v in func_to_track.__ell_closure__[2].items() if ell.util.closure.is_immutable_variable(v)},
-            free_vars={k: v for k, v in func_to_track.__ell_closure__[3].items() if ell.util.closure.is_immutable_variable(v)},
-            latency_ms=latency_ms,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            input_hash=input_hash,
-            invocation_kwargs=invocation_kwargs,
-            **cleaned_invocation_params, consumes=consumes, result=result)
+        _write_invocation(func_to_track, invocation_id, latency_ms, prompt_tokens, completion_tokens, 
+                         input_hash, invocation_kwargs, cleaned_invocation_params, consumes, result)

        return result

@@ -152,7 +110,78 @@ def track(fn: Callable) -> Callable:

    return wrapper

+def _serialize_lmp(func, name, fn_closure, is_lmp, lm_kwargs):
+    """Write ``func`` to the configured store unless its hash is already stored.
+
+    Args:
+        func: Tracked function; must expose ``__ell_hash__``, ``__ell_closure__``
+            and ``__ell_uses__``.
+        name: Name used to look up previously stored versions.
+        fn_closure: Indexable pair; ``[0]`` is stored as the source and ``[1]``
+            as the dependencies.
+        is_lmp: Stored as the ``is_lmp`` field.
+        lm_kwargs: Stored as ``lm_kwargs``; falsy values are stored as ``None``.
+
+    When earlier versions exist under ``name``, the version number is one more
+    than that of the most recently created one, and, if ``config.autocommit`` is
+    set, a commit message is generated from the old and new source.
+    """
+    lmps = config._store.get_lmps(name=name)
+    already_in_store = any(lmp['lmp_id'] == func.__ell_hash__ for lmp in lmps)
+    if already_in_store:
+        return
+
+    # Defaults for a first version. `commit` must be bound on every path: it used
+    # to be left unassigned when earlier versions existed but autocommit was off,
+    # which raised UnboundLocalError at the write_lmp call below.
+    version = 0
+    commit = None
+    if lmps:
+        latest_lmp = max(lmps, key=lambda x: x['created_at'])
+        version = latest_lmp['version_number'] + 1
+        if config.autocommit:
+            # Imported lazily, as in the original code.
+            from ell.util.differ import write_commit_message_for_diff
+            commit = str(write_commit_message_for_diff(
+                f"{latest_lmp['dependencies']}\n\n{latest_lmp['source']}", 
+                f"{fn_closure[1]}\n\n{fn_closure[0]}")[0])

+    config._store.write_lmp(
+        lmp_id=func.__ell_hash__,
+        name=name,
+        created_at=datetime.now(),
+        source=fn_closure[0],
+        dependencies=fn_closure[1],
+        commit_message=commit,
+        global_vars=get_immutable_vars(func.__ell_closure__[2]),
+        free_vars=get_immutable_vars(func.__ell_closure__[3]),
+        is_lmp=is_lmp,
+        lm_kwargs=lm_kwargs if lm_kwargs else None,
+        version_number=version,
+        uses=func.__ell_uses__,
+    )
+
+def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion_tokens, 
+                     input_hash, invocation_kwargs, cleaned_invocation_params, consumes, result):
+    """Record a single invocation of ``func`` in the configured store.
+
+    The record carries the function's hash, the current time, the serializable
+    view of its global and free variables (closure slots 2 and 3), and the
+    timing, token, input and result data passed in by the caller.
+    """
+    # Built in the same order the keyword arguments were originally evaluated.
+    record = {
+        "id": invocation_id,
+        "lmp_id": func.__ell_hash__,
+        "created_at": datetime.now(),
+        "global_vars": get_immutable_vars(func.__ell_closure__[2]),
+        "free_vars": get_immutable_vars(func.__ell_closure__[3]),
+        "latency_ms": latency_ms,
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "input_hash": input_hash,
+        "invocation_kwargs": invocation_kwargs,
+    }
+    config._store.write_invocation(
+        **record,
+        **cleaned_invocation_params,
+        consumes=consumes,
+        result=result,
+    )
+
+# TODO: Contributors - this is a major place to optimize.
+# Consider using VS Code's preferred method or gdb's preferred method of stringifying symbols recursively.
+def get_immutable_vars(vars_dict):
+    """Convert a mapping of captured variables into plain, JSON-friendly data.
+
+    Conversion rules applied by the hook registered below:
+      * int, float, str, bool and None pass through unchanged;
+      * lists and tuples become lists, converted element by element;
+      * dict values are converted recursively (keys are kept as-is);
+      * sets and frozensets become sorted lists;
+      * numpy arrays become nested lists via ``tolist()``;
+      * anything else becomes the string ``"<Object of type TypeName>"``.
+
+    Returns:
+        The result of unstructuring ``vars_dict`` with a cattrs converter that
+        has the hook registered for ``object``.
+    """
+    primitive_types = (int, float, str, bool, type(None))
+    converter = cattrs.Converter()
+
+    def handle_complex_types(obj):
+        if isinstance(obj, primitive_types):
+            return obj
+        elif isinstance(obj, (list, tuple)):
+            return [handle_complex_types(item) for item in obj]
+        elif isinstance(obj, dict):
+            return {k: handle_complex_types(v) for k, v in obj.items()}
+        elif isinstance(obj, (set, frozenset)):
+            items = [handle_complex_types(item) for item in obj]
+            try:
+                return sorted(items)
+            except TypeError:
+                # Mixed or unorderable element types (e.g. {1, "a"}) cannot be
+                # compared; fall back to a deterministic textual ordering.
+                return sorted(items, key=repr)
+        elif isinstance(obj, np.ndarray):
+            return obj.tolist()
+        else:
+            return f"<Object of type {type(obj).__name__}>"
+
+    converter.register_unstructure_hook(object, handle_complex_types)
+    return converter.unstructure(vars_dict)

 def prepare_invocation_params(fn_args, fn_kwargs):
    invocation_params = dict(
@@ -186,5 +215,4 @@ def prepare_invocation_params(fn_args, fn_kwargs):

    cleaned_invocation_params = invocation_converter.unstructure(invocation_params)
    input_hash = hashlib.sha256(json.dumps(cleaned_invocation_params, sort_keys=True).encode('utf-8')).hexdigest()
-    return cleaned_invocation_params, input_hash, consumes
-
+    return cleaned_invocation_params, input_hash, consumes
--- a/src/ell/util/closure.py
+++ b/src/ell/util/closure.py
@@ -191,6 +191,7 @@ def lexical_closure(func: Any, already_closed=None, initial_call=False, recursio

    # These are not global variables these are globals, and other shit is actualy in cluded here
    _globals = collections.OrderedDict(dill.detect.globalvars(func))
+    print(_globals)
    _frees = collections.OrderedDict(dill.detect.freevars(func))

    # If func is a class we actually should check all the methods of the class for globalvars. Malekdiction (MSM) was here.
@@ -239,6 +240,7 @@ def lexical_closure(func: Any, already_closed=None, initial_call=False, recursio

    # Iterate over the global variables
    for var_name, var_value in {**_globals, **_frees}.items():
+        is_free = var_name in _frees
        # If the variable is a function, get its source code
        if isinstance(var_value, (types.FunctionType, type, types.MethodType)):
            if var_name not in FORBIDDEN_NAMES:
@@ -271,12 +273,12 @@ def lexical_closure(func: Any, already_closed=None, initial_call=False, recursio
            imports += [dill.source.getimport(var_value, alias=var_name)]

        else:
-            json_default = lambda x: f"<Object of type ()>"
+            json_default = lambda x: f"<Object of type {type(x).__name__}>"
            if isinstance(var_value, str) and '\n' in var_value:
-                clean_dump = f"'''{var_value}'''"
+                dependencies.append(f"{var_name} = '''{var_value}'''")
            else:
                # if is immutable
-                if is_immutable_variable(var_value):
+                if is_immutable_variable(var_value) and not is_free:
                    dependencies.append(f"#<BV>\n{var_name} = {repr(var_value)}\n#</BV>")
                else:

--- a/src/ell/util/differ.py
+++ b/src/ell/util/differ.py
@@ -1,25 +1,43 @@

 from ell.decorators.lm import lm
+import difflib

+# Todo: update this for single change stuff so that it doesn't summarize small chage but says it specifically.
@lm("gpt-4o-mini", temperature=0.2, exempt_from_tracking=True)
 def write_commit_message_for_diff(old : str, new : str) -> str:
    """You are an expert programmer who's goal is to write commit messages based on diffs. 

-You will be given an old version of a progrma and a new version of a program. 
+You will be given two version of source code. 
 You will be expected to write a commit message that describes the changes between the two versions. Your commit message should be at most one sentence and highly specific to the changes made. Don't just discuss the functions changed but how they were specifically changed.
 Your commit message cannot be more than 10 words so use sentence fragments and be concise.
 The @ell.lm decorator turns a function into a call to a language model: 
    the docstring is the system prompt and the string returned in the user prompt. 
 It is extremely important that if these are change: your commit message must say what specifically changed in the user or system prompt rather than saying they were updated or changed geneircally.
 It is extremely important that you never refer to a @ell.lm docstring as a docstring; it is a system prompt. 
-Respond in the following format: 
-`<commit_message summarizing all changes with specificity>:
+Don't say why a change was done but what specifically changed.
+Consider all changes ot the program including the globals and free variables
+Respond in the following format:
+<commit_message summarizing all changes with specificity>:
 <* bulleted list of all changes>."""
+    # The docstring above is the system prompt; the string returned below is the user prompt.
+    # Strip the bound-variable marker comments so they do not appear in the prompt or the diff.
+    clean_program_of_all_bv_tags = lambda program : program.replace("#<BV>", "").replace("#</BV>", "").replace("#<BmV>", "").replace("#</BmV>", "")
+    old_clean = clean_program_of_all_bv_tags(old)
+    new_clean = clean_program_of_all_bv_tags(new)
+
+    # unified_diff returns a lazy generator; join it so the prompt contains the
+    # actual diff text instead of the generator's repr.
+    diff = "\n".join(difflib.unified_diff(old_clean.splitlines(), new_clean.splitlines(), lineterm=''))
+
    return f"""Write a commit message succinctly and specifically describing the changes between these two versions of a program.
 OLD VERISON:
-{old}
+```
+{old_clean}
+```
+
 NEW VERSION:
-{new}
+```
+{new_clean}
+```
+
+DIFF:
+{diff}
 """

 if __name__ == "__main__":