fix: port continuous-learning observer fixes

Ports continuous-learning observer signal, storage, remote normalization, and v1 deprecation fixes onto current main.
This commit is contained in:
Affaan Mustafa
2026-05-11 03:35:42 -04:00
committed by GitHub
parent e674a7dbd7
commit 12e1bc424d
17 changed files with 512 additions and 56 deletions

View File

@@ -19,7 +19,9 @@
# 3. git repo root path (fallback, machine-specific)
# 4. "global" (no project context detected)
_CLV2_HOMUNCULUS_DIR="${HOME}/.claude/homunculus"
# shellcheck disable=SC1091
. "$(dirname "${BASH_SOURCE[0]}")/lib/homunculus-dir.sh"
_CLV2_HOMUNCULUS_DIR="$(_ecc_resolve_homunculus_dir)"
_CLV2_PROJECTS_DIR="${_CLV2_HOMUNCULUS_DIR}/projects"
_CLV2_REGISTRY_FILE="${_CLV2_HOMUNCULUS_DIR}/projects.json"
@@ -49,6 +51,30 @@ export CLV2_PYTHON_CMD
CLV2_OBSERVER_PROMPT_PATTERN='Can you confirm|requires permission|Awaiting (user confirmation|confirmation|approval|permission)|confirm I should proceed|once granted access|grant.*access'
export CLV2_OBSERVER_PROMPT_PATTERN
# Normalize a git remote URL into a canonical "host/path" string so that
# different spellings of the same remote produce the same hash input.
#
# Rewrites applied, in order:
#   1. drop embedded credentials ("://user:token@" -> "://")
#   2. drop the leading URL scheme ("https://", "ssh://", "file://", ...)
#   3. rewrite scp-style "user@host:path" to "host/path"
#   4. drop a trailing ".git" (optionally followed by "/") and trailing slashes
# Network remotes (any scheme other than file://, or scp-style) are lowercased;
# file:// URLs and plain local paths keep their case.
#
# Arguments: $1 - remote URL (may be empty)
# Outputs:   normalized remote on stdout without a trailing newline;
#            nothing when $1 is empty
# Returns:   0 when $1 is empty
_clv2_normalize_remote_url() {
  local url="$1"
  [ -z "$url" ] && return 0

  # Classify on the raw input, before any rewriting. The scp-style test is
  # anchored to a leading "user@host:" (same regex as the Python
  # _normalize_remote_url) so that a local path which merely contains "@" and
  # ":" is not treated as a network remote and lowercased.
  local scp_like='^[^@/:]+@[^:/]+:'
  local is_network=0
  case "$url" in
    file://*) is_network=0 ;;
    *://*) is_network=1 ;;
    *)
      if [[ "$url" =~ $scp_like ]]; then
        is_network=1
      fi
      ;;
  esac

  # One sed process applies all rewrites in sequence.
  url=$(printf '%s' "$url" | sed -E \
    -e 's|://[^@]+@|://|' \
    -e 's|^[A-Za-z][A-Za-z0-9+.-]*://||' \
    -e 's|^[^@/:]+@([^:/]+):|\1/|' \
    -e 's|\.git/?$||' \
    -e 's|/+$||')

  if [ "$is_network" = "1" ]; then
    printf '%s' "$url" | tr '[:upper:]' '[:lower:]'
  else
    printf '%s' "$url"
  fi
}
_clv2_detect_project() {
local project_root=""
local project_name=""
@@ -94,15 +120,20 @@ _clv2_detect_project() {
fi
fi
# Compute hash from the original remote URL (legacy, for backward compatibility)
local legacy_hash_input="${remote_url:-$project_root}"
local raw_remote_url="$remote_url"
# Strip embedded credentials from remote URL (e.g., https://ghp_xxxx@github.com/...)
if [ -n "$remote_url" ]; then
remote_url=$(printf '%s' "$remote_url" | sed -E 's|://[^@]+@|://|')
fi
local hash_input="${remote_url:-$project_root}"
local legacy_hash_input="${remote_url:-$project_root}"
local normalized_remote=""
if [ -n "$remote_url" ]; then
normalized_remote=$(_clv2_normalize_remote_url "$remote_url")
fi
local hash_input="${normalized_remote:-${remote_url:-$project_root}}"
# Prefer Python for consistent SHA256 behavior across shells/platforms.
# Pass the value via env var and encode as UTF-8 inside Python so the hash
# is locale-independent (shells vary between UTF-8 / CP932 / CP1252, which
@@ -122,19 +153,33 @@ print(hashlib.sha256(s.encode("utf-8")).hexdigest()[:12])
echo "fallback")
fi
# Backward compatibility: if credentials were stripped and the hash changed,
# check if a project dir exists under the legacy hash and reuse it
if [ "$legacy_hash_input" != "$hash_input" ] && [ -n "$_CLV2_PYTHON_CMD" ]; then
local legacy_id=""
legacy_id=$(_CLV2_HASH_INPUT="$legacy_hash_input" "$_CLV2_PYTHON_CMD" -c '
# Backward compatibility: migrate a single legacy project directory from
# credential-stripped or raw remote hashes to the normalized remote hash.
if [ -n "$_CLV2_PYTHON_CMD" ] && [ ! -d "${_CLV2_PROJECTS_DIR}/${project_id}" ]; then
local legacy_inputs=()
[ -n "$legacy_hash_input" ] && [ "$legacy_hash_input" != "$hash_input" ] \
&& legacy_inputs+=("$legacy_hash_input")
[ -n "$raw_remote_url" ] && [ "$raw_remote_url" != "$hash_input" ] \
&& [ "$raw_remote_url" != "$legacy_hash_input" ] \
&& legacy_inputs+=("$raw_remote_url")
local legacy_input legacy_id
for legacy_input in "${legacy_inputs[@]}"; do
legacy_id=$(_CLV2_HASH_INPUT="$legacy_input" "$_CLV2_PYTHON_CMD" -c '
import os, hashlib
s = os.environ["_CLV2_HASH_INPUT"]
print(hashlib.sha256(s.encode("utf-8")).hexdigest()[:12])
' 2>/dev/null)
if [ -n "$legacy_id" ] && [ -d "${_CLV2_PROJECTS_DIR}/${legacy_id}" ] && [ ! -d "${_CLV2_PROJECTS_DIR}/${project_id}" ]; then
# Migrate legacy directory to new hash
mv "${_CLV2_PROJECTS_DIR}/${legacy_id}" "${_CLV2_PROJECTS_DIR}/${project_id}" 2>/dev/null || project_id="$legacy_id"
fi
if [ -n "$legacy_id" ] && [ "$legacy_id" != "$project_id" ] \
&& [ -d "${_CLV2_PROJECTS_DIR}/${legacy_id}" ]; then
if mv "${_CLV2_PROJECTS_DIR}/${legacy_id}" "${_CLV2_PROJECTS_DIR}/${project_id}" 2>/dev/null; then
break
else
project_id="$legacy_id"
break
fi
fi
done
fi
# Export results

View File

@@ -38,7 +38,48 @@ except ImportError:
# Configuration
# ─────────────────────────────────────────────
HOMUNCULUS_DIR = Path.home() / ".claude" / "homunculus"
def _resolve_homunculus_dir() -> Path:
override = os.environ.get("CLV2_HOMUNCULUS_DIR")
if override:
if Path(override).is_absolute():
return Path(override)
print(f"[ecc] CLV2_HOMUNCULUS_DIR={override!r} is not absolute; ignoring", file=sys.stderr)
xdg = os.environ.get("XDG_DATA_HOME")
if xdg:
if Path(xdg).is_absolute():
return Path(xdg) / "ecc-homunculus"
print(f"[ecc] XDG_DATA_HOME={xdg!r} is not absolute; ignoring", file=sys.stderr)
return Path.home() / ".local" / "share" / "ecc-homunculus"
def _strip_remote_credentials(remote_url: str) -> str:
return re.sub(r"://[^@]+@", "://", remote_url or "")
def _normalize_remote_url(remote_url: str) -> str:
    """Reduce a git remote to a canonical ``host/path`` string.

    The result has credentials, the URL scheme, a trailing ``.git`` and
    trailing slashes removed, and scp-style ``user@host:path`` rewritten to
    ``host/path``. Network remotes (any ``scheme://`` other than ``file://``,
    or scp-style) are lowercased; ``file://`` URLs and plain paths keep their
    case. A falsy ``remote_url`` yields ``""``.
    """
    if not remote_url:
        return ""

    # Decide on case-folding from the raw input, before any rewriting.
    if remote_url.startswith("file://"):
        fold_case = False
    elif "://" in remote_url:
        fold_case = True
    else:
        fold_case = re.match(r"^[^@/:]+@[^:/]+:", remote_url) is not None

    result = _strip_remote_credentials(remote_url)
    rewrites = (
        (r"^[A-Za-z][A-Za-z0-9+.-]*://", ""),  # leading scheme
        (r"^[^@/:]+@([^:/]+):", r"\1/"),       # scp-style user@host:path
        (r"\.git/?$", ""),                     # trailing .git or .git/
        (r"/+$", ""),                          # trailing slashes
    )
    for pattern, replacement in rewrites:
        result = re.sub(pattern, replacement, result)

    if fold_case:
        return result.lower()
    return result
def _project_hash(value: str) -> str:
return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
HOMUNCULUS_DIR = _resolve_homunculus_dir()
PROJECTS_DIR = HOMUNCULUS_DIR / "projects"
REGISTRY_FILE = HOMUNCULUS_DIR / "projects.json"
@@ -177,11 +218,35 @@ def detect_project() -> dict:
except (subprocess.TimeoutExpired, FileNotFoundError):
pass
hash_source = remote_url if remote_url else project_root
project_id = hashlib.sha256(hash_source.encode()).hexdigest()[:12]
raw_remote_url = remote_url
if remote_url:
remote_url = _strip_remote_credentials(remote_url)
legacy_hash_source = remote_url if remote_url else project_root
normalized_remote = _normalize_remote_url(remote_url) if remote_url else ""
hash_source = normalized_remote if normalized_remote else legacy_hash_source
project_id = _project_hash(hash_source)
project_dir = PROJECTS_DIR / project_id
if not project_dir.exists():
legacy_sources = []
if legacy_hash_source and legacy_hash_source != hash_source:
legacy_sources.append(legacy_hash_source)
if raw_remote_url and raw_remote_url not in {hash_source, legacy_hash_source}:
legacy_sources.append(raw_remote_url)
for legacy_source in legacy_sources:
legacy_id = _project_hash(legacy_source)
legacy_dir = PROJECTS_DIR / legacy_id
if legacy_id != project_id and legacy_dir.exists():
try:
legacy_dir.rename(project_dir)
except OSError:
project_id = legacy_id
project_dir = legacy_dir
break
# Ensure project directory structure
for d in [
project_dir / "instincts" / "personal",

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Shared continuous-learning-v2 data-directory resolver.
#
# Resolution precedence:
# 1. CLV2_HOMUNCULUS_DIR, when absolute
# 2. XDG_DATA_HOME/ecc-homunculus, when XDG_DATA_HOME is absolute
# 3. HOME/.local/share/ecc-homunculus
# Print the resolved data directory on stdout, newline-terminated.
# A set-but-relative CLV2_HOMUNCULUS_DIR or XDG_DATA_HOME is reported on
# stderr and skipped. Returns 1 with nothing on stdout when HOME is not an
# absolute path either.
_ecc_resolve_homunculus_dir() {
  # 1. Explicit override.
  case "${CLV2_HOMUNCULUS_DIR:-}" in
    '') ;;
    /*)
      printf '%s\n' "$CLV2_HOMUNCULUS_DIR"
      return 0
      ;;
    *)
      printf '[ecc] CLV2_HOMUNCULUS_DIR=%s is not absolute; ignoring\n' "$CLV2_HOMUNCULUS_DIR" >&2
      ;;
  esac

  # 2. XDG data home.
  case "${XDG_DATA_HOME:-}" in
    '') ;;
    /*)
      printf '%s/ecc-homunculus\n' "$XDG_DATA_HOME"
      return 0
      ;;
    *)
      printf '[ecc] XDG_DATA_HOME=%s is not absolute; ignoring\n' "$XDG_DATA_HOME" >&2
      ;;
  esac

  # 3. Default under HOME; bail out when HOME itself is unusable.
  case "${HOME:-}" in
    /*) ;;
    *)
      printf '[ecc] HOME=%s is not absolute; cannot resolve homunculus dir\n' "${HOME:-}" >&2
      return 1
      ;;
  esac
  printf '%s/.local/share/ecc-homunculus\n' "$HOME"
}

View File

@@ -0,0 +1,62 @@
#!/usr/bin/env bash
# One-shot migration from the legacy Claude config tree into the
# continuous-learning-v2 data directory.
#
# Moves ${HOME}/.claude/homunculus to the directory printed by
# _ecc_resolve_homunculus_dir (sourced from lib/homunculus-dir.sh next to this
# script). Exits 0 when there is nothing to do. Exits 1 without moving
# anything when an observer-loop.sh process is found, when the destination
# lies inside the source, or when both locations already hold content.
# settings.json is never edited; an advisory is printed instead.

set -euo pipefail

OLD="${HOME}/.claude/homunculus"

# shellcheck disable=SC1091
. "$(dirname "$0")/lib/homunculus-dir.sh"
NEW="$(_ecc_resolve_homunculus_dir)"

# Compare by identity as well as by string: a trailing slash or a symlink can
# make NEW a different spelling of OLD, and the empty-destination branch below
# would then rmdir the source before trying to move it.
if [ "$NEW" = "$OLD" ] || [ "$NEW" -ef "$OLD" ]; then
  echo "Resolved destination equals source ($OLD); nothing to migrate."
  exit 0
fi

if [ ! -d "$OLD" ]; then
  echo "Nothing to migrate (no $OLD)."
  exit 0
fi

# A directory cannot be moved into its own subtree; stop before mkdir -p
# creates stray directories inside the source.
case "$NEW" in
  "$OLD"/*)
    echo "Refusing to migrate: destination $NEW is inside source $OLD." >&2
    exit 1
    ;;
esac

if command -v pgrep >/dev/null 2>&1; then
  if pgrep -f "${HOME}.*observer-loop\\.sh" >/dev/null 2>&1; then
    echo "Refusing to migrate: observer-loop.sh is running." >&2
    echo "Exit all Claude Code sessions, then re-run." >&2
    exit 1
  fi
else
  echo "Warning: pgrep not available; skipping running-observer check." >&2
fi

mkdir -p "$(dirname "$NEW")"

if [ ! -d "$NEW" ]; then
  mv "$OLD" "$NEW"
  echo "Moved $OLD -> $NEW"
elif [ -z "$(ls -A "$NEW" 2>/dev/null || true)" ]; then
  # Destination exists but is empty: replace it with the source tree.
  rmdir "$NEW"
  mv "$OLD" "$NEW"
  echo "Moved $OLD -> $NEW (replaced empty destination)"
else
  old_count="$(find "$OLD" -type f 2>/dev/null | wc -l | tr -d ' ')"
  new_count="$(find "$NEW" -type f 2>/dev/null | wc -l | tr -d ' ')"
  echo "Refusing to migrate: both paths exist with content." >&2
  echo "  Old: $OLD ($old_count files)" >&2
  echo "  New: $NEW ($new_count files)" >&2
  echo "Resolve manually, then re-run." >&2
  exit 1
fi

# Advisory only: flag a settings.json that mentions CLV2_CONFIG and still
# contains the old path.
settings="${HOME}/.claude/settings.json"
if [ -f "$settings" ] && grep -q '"CLV2_CONFIG"' "$settings" 2>/dev/null; then
  if grep -q '\.claude/homunculus' "$settings" 2>/dev/null; then
    cat >&2 <<WARN
Advisory: ~/.claude/settings.json still sets CLV2_CONFIG under the old path.
Update it to: ${NEW}/config.json
(Not editing settings.json automatically.)
WARN
  fi
fi