safe_commit.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. """
  2. Safe git-add helpers for Trellis-owned paths.
  3. Why this module exists
  4. ----------------------
  5. A real user incident: a project's `.gitignore` listed `.trellis/` (company-wide
  6. template / personal habit). When `add_session.py` and `task.py archive` ran
  7. their auto-commit and `git add` failed with `ignored by .gitignore`, the AI
  8. agent driving the workflow "fixed" it by retrying with
  9. `git add -f .trellis/` — which fan-out-included every ignored subtree
  10. (`.trellis/.backup-*/`, `.trellis/worktrees/`, `.trellis/.template-hashes.json`,
  11. `.trellis/.runtime/`), committing 548 files / 83474 lines of caches/backups.
  12. Design
  13. ------
  14. - Scripts only stage SPECIFIC product paths (journal files, index.md, the
  15. current task dir, the archive dir). Never the whole `.trellis/` tree.
  16. - If plain `git add <specific>` fails with "ignored by", DO NOT retry with
  17. ``-f``. The presence of `.trellis/` in `.gitignore` is treated as user
  18. intent ("keep .trellis/ local-only"). The script warns and skips the
  19. auto-commit; users who want auto-staging can either fix their `.gitignore`
  20. or set ``session_auto_commit: false`` and manage git themselves.
  21. - The warning includes a negative example: ``Do NOT use `git add -f .trellis/` ...``
  22. so any AI rereading the log doesn't reinvent the bug.
  23. History note: 0.5.10 introduced an automatic ``git add -f`` retry on the
  24. specific paths. That was reverted in 0.5.11 — auto-forcing into a tree the
  25. user had gitignored violates user intent even when the path list is narrow.
  26. The wider-grain forbidden command stays forbidden, and the narrow-grain auto
  27. ``-f`` is gone too.
  28. """
  29. from __future__ import annotations
  30. import sys
  31. from pathlib import Path
  32. from .git import run_git
  33. from .paths import (
  34. DIR_ARCHIVE,
  35. DIR_TASKS,
  36. DIR_WORKFLOW,
  37. DIR_WORKSPACE,
  38. FILE_JOURNAL_PREFIX,
  39. get_developer,
  40. )
  41. # Paths under .trellis/ that must NEVER be auto-staged. Listed here so the
  42. # warning to the user can show concrete subpaths to ignore individually
  43. # instead of ignoring the whole `.trellis/` tree.
  44. TRELLIS_IGNORED_SUBPATHS = (
  45. ".trellis/.backup-*",
  46. ".trellis/worktrees/",
  47. ".trellis/.template-hashes.json",
  48. ".trellis/.runtime/",
  49. ".trellis/.cache/",
  50. )
  51. def safe_trellis_paths_to_add(
  52. repo_root: Path,
  53. task_name: str | None = None,
  54. ) -> list[str]:
  55. """Return the list of repo-relative paths the auto-commit should stage.
  56. Only includes paths that exist on disk so callers don't pass non-existent
  57. arguments to git. The caller is responsible for `git diff --cached`
  58. checking afterwards.
  59. Included:
  60. - .trellis/workspace/<developer>/journal-*.md
  61. - .trellis/workspace/<developer>/index.md
  62. - .trellis/tasks/<task_name>/ (ONLY the current task dir when
  63. ``task_name`` is passed; plus its archive location if the task
  64. already lives under archive/)
  65. Excluded (intentionally — these must not be staged):
  66. - .trellis/.backup-*, .trellis/worktrees/,
  67. .trellis/.template-hashes.json, .trellis/.runtime/, .trellis/.cache/
  68. Scope contract (see #303 / break-loop analysis): when ``task_name`` is
  69. passed, the task segment stages ONLY that task directory — it never walks
  70. ``tasks_dir.iterdir()`` over all active tasks. This mirrors
  71. :func:`safe_archive_paths_to_add` and prevents dirty changes in OTHER
  72. parallel-window task dirs from being bundled into the session auto-commit.
  73. Backwards-compat: with no ``task_name``, the function walks every active
  74. task directory (+ the archive subtree) the old wide way. New callers
  75. should always pass ``task_name``.
  76. """
  77. paths: list[str] = []
  78. # Workspace journal files + index.md
  79. developer = get_developer(repo_root)
  80. if developer:
  81. ws = repo_root / DIR_WORKFLOW / DIR_WORKSPACE / developer
  82. if ws.is_dir():
  83. for f in sorted(ws.glob(f"{FILE_JOURNAL_PREFIX}*.md")):
  84. if f.is_file():
  85. paths.append(
  86. f"{DIR_WORKFLOW}/{DIR_WORKSPACE}/{developer}/{f.name}"
  87. )
  88. index_md = ws / "index.md"
  89. if index_md.is_file():
  90. paths.append(
  91. f"{DIR_WORKFLOW}/{DIR_WORKSPACE}/{developer}/index.md"
  92. )
  93. tasks_dir = repo_root / DIR_WORKFLOW / DIR_TASKS
  94. if not tasks_dir.is_dir():
  95. return paths
  96. if task_name is not None:
  97. # Narrow scope — ONLY the current task directory (active or archived).
  98. # Never iterdir() all tasks: parallel-window dirty task dirs must not
  99. # leak into the session auto-commit.
  100. active_task = tasks_dir / task_name
  101. if active_task.is_dir():
  102. paths.append(f"{DIR_WORKFLOW}/{DIR_TASKS}/{task_name}")
  103. archived_task = tasks_dir / DIR_ARCHIVE / task_name
  104. if archived_task.is_dir():
  105. paths.append(
  106. f"{DIR_WORKFLOW}/{DIR_TASKS}/{DIR_ARCHIVE}/{task_name}"
  107. )
  108. return paths
  109. # Legacy wide scope (no task_name): each direct child of tasks/ that is a
  110. # directory and not the archive root, plus the whole archive subtree.
  111. for child in sorted(tasks_dir.iterdir()):
  112. if not child.is_dir():
  113. continue
  114. if child.name == DIR_ARCHIVE:
  115. continue
  116. paths.append(f"{DIR_WORKFLOW}/{DIR_TASKS}/{child.name}")
  117. archive_dir = tasks_dir / DIR_ARCHIVE
  118. if archive_dir.is_dir():
  119. paths.append(f"{DIR_WORKFLOW}/{DIR_TASKS}/{DIR_ARCHIVE}")
  120. return paths
  121. def safe_archive_paths_to_add(
  122. repo_root: Path,
  123. task_name: str | None = None,
  124. modified_children: list[str] | None = None,
  125. ) -> list[str]:
  126. """Return paths to stage after `task.py archive`.
  127. Scoped to ONLY the paths the archive operation actually touched:
  128. - the archive subtree (where the freshly-moved task lives)
  129. - the source task directory (for source-side deletes; caller pairs
  130. this with `git rm --cached` since `git add` won't stage deletes
  131. for a path that no longer exists in the working tree)
  132. - any child task directories whose `task.json` was edited to drop
  133. the archived parent (parent-children relationship update)
  134. This narrow scope avoids "scope creep" — dirty changes in OTHER
  135. active task dirs (parallel-window edits) are NOT bundled into the
  136. archive commit. Callers handle each kind of change in its own
  137. commit boundary.
  138. Backwards-compat: with no arguments, the function walks the whole
  139. `.trellis/tasks/` subtree the old way (active tasks + archive). New
  140. callers should always pass `task_name`.
  141. """
  142. paths: list[str] = []
  143. tasks_dir = repo_root / DIR_WORKFLOW / DIR_TASKS
  144. if not tasks_dir.is_dir():
  145. return paths
  146. archive_dir = tasks_dir / DIR_ARCHIVE
  147. if task_name is not None:
  148. # Narrow scope — only paths that still exist on disk (so
  149. # `git add` doesn't choke on the moved-away source). The caller
  150. # handles the source-side deletes via `git rm --cached`
  151. # explicitly.
  152. if archive_dir.is_dir():
  153. paths.append(
  154. f"{DIR_WORKFLOW}/{DIR_TASKS}/{DIR_ARCHIVE}"
  155. )
  156. for child_name in modified_children or []:
  157. paths.append(f"{DIR_WORKFLOW}/{DIR_TASKS}/{child_name}")
  158. return paths
  159. # Legacy wide scope (no task_name): preserve old behavior so callers
  160. # that have not been updated keep working.
  161. if archive_dir.is_dir():
  162. paths.append(f"{DIR_WORKFLOW}/{DIR_TASKS}/{DIR_ARCHIVE}")
  163. for child in sorted(tasks_dir.iterdir()):
  164. if not child.is_dir():
  165. continue
  166. if child.name == DIR_ARCHIVE:
  167. continue
  168. paths.append(f"{DIR_WORKFLOW}/{DIR_TASKS}/{child.name}")
  169. return paths
  170. def _stderr_indicates_ignored(stderr: str) -> bool:
  171. """git add error indicates the path is excluded by .gitignore."""
  172. if not stderr:
  173. return False
  174. lowered = stderr.lower()
  175. return "ignored by" in lowered
  176. def safe_git_add(
  177. paths: list[str], repo_root: Path
  178. ) -> tuple[bool, bool, str]:
  179. """Run `git add` on specific paths; never retry with -f.
  180. Returns ``(success, used_force, stderr)``. The ``used_force`` field is
  181. kept for signature compatibility with the 0.5.10 implementation but is
  182. always ``False`` — we never auto-force.
  183. Behavior:
  184. - No paths passed → success, no force, empty stderr.
  185. - Plain ``git add -- <paths>`` succeeds → return success.
  186. - Plain fails (any reason — ignored or otherwise) → return failure with
  187. the stderr. Callers should inspect the stderr (see
  188. :func:`print_gitignore_warning`) and skip the auto-commit.
  189. """
  190. if not paths:
  191. return True, False, ""
  192. rc, _, err = run_git(["add", "--", *paths], cwd=repo_root)
  193. if rc == 0:
  194. return True, False, ""
  195. return False, False, err
  196. def print_gitignore_warning(paths: list[str]) -> None:
  197. """Explain to the user (and any AI reading the log) what to do.
  198. CRITICAL: includes the negative example
  199. ``Do NOT use `git add -f .trellis/``` — agents reading the warning are
  200. known to invent that command, which fans out to ignored caches/backups.
  201. """
  202. print(
  203. "[WARN] git add failed because .trellis/ paths are ignored by your .gitignore.",
  204. file=sys.stderr,
  205. )
  206. print(
  207. "[WARN] Skipping auto-commit. The journal/task files were still written to disk;",
  208. file=sys.stderr,
  209. )
  210. print(
  211. "[WARN] git was not touched.",
  212. file=sys.stderr,
  213. )
  214. print("[WARN]", file=sys.stderr)
  215. print(
  216. "[WARN] Trellis manages these specific paths and they should be tracked:",
  217. file=sys.stderr,
  218. )
  219. if paths:
  220. for p in paths:
  221. print(f"[WARN] {p}", file=sys.stderr)
  222. else:
  223. print(
  224. "[WARN] .trellis/workspace/<developer>/{journal-*.md,index.md}",
  225. file=sys.stderr,
  226. )
  227. print(
  228. "[WARN] .trellis/tasks/<task-dir>/",
  229. file=sys.stderr,
  230. )
  231. print(
  232. "[WARN] .trellis/tasks/archive/",
  233. file=sys.stderr,
  234. )
  235. print("[WARN]", file=sys.stderr)
  236. print(
  237. "[WARN] Recommended: change your .gitignore from `.trellis/` to specific",
  238. file=sys.stderr,
  239. )
  240. print(
  241. "[WARN] subpaths that should remain ignored, e.g.:",
  242. file=sys.stderr,
  243. )
  244. for sub in TRELLIS_IGNORED_SUBPATHS:
  245. print(f"[WARN] {sub}", file=sys.stderr)
  246. print("[WARN]", file=sys.stderr)
  247. print(
  248. "[WARN] Or, if you intentionally keep .trellis/ local-only, set in",
  249. file=sys.stderr,
  250. )
  251. print(
  252. "[WARN] .trellis/config.yaml:",
  253. file=sys.stderr,
  254. )
  255. print(
  256. "[WARN] session_auto_commit: false",
  257. file=sys.stderr,
  258. )
  259. print(
  260. "[WARN] so the scripts skip git entirely and you can review / commit",
  261. file=sys.stderr,
  262. )
  263. print(
  264. "[WARN] manually with `git status` / `git add` / `git commit`.",
  265. file=sys.stderr,
  266. )
  267. print("[WARN]", file=sys.stderr)
  268. print(
  269. "[WARN] Do NOT use `git add -f .trellis/` — it pulls in backups, worktrees,",
  270. file=sys.stderr,
  271. )
  272. print(
  273. "[WARN] and runtime caches that should never be committed.",
  274. file=sys.stderr,
  275. )