Explorar o código

fix: support entity alias fallback lookup

lingfengQAQ hai 1 mes
pai
achega
fc6362849b

+ 53 - 9
webnovel-writer/scripts/data_modules/index_entity_mixin.py

@@ -18,6 +18,31 @@ logger = logging.getLogger(__name__)
 
 
 class IndexEntityMixin:
+    def _register_alias_with_cursor(
+        self, cursor: sqlite3.Cursor, alias: str, entity_id: str, entity_type: str
+    ) -> bool:
+        """Insert one alias row through an already-open cursor.
+
+        The alias is stripped of surrounding whitespace first, and nothing is
+        written when the alias or ``entity_id`` is empty. The statement is
+        ``INSERT OR IGNORE`` and this method does not commit; committing is
+        left to the caller that owns the connection.
+
+        Returns:
+            True when ``cursor.rowcount`` reports an inserted row, False when
+            the input was rejected or no row was inserted.
+        """
+        alias = str(alias).strip() if alias is not None else ""
+        if not alias or not entity_id:
+            return False
+
+        cursor.execute(
+            """
+            INSERT OR IGNORE INTO aliases (alias, entity_id, entity_type)
+            VALUES (?, ?, ?)
+        """,
+            (alias, entity_id, entity_type),
+        )
+        return cursor.rowcount > 0
+
+    def _register_canonical_alias(
+        self, cursor: sqlite3.Cursor, entity: EntityMeta
+    ) -> None:
+        """Register the entity's canonical name as an alias of its own ID.
+
+        Skipped when the canonical name is empty after stripping or is
+        identical to ``entity.id``. Uses the caller's cursor and does not
+        commit.
+        """
+        canonical_name = str(entity.canonical_name).strip() if entity.canonical_name else ""
+        if canonical_name and canonical_name != entity.id:
+            self._register_alias_with_cursor(
+                cursor, canonical_name, entity.id, entity.type
+            )
+
     def upsert_entity(self, entity: EntityMeta, update_metadata: bool = False) -> bool:
         """
         插入或更新实体 (智能合并)
@@ -94,6 +119,7 @@ class IndexEntityMixin:
                             entity.id,
                         ),
                     )
+                self._register_canonical_alias(cursor, entity)
                 conn.commit()
                 return False
             else:
@@ -118,18 +144,21 @@ class IndexEntityMixin:
                         1 if entity.is_archived else 0,
                     ),
                 )
+                self._register_canonical_alias(cursor, entity)
                 conn.commit()
                 return True
 
     def get_entity(self, entity_id: str) -> Optional[Dict]:
-        """获取单个实体"""
+        """Fetch a single entity; fall back to an alias lookup when no row has this ID."""
         with self._get_conn() as conn:
             cursor = conn.cursor()
             cursor.execute("SELECT * FROM entities WHERE id = ?", (entity_id,))
             row = cursor.fetchone()
             if row:
                 return self._row_to_dict(row, parse_json=["current_json"])
-            return None
+
+        # No row matched the ID: treat the argument as an alias and return the
+        # first entity that get_entities_by_alias yields, or None if there is none.
+        alias_matches = self.get_entities_by_alias(entity_id)
+        return alias_matches[0] if alias_matches else None
 
     def get_entities_by_type(
         self, entity_type: str, include_archived: bool = False
@@ -260,18 +289,18 @@ class IndexEntityMixin:
 
         同一别名可映射多个实体 (如 "天云宗" → 地点 + 势力)
         """
+        alias = str(alias).strip() if alias is not None else ""
+        if not alias or not entity_id:
+            return False
+
         with self._get_conn() as conn:
             cursor = conn.cursor()
             try:
-                cursor.execute(
-                    """
-                    INSERT OR IGNORE INTO aliases (alias, entity_id, entity_type)
-                    VALUES (?, ?, ?)
-                """,
-                    (alias, entity_id, entity_type),
+                inserted = self._register_alias_with_cursor(
+                    cursor, alias, entity_id, entity_type
                 )
                 conn.commit()
-                return cursor.rowcount > 0
+                return inserted
             except sqlite3.IntegrityError:
                 return False
 
@@ -281,6 +310,10 @@ class IndexEntityMixin:
 
         返回所有匹配的实体 (可能有多个不同类型)
         """
+        alias = str(alias).strip() if alias is not None else ""
+        if not alias:
+            return []
+
         with self._get_conn() as conn:
             cursor = conn.cursor()
             cursor.execute(
@@ -289,6 +322,17 @@ class IndexEntityMixin:
                 FROM entities e
                 JOIN aliases a ON e.id = a.entity_id
                 WHERE a.alias = ?
+                ORDER BY
+                    e.is_archived ASC,
+                    e.is_protagonist DESC,
+                    CASE e.tier
+                        WHEN '核心' THEN 0
+                        WHEN '重要' THEN 1
+                        WHEN '次要' THEN 2
+                        ELSE 3
+                    END,
+                    e.last_appearance DESC,
+                    e.id ASC
             """,
                 (alias,),
             )

+ 33 - 6
webnovel-writer/scripts/data_modules/sql_state_manager.py

@@ -98,6 +98,23 @@ class SQLStateManager:
         self.config = config or get_config()
         self._index_manager = IndexManager(config)
 
+    def _unique_aliases(self, *groups: Any) -> List[str]:
+        """Merge the aliases/mentions passed in by the Data Agent, keeping order and dropping duplicates.
+
+        Each positional argument is one group: either a single string or an
+        iterable of values. Falsy groups are skipped. Every value is converted
+        with ``str`` and stripped; ``None`` and empty results are discarded.
+
+        Returns:
+            The distinct aliases in first-seen order.
+        """
+        result = []
+        seen = set()
+
+        for group in groups:
+            if not group:
+                continue
+            # A bare string is one alias, not a sequence of characters.
+            values = [group] if isinstance(group, str) else group
+            for value in values:
+                alias = str(value).strip() if value is not None else ""
+                if alias and alias not in seen:
+                    seen.add(alias)
+                    result.append(alias)
+
+        return result
+
     # ==================== 实体操作 ====================
 
     def upsert_entity(self, entity: EntityData) -> bool:
@@ -143,7 +160,7 @@ class SQLStateManager:
         entity = self._index_manager.get_entity(entity_id)
         if entity:
             # 添加别名
-            entity["aliases"] = self._index_manager.get_entity_aliases(entity_id)
+            entity["aliases"] = self._index_manager.get_entity_aliases(entity["id"])
         return entity
 
     def get_entities_by_type(self, entity_type: str, include_archived: bool = False) -> List[Dict]:
@@ -302,17 +319,25 @@ class SQLStateManager:
             if not entity_id:
                 continue
 
-            self._index_manager.update_entity_current(entity_id, {})  # 触发 updated_at
-            # 更新 last_appearance
             existing = self._index_manager.get_entity(entity_id)
+            resolved_id = existing.get("id") if existing else entity_id
+
+            if existing:
+                self._index_manager.update_entity_current(resolved_id, {})  # 触发 updated_at
+                entity_type = entity.get("type") or existing.get("type", "角色")
+                for alias in self._unique_aliases(entity.get("mentions", [])):
+                    if self._index_manager.register_alias(alias, resolved_id, entity_type):
+                        stats["aliases"] += 1
+
+            # 更新 last_appearance
             if existing:
                 # 使用 SQL 直接更新 last_appearance
-                self._update_last_appearance(entity_id, chapter)
+                self._update_last_appearance(resolved_id, chapter)
                 stats["entities_updated"] += 1
 
             # 记录出场(保留原有逻辑)
             self._index_manager.record_appearance(
-                entity_id=entity_id,
+                entity_id=resolved_id,
                 chapter=chapter,
                 mentions=entity.get("mentions", []),
                 confidence=entity.get("confidence", 1.0)
@@ -331,7 +356,9 @@ class SQLStateManager:
                 tier=entity.get("tier", "装饰"),
                 desc=entity.get("desc", ""),
                 current=entity.get("current", {}),
-                aliases=entity.get("aliases", []),
+                aliases=self._unique_aliases(
+                    entity.get("aliases", []), entity.get("mentions", [])
+                ),
                 first_appearance=chapter,
                 last_appearance=chapter,
                 is_protagonist=entity.get("is_protagonist", False)

+ 27 - 3
webnovel-writer/scripts/data_modules/state_manager.py

@@ -88,6 +88,21 @@ class _EntityPatch:
     appearance_chapter: Optional[int] = None
 
 
+def _unique_aliases(*groups: Any) -> List[str]:
+    """Flatten alias groups into one list of distinct, stripped strings.
+
+    Each group may be a single string or an iterable of values; falsy groups
+    are skipped. Values are converted with ``str`` and stripped, ``None`` and
+    empty results are dropped, and first-seen order is preserved.
+    """
+    result = []
+    seen = set()
+    for group in groups:
+        if not group:
+            continue
+        # A bare string is one alias, not a sequence of characters.
+        values = [group] if isinstance(group, str) else group
+        for value in values:
+            alias = str(value).strip() if value is not None else ""
+            if alias and alias not in seen:
+                seen.add(alias)
+                result.append(alias)
+    return result
+
+
 class StateManager:
     """状态管理器(v5.1 entities_v3 格式 + SQLite 同步,v5.4 沿用)"""
 
@@ -440,7 +455,7 @@ class StateManager:
                         tier=patch.base_entity.get("tier", "装饰"),
                         desc=patch.base_entity.get("desc", ""),
                         current=patch.base_entity.get("current", {}),
-                        aliases=[],
+                        aliases=patch.base_entity.get("aliases", []),
                         first_appearance=patch.base_entity.get("first_appearance", 0),
                         last_appearance=patch.base_entity.get("last_appearance", 0),
                         is_protagonist=patch.base_entity.get("is_protagonist", False)
@@ -658,7 +673,7 @@ class StateManager:
         """获取实体(v5.1 引入:优先从 SQLite 读取)"""
         # v5.1 引入: 优先从 SQLite 读取
         if self._sql_state_manager:
-            entity = self._sql_state_manager._index_manager.get_entity(entity_id)
+            entity = self._sql_state_manager.get_entity(entity_id)
             if entity:
                 return entity
 
@@ -763,6 +778,7 @@ class StateManager:
             "tier": entity.tier,
             "desc": "",
             "current": entity.attributes,
+            "aliases": list(entity.aliases),
             "first_appearance": entity.first_appearance,
             "last_appearance": entity.last_appearance,
             "history": []
@@ -1067,6 +1083,12 @@ class StateManager:
             entity_type = entity.get("type")
             if entity_id:
                 self.update_entity_appearance(entity_id, chapter, entity_type)
+                resolved_type = entity_type or self.get_entity_type(entity_id) or "角色"
+                for alias in _unique_aliases(entity.get("mentions", [])):
+                    entries = self._pending_alias_entries.setdefault(alias, [])
+                    alias_entry = {"type": resolved_type, "id": entity_id}
+                    if alias_entry not in entries:
+                        entries.append(alias_entry)
                 # v5.1 引入: 缓存用于 SQLite 同步
                 self._pending_sqlite_data["entities_appeared"].append(entity)
 
@@ -1079,7 +1101,9 @@ class StateManager:
                     name=entity.get("name", ""),
                     type=entity.get("type", "角色"),
                     tier=entity.get("tier", "装饰"),
-                    aliases=entity.get("mentions", []),
+                    aliases=_unique_aliases(
+                        entity.get("mentions", []), entity.get("aliases", [])
+                    ),
                     first_appearance=chapter,
                     last_appearance=chapter
                 )

+ 23 - 6
webnovel-writer/scripts/data_modules/tests/test_data_modules.py

@@ -72,8 +72,8 @@ class TestEntityLinker:
             )
         )
 
-        # 注册别名
-        assert linker.register_alias("xiaoyan", "萧炎")
+        # canonical_name 会在实体写入时自动注册别名
+        assert linker.lookup_alias("萧炎") == "xiaoyan"
         assert linker.register_alias("xiaoyan", "小炎子")
 
         # 查找
@@ -107,9 +107,7 @@ class TestEntityLinker:
             )
         )
 
-        linker.register_alias("xiaoyan", "萧炎", "角色")
-        # v5.0: 同一别名可绑定不同实体(一对多)
-        assert linker.register_alias("other_person", "萧炎", "角色")
+        # canonical_name 会自动作为别名;同一别名可绑定不同实体(一对多)
 
         # 查找所有匹配
         entries = linker.lookup_alias_all("萧炎")
@@ -455,6 +453,25 @@ class TestIndexManager:
         assert stats["scenes"] == 1
         assert stats["entities"] == 1
 
+    def test_entity_canonical_name_alias_fallback(self, temp_project):
+        """Upserting an entity exposes its canonical_name as an alias that get_entity can resolve."""
+        manager = IndexManager(temp_project)
+
+        manager.upsert_entity(
+            EntityMeta(
+                id="chenfeng",
+                type="角色",
+                canonical_name="陈锋",
+                current={},
+                first_appearance=1,
+                last_appearance=1,
+            )
+        )
+
+        # The canonical name must be listed as an alias, be found by the alias
+        # lookup, and resolve through get_entity even though it is not an ID.
+        aliases = manager.get_entity_aliases("chenfeng")
+        assert "陈锋" in aliases
+        assert manager.get_entities_by_alias("陈锋")[0]["id"] == "chenfeng"
+        assert manager.get_entity("陈锋")["id"] == "chenfeng"
+
     def test_entity_alias_and_relationships(self, temp_project):
         manager = IndexManager(temp_project)
 
@@ -1421,4 +1438,4 @@ class TestRAGAdapter:
 
 
 if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
+    pytest.main([__file__, "-v"])

+ 4 - 1
webnovel-writer/scripts/data_modules/tests/test_state_manager_extra.py

@@ -172,7 +172,7 @@ def test_process_chapter_result_and_sqlite_sync(temp_project):
 
     result = {
         "entities_appeared": [
-            {"id": "xiaoyan", "type": "角色", "mentions": ["萧炎"], "confidence": 0.9}
+            {"id": "xiaoyan", "type": "角色", "mentions": ["萧炎", "小炎子"], "confidence": 0.9}
         ],
         "entities_new": [
             {
@@ -216,6 +216,9 @@ def test_process_chapter_result_and_sqlite_sync(temp_project):
 
     idx = IndexManager(temp_project)
     assert idx.get_entity("yaolao") is not None
+    assert idx.get_entity("药老")["id"] == "yaolao"
+    assert idx.get_entity("小炎子")["id"] == "xiaoyan"
+    assert "药老先生" in idx.get_entity_aliases("yaolao")
     assert idx.get_relationship_between("xiaoyan", "yaolao")
     assert idx.get_entity_state_changes("xiaoyan")