python.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. /**
  2. * Python Framework Resolver
  3. *
  4. * Handles Django, Flask, and FastAPI patterns.
  5. */
  6. import { Node } from '../../types';
  7. import { FrameworkResolver, UnresolvedRef, ResolutionContext, FrameworkExtractionResult } from '../types';
  8. import { stripCommentsForRegex } from '../strip-comments';
  9. export const djangoResolver: FrameworkResolver = {
  10. name: 'django',
  11. languages: ['python'],
  12. detect(context) {
  13. const requirements = context.readFile('requirements.txt');
  14. if (requirements && requirements.toLowerCase().includes('django')) return true;
  15. const setup = context.readFile('setup.py');
  16. if (setup && setup.toLowerCase().includes('django')) return true;
  17. const pyproject = context.readFile('pyproject.toml');
  18. if (pyproject && pyproject.toLowerCase().includes('django')) return true;
  19. return context.fileExists('manage.py');
  20. },
  21. resolve(ref, context) {
  22. if (ref.referenceName.endsWith('Model') || /^[A-Z][a-z]+$/.test(ref.referenceName)) {
  23. const result = resolveByNameAndKind(ref.referenceName, CLASS_KINDS, MODEL_DIRS, context);
  24. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  25. }
  26. if (ref.referenceName.endsWith('View') || ref.referenceName.endsWith('ViewSet')) {
  27. const result = resolveByNameAndKind(ref.referenceName, VIEW_KINDS, VIEW_DIRS, context);
  28. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  29. }
  30. if (ref.referenceName.endsWith('Form')) {
  31. const result = resolveByNameAndKind(ref.referenceName, CLASS_KINDS, FORM_DIRS, context);
  32. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  33. }
  34. // ORM dynamic dispatch: QuerySet._fetch_all (and siblings) call
  35. // `self._iterable_class(self)` — a runtime dispatch to the iterable class
  36. // (default ModelIterable) whose __iter__ runs the SQL compiler. Static
  37. // parsing can't resolve an attribute-as-callable, so it leaves an unresolved
  38. // `_iterable_class` ref and a hole in the QuerySet→compiler chain. Bridge it
  39. // to ModelIterable.__iter__ so the flow actually exists in the graph.
  40. if (ref.referenceName === '_iterable_class') {
  41. const target = resolveModelIterableIter(context);
  42. if (target) return { original: ref, targetNodeId: target, confidence: 0.7, resolvedBy: 'framework' };
  43. }
  44. return null;
  45. },
  46. // Let the ORM dynamic-dispatch ref reach resolve() despite no symbol being
  47. // named `_iterable_class` (it's a QuerySet attribute, not a declared method).
  48. claimsReference(name) {
  49. return name === '_iterable_class';
  50. },
  51. extract(filePath, content) {
  52. if (!filePath.endsWith('.py')) return { nodes: [], references: [] };
  53. const nodes: Node[] = [];
  54. const references: UnresolvedRef[] = [];
  55. const now = Date.now();
  56. const safe = stripCommentsForRegex(content, 'python');
  57. // path('url', handler, name=...) / re_path(r'...', handler) / url(r'...', handler)
  58. // Capture groups: 1=function name, 2=url string, 3=handler expr
  59. // Handler expr may contain one balanced () pair (e.g. View.as_view(), include('x.y'))
  60. const routeRegex = /\b(path|re_path|url)\s*\(\s*r?['"]([^'"]+)['"]\s*,\s*([\w.]+(?:\s*\([^)]*\))?)/g;
  61. let match: RegExpExecArray | null;
  62. while ((match = routeRegex.exec(safe)) !== null) {
  63. const [, _fn, urlPath, handlerExpr] = match;
  64. const line = safe.slice(0, match.index).split('\n').length;
  65. const routeNode: Node = {
  66. id: `route:${filePath}:${line}:${urlPath}`,
  67. kind: 'route',
  68. name: urlPath!,
  69. qualifiedName: `${filePath}::route:${urlPath}`,
  70. filePath,
  71. startLine: line,
  72. endLine: line,
  73. startColumn: 0,
  74. endColumn: match[0].length,
  75. language: 'python',
  76. updatedAt: now,
  77. };
  78. nodes.push(routeNode);
  79. const handler = handlerExpr!.trim();
  80. const target = resolveHandlerName(handler);
  81. if (target) {
  82. references.push({
  83. fromNodeId: routeNode.id,
  84. referenceName: target.name,
  85. referenceKind: target.kind,
  86. line,
  87. column: 0,
  88. filePath,
  89. language: 'python',
  90. });
  91. }
  92. }
  93. return { nodes, references };
  94. },
  95. };
  96. /**
  97. * Find ModelIterable.__iter__ — the default iterable QuerySet invokes via
  98. * `self._iterable_class(self)`. Its __iter__ statically calls the SQL compiler,
  99. * so linking the dynamic dispatch here closes the QuerySet→SQL call chain.
  100. * (Over-approximates to the default iterable; .values()/.values_list() swap in
  101. * other BaseIterable subclasses, but ModelIterable is the canonical path.)
  102. */
  103. function resolveModelIterableIter(context: ResolutionContext): string | null {
  104. const cls = context.getNodesByName('ModelIterable').find((n) => n.kind === 'class');
  105. if (!cls) return null;
  106. const iter = context.getNodesByName('__iter__').find(
  107. (n) => n.filePath === cls.filePath && n.startLine >= cls.startLine && n.startLine <= cls.endLine
  108. );
  109. return iter ? iter.id : null;
  110. }
  111. /**
  112. * Parse a Django URL handler expression and return the symbol/module to link.
  113. * Returns null for shapes we can't confidently link (e.g. lambdas).
  114. */
  115. function resolveHandlerName(expr: string): { name: string; kind: 'references' | 'imports' } | null {
  116. // include('module.path')
  117. const includeMatch = expr.match(/^include\s*\(\s*['"]([^'"]+)['"]/);
  118. if (includeMatch) return { name: includeMatch[1]!, kind: 'imports' };
  119. // Strip trailing .as_view(...) or .as_view()
  120. let head = expr.replace(/\.as_view\s*\([^)]*\)\s*$/, '');
  121. // Drop any other trailing method call
  122. head = head.replace(/\.\w+\s*\([^)]*\)\s*$/, '');
  123. const dotted = head.split('.').filter(Boolean);
  124. if (dotted.length === 0) return null;
  125. const last = dotted[dotted.length - 1]!;
  126. if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(last)) return null;
  127. return { name: last, kind: 'references' };
  128. }
  129. export const flaskResolver: FrameworkResolver = {
  130. name: 'flask',
  131. languages: ['python'],
  132. detect(context) {
  133. const requirements = context.readFile('requirements.txt');
  134. if (requirements && /\bflask\b/i.test(requirements)) return true;
  135. const pyproject = context.readFile('pyproject.toml');
  136. if (pyproject && /\bflask\b/i.test(pyproject)) return true;
  137. for (const file of ['app.py', 'application.py', 'main.py', '__init__.py']) {
  138. const content = context.readFile(file);
  139. if (content && content.includes('Flask(__name__)')) return true;
  140. }
  141. return false;
  142. },
  143. resolve(ref, context) {
  144. if (ref.referenceName.endsWith('_bp') || ref.referenceName.endsWith('_blueprint')) {
  145. const result = resolveByNameAndKind(ref.referenceName, VARIABLE_KINDS, [], context);
  146. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  147. }
  148. return null;
  149. },
  150. extract(filePath, content) {
  151. if (!filePath.endsWith('.py')) return { nodes: [], references: [] };
  152. return extractDecoratorRoutes(filePath, stripCommentsForRegex(content, 'python'), {
  153. // Flask: @x.route('/path', methods=[...])
  154. decoratorRegex: /@(\w+)\.route\s*\(\s*['"]([^'"]+)['"](?:\s*,\s*methods\s*=\s*\[([^\]]+)\])?\s*\)\s*\n\s*(?:async\s+)?def\s+(\w+)/g,
  155. defaultMethod: 'GET',
  156. methodFromGroup: 3,
  157. pathGroup: 2,
  158. handlerGroup: 4,
  159. language: 'python',
  160. });
  161. },
  162. };
  163. export const fastapiResolver: FrameworkResolver = {
  164. name: 'fastapi',
  165. languages: ['python'],
  166. detect(context) {
  167. const requirements = context.readFile('requirements.txt');
  168. if (requirements && /\bfastapi\b/i.test(requirements)) return true;
  169. const pyproject = context.readFile('pyproject.toml');
  170. if (pyproject && /\bfastapi\b/i.test(pyproject)) return true;
  171. for (const file of ['app.py', 'main.py', 'api.py']) {
  172. const content = context.readFile(file);
  173. if (content && content.includes('FastAPI(')) return true;
  174. }
  175. return false;
  176. },
  177. resolve(ref, context) {
  178. if (ref.referenceName.endsWith('_router') || ref.referenceName === 'router') {
  179. const result = resolveByNameAndKind(ref.referenceName, VARIABLE_KINDS, ROUTER_DIRS, context);
  180. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  181. }
  182. if (ref.referenceName.startsWith('get_') || ref.referenceName.startsWith('Depends')) {
  183. const result = resolveByNameAndKind(ref.referenceName, FUNCTION_KINDS, DEP_DIRS, context);
  184. if (result) return { original: ref, targetNodeId: result, confidence: 0.75, resolvedBy: 'framework' };
  185. }
  186. return null;
  187. },
  188. extract(filePath, content) {
  189. if (!filePath.endsWith('.py')) return { nodes: [], references: [] };
  190. return extractDecoratorRoutes(filePath, stripCommentsForRegex(content, 'python'), {
  191. // FastAPI: @x.METHOD('/path') -> handler on the next def line
  192. decoratorRegex: /@(\w+)\.(get|post|put|patch|delete|options|head)\s*\(\s*['"]([^'"]+)['"]/g,
  193. defaultMethod: '',
  194. methodGroup: 2,
  195. pathGroup: 3,
  196. findHandler: true,
  197. language: 'python',
  198. });
  199. },
  200. };
/**
 * Options telling `extractDecoratorRoutes` how to read one framework's route
 * decorators out of a regex match. All `*Group` fields are capture-group
 * indices into `decoratorRegex`.
 */
interface DecoratorRouteOpts {
  // Pattern run repeatedly with `exec` over the file content; it needs the
  // `g` flag, otherwise `exec` keeps returning the same first match.
  decoratorRegex: RegExp;
  // Method label used when neither method group yields a value ('' = no label).
  defaultMethod: string;
  // Group holding the HTTP method itself (e.g. `get`); it is upper-cased.
  methodGroup?: number;
  // Group holding the contents of a methods=[...] list; only the first quoted
  // word in it is used, upper-cased. Consulted only when methodGroup gives nothing.
  methodFromGroup?: number; // methods=[...] list
  // Group holding the route path.
  pathGroup: number;
  // Group holding the handler function name, when the regex captures it.
  handlerGroup?: number;
  // When true and handlerGroup gives nothing, the handler is the first
  // `def` / `async def` found after the match.
  findHandler?: boolean;
  // Language tag written onto the emitted route nodes.
  language: 'python';
}
  211. function extractDecoratorRoutes(filePath: string, content: string, opts: DecoratorRouteOpts): FrameworkExtractionResult {
  212. const nodes: Node[] = [];
  213. const references: UnresolvedRef[] = [];
  214. const now = Date.now();
  215. let match: RegExpExecArray | null;
  216. while ((match = opts.decoratorRegex.exec(content)) !== null) {
  217. const routePath = match[opts.pathGroup];
  218. let method = opts.defaultMethod;
  219. if (opts.methodGroup && match[opts.methodGroup]) {
  220. method = match[opts.methodGroup]!.toUpperCase();
  221. } else if (opts.methodFromGroup && match[opts.methodFromGroup]) {
  222. const m = match[opts.methodFromGroup]!.match(/['"]([A-Z]+)['"]/i);
  223. if (m) method = m[1]!.toUpperCase();
  224. }
  225. const line = content.slice(0, match.index).split('\n').length;
  226. const name = method ? `${method} ${routePath}` : routePath!;
  227. const routeNode: Node = {
  228. id: `route:${filePath}:${line}:${method}:${routePath}`,
  229. kind: 'route',
  230. name,
  231. qualifiedName: `${filePath}::${method}:${routePath}`,
  232. filePath,
  233. startLine: line,
  234. endLine: line,
  235. startColumn: 0,
  236. endColumn: match[0].length,
  237. language: opts.language,
  238. updatedAt: now,
  239. };
  240. nodes.push(routeNode);
  241. let handlerName: string | undefined;
  242. if (opts.handlerGroup && match[opts.handlerGroup]) {
  243. handlerName = match[opts.handlerGroup];
  244. } else if (opts.findHandler) {
  245. const tail = content.slice(match.index + match[0].length);
  246. const defMatch = tail.match(/\n\s*(?:async\s+)?def\s+(\w+)/);
  247. if (defMatch) handlerName = defMatch[1];
  248. }
  249. if (handlerName) {
  250. references.push({
  251. fromNodeId: routeNode.id,
  252. referenceName: handlerName,
  253. referenceKind: 'references',
  254. line,
  255. column: 0,
  256. filePath,
  257. language: 'python',
  258. });
  259. }
  260. }
  261. return { nodes, references };
  262. }
  263. // Directory patterns
  264. const MODEL_DIRS = ['models', 'app/models', 'src/models'];
  265. const VIEW_DIRS = ['views', 'app/views', 'src/views', 'api/views'];
  266. const FORM_DIRS = ['forms', 'app/forms', 'src/forms'];
  267. const ROUTER_DIRS = ['/routers/', '/api/', '/routes/', '/endpoints/'];
  268. const DEP_DIRS = ['/dependencies/', '/deps/', '/core/'];
  269. const CLASS_KINDS = new Set(['class']);
  270. const VIEW_KINDS = new Set(['class', 'function']);
  271. const VARIABLE_KINDS = new Set(['variable']);
  272. const FUNCTION_KINDS = new Set(['function']);
  273. /**
  274. * Resolve a symbol by name using indexed queries instead of scanning all files.
  275. */
  276. function resolveByNameAndKind(
  277. name: string,
  278. kinds: Set<string>,
  279. preferredDirPatterns: string[],
  280. context: ResolutionContext,
  281. ): string | null {
  282. const candidates = context.getNodesByName(name);
  283. if (candidates.length === 0) return null;
  284. const kindFiltered = candidates.filter((n) => kinds.has(n.kind));
  285. if (kindFiltered.length === 0) return null;
  286. // Prefer candidates in framework-conventional directories
  287. if (preferredDirPatterns.length > 0) {
  288. const preferred = kindFiltered.filter((n) =>
  289. preferredDirPatterns.some((d) => n.filePath.includes(d))
  290. );
  291. if (preferred.length > 0) return preferred[0]!.id;
  292. }
  293. // Fall back to any match
  294. return kindFiltered[0]!.id;
  295. }