python.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. /**
  2. * Python Framework Resolver
  3. *
  4. * Handles Django, Flask, and FastAPI patterns.
  5. */
  6. import { Node } from '../../types';
  7. import { FrameworkResolver, UnresolvedRef, ResolutionContext, FrameworkExtractionResult } from '../types';
  8. import { stripCommentsForRegex } from '../strip-comments';
  /**
   * Framework resolver for Django projects.
   *
   * - `detect`: true when "django" appears in a common dependency manifest, or
   *   when a `manage.py` exists.
   * - `resolve`: maps model / view / form style reference names onto indexed
   *   nodes, and bridges the ORM's `_iterable_class` dynamic dispatch.
   * - `claimsReference`: lets `_iterable_class` and `*.urls` module paths through
   *   to resolution.
   * - `extract`: emits `route` nodes (plus handler references) for URLconf entries
   *   and DRF router registrations found in `.py` files.
   */
  export const djangoResolver: FrameworkResolver = {
    name: 'django',
    languages: ['python'],

    detect(context) {
      // Case-insensitive substring check in each manifest, in this order.
      for (const manifest of ['requirements.txt', 'setup.py', 'pyproject.toml']) {
        const text = context.readFile(manifest);
        if (text && text.toLowerCase().includes('django')) return true;
      }
      return context.fileExists('manage.py');
    },

    resolve(ref, context) {
      // Names ending in "Model", or a single capitalised word (e.g. "Article").
      if (ref.referenceName.endsWith('Model') || /^[A-Z][a-z]+$/.test(ref.referenceName)) {
        const result = resolveByNameAndKind(ref.referenceName, CLASS_KINDS, MODEL_DIRS, context);
        if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
      }

      if (ref.referenceName.endsWith('View') || ref.referenceName.endsWith('ViewSet')) {
        const result = resolveByNameAndKind(ref.referenceName, VIEW_KINDS, VIEW_DIRS, context);
        if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
      }

      if (ref.referenceName.endsWith('Form')) {
        const result = resolveByNameAndKind(ref.referenceName, CLASS_KINDS, FORM_DIRS, context);
        if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
      }

      // ORM dynamic dispatch: QuerySet._fetch_all (and siblings) call
      // `self._iterable_class(self)` — a runtime dispatch to the iterable class
      // (default ModelIterable) whose __iter__ runs the SQL compiler. Static
      // parsing can't resolve an attribute-as-callable, so it leaves an unresolved
      // `_iterable_class` ref and a hole in the QuerySet→compiler chain. Bridge it
      // to ModelIterable.__iter__ so the flow actually exists in the graph.
      if (ref.referenceName === '_iterable_class') {
        const target = resolveModelIterableIter(context);
        if (target) return { original: ref, targetNodeId: target, confidence: 0.7, resolvedBy: 'framework' };
      }

      return null;
    },

    // Let two ref shapes past resolveOne's "no possible match" pre-filter so they
    // reach resolution: the ORM dynamic-dispatch `_iterable_class` (a QuerySet
    // attribute, not a declared symbol), and a Django `include('app.urls')` module
    // path — a dotted module name with no symbol/import to match, which resolution
    // (resolvePythonAbsoluteModule) then maps to its `urls.py` file so the included
    // URLconf records a dependency on the root urlconf.
    claimsReference(name) {
      return name === '_iterable_class' || name.endsWith('.urls');
    },

    extract(filePath, content) {
      if (!filePath.endsWith('.py')) return { nodes: [], references: [] };

      const nodes: Node[] = [];
      const references: UnresolvedRef[] = [];
      const now = Date.now();
      const safe = stripCommentsForRegex(content, 'python');

      // path('url', handler, name=...) / re_path(r'...', handler) / url(r'...', handler)
      // Capture groups: 1=function name, 2=url string, 3=handler expr
      // Handler expr may contain one balanced () pair (e.g. View.as_view(), include('x.y'))
      // The url string may be EMPTY: `path('', views.index)` and
      // `path('', include('app.urls'))` are the usual way to mount the site root.
      const routeRegex = /\b(path|re_path|url)\s*\(\s*r?['"]([^'"]*)['"]\s*,\s*([\w.]+(?:\s*\([^)]*\))?)/g;
      let match: RegExpExecArray | null;
      while ((match = routeRegex.exec(safe)) !== null) {
        const urlPath = match[2] ?? '';
        const handlerExpr = match[3] ?? '';
        // 1-based line of the match start.
        const line = safe.slice(0, match.index).split('\n').length;

        const routeNode: Node = {
          id: `route:${filePath}:${line}:${urlPath}`,
          kind: 'route',
          // An empty pattern is shown as "/" so the node never has a blank name.
          name: urlPath || '/',
          qualifiedName: `${filePath}::route:${urlPath}`,
          filePath,
          startLine: line,
          endLine: line,
          startColumn: 0,
          endColumn: match[0].length,
          language: 'python',
          updatedAt: now,
        };
        nodes.push(routeNode);

        const target = resolveHandlerName(handlerExpr.trim());
        if (target) {
          references.push({
            fromNodeId: routeNode.id,
            referenceName: target.name,
            referenceKind: target.kind,
            line,
            column: 0,
            filePath,
            language: 'python',
          });
        }
      }

      // DRF router registration: `router.register(r'articles', ArticleViewSet)` →
      // route → the ViewSet class (the core CRUD endpoints, which path()/url() miss).
      // The STRING first arg separates this from `admin.site.register(Model, Admin)`
      // (whose first arg is a model class, not a string); the View/ViewSet suffix on
      // the 2nd arg keeps it to DRF viewsets. The prefix may be empty
      // (`router.register(r'', HomeViewSet)`).
      const routerRegex = /\.register\s*\(\s*r?['"]([^'"]*)['"]\s*,\s*([\w.]+)/g;
      while ((match = routerRegex.exec(safe)) !== null) {
        // Drop a leading "^" and a trailing "$" / "/$" regex anchor from the prefix.
        const prefix = (match[1] ?? '').replace(/^\^|\/?\$$/g, '');
        // Keep only the last dotted segment: `views.ArticleViewSet` → `ArticleViewSet`.
        const viewset = (match[2] ?? '').split('.').pop() ?? '';
        if (!/View(Set)?$/.test(viewset)) continue;

        const line = safe.slice(0, match.index).split('\n').length;
        const routeNode: Node = {
          id: `route:${filePath}:${line}:VIEWSET:${prefix}`,
          kind: 'route',
          name: `VIEWSET /${prefix}`,
          qualifiedName: `${filePath}::route:${prefix}`,
          filePath,
          startLine: line,
          endLine: line,
          startColumn: 0,
          endColumn: match[0].length,
          language: 'python',
          updatedAt: now,
        };
        nodes.push(routeNode);
        references.push({
          fromNodeId: routeNode.id,
          referenceName: viewset,
          referenceKind: 'references',
          line,
          column: 0,
          filePath,
          language: 'python',
        });
      }

      return { nodes, references };
    },
  };
  /**
   * Locate `ModelIterable.__iter__`, the default iterable a QuerySet invokes via
   * `self._iterable_class(self)`. Linking the dynamic dispatch to this method
   * closes the QuerySet→SQL call chain in the graph.
   *
   * This over-approximates to the default iterable: `.values()` /
   * `.values_list()` swap in other BaseIterable subclasses, but ModelIterable is
   * the canonical path.
   *
   * @param context Resolution context used for name-indexed node lookups.
   * @returns The id of the first `__iter__` node that sits in the same file as
   *   the first `ModelIterable` class node and starts within that class's line
   *   range; `null` when either the class or such a method is not found.
   */
  function resolveModelIterableIter(context: ResolutionContext): string | null {
    const modelIterable = context
      .getNodesByName('ModelIterable')
      .find((node) => node.kind === 'class');
    if (!modelIterable) return null;

    for (const candidate of context.getNodesByName('__iter__')) {
      const sameFile = candidate.filePath === modelIterable.filePath;
      const startsInsideClass =
        candidate.startLine >= modelIterable.startLine &&
        candidate.startLine <= modelIterable.endLine;
      if (sameFile && startsInsideClass) return candidate.id;
    }
    return null;
  }
  /**
   * Turn the handler expression of a Django URL entry into the name to link.
   *
   * - `include('pkg.urls')` yields the quoted module path with kind `imports`.
   * - Otherwise a trailing `.as_view(...)` is removed, then one further trailing
   *   `.method(...)` call, and the last dotted segment is returned with kind
   *   `references`.
   *
   * @param expr Handler expression text, e.g. `views.index` or `MyView.as_view()`.
   * @returns The name and reference kind, or `null` when nothing is left or the
   *   last segment is not a plain identifier (e.g. it still contains a call).
   */
  function resolveHandlerName(expr: string): { name: string; kind: 'references' | 'imports' } | null {
    const included = /^include\s*\(\s*['"]([^'"]+)['"]/.exec(expr);
    if (included) return { name: included[1]!, kind: 'imports' };

    const withoutTrailingCalls = expr
      .replace(/\.as_view\s*\([^)]*\)\s*$/, '')
      .replace(/\.\w+\s*\([^)]*\)\s*$/, '');

    // Empty segments (leading/trailing/double dots) are ignored.
    const segments = withoutTrailingCalls.split('.').filter((segment) => segment.length > 0);
    const symbol = segments.pop();
    if (symbol === undefined) return null;

    return /^[A-Za-z_][A-Za-z0-9_]*$/.test(symbol) ? { name: symbol, kind: 'references' } : null;
  }
  /**
   * Framework resolver for Flask projects.
   *
   * - `detect`: true when the word "flask" appears in a dependency manifest, or
   *   when an entrypoint-named file both imports flask and calls `Flask(`.
   * - `resolve`: links `*_bp` / `*_blueprint` references to variable nodes.
   * - `extract`: emits route nodes for `@x.route(...)` decorators and for
   *   Flask-RESTful `add_resource` registrations.
   */
  export const flaskResolver: FrameworkResolver = {
    name: 'flask',
    languages: ['python'],

    detect(context) {
      const manifests = ['requirements.txt', 'pyproject.toml', 'Pipfile', 'setup.py'];
      const declaredAsDependency = manifests.some((manifest) => {
        const text = context.readFile(manifest);
        return text ? /\bflask\b/i.test(text) : false;
      });
      if (declaredAsDependency) return true;

      // Any app entrypoint (root OR subdir, e.g. conduit/app.py) that imports flask
      // and instantiates Flask(...) — covers Flask(__name__), Flask(__name__.split…),
      // and the app-factory pattern. Bounded to the first 50 entrypoint-named files.
      const entrypointName = /(?:^|\/)(app|application|main|wsgi|__init__)\.py$/;
      const candidates = context
        .getAllFiles()
        .filter((file) => entrypointName.test(file))
        .slice(0, 50);

      return candidates.some((file) => {
        const text = context.readFile(file);
        if (!text) return false;
        const instantiatesApp = /\bFlask\s*\(/.test(text);
        return instantiatesApp && /\bimport\s+flask\b|\bfrom\s+flask\b/.test(text);
      });
    },

    resolve(ref, context) {
      const refName = ref.referenceName;
      const looksLikeBlueprint = refName.endsWith('_bp') || refName.endsWith('_blueprint');
      if (!looksLikeBlueprint) return null;

      // No preferred directories: the first variable node with this name wins.
      const targetNodeId = resolveByNameAndKind(refName, VARIABLE_KINDS, [], context);
      return targetNodeId
        ? { original: ref, targetNodeId, confidence: 0.8, resolvedBy: 'framework' }
        : null;
    },

    extract(filePath, content) {
      if (!filePath.endsWith('.py')) return { nodes: [], references: [] };

      const safe = stripCommentsForRegex(content, 'python');

      // Flask: @x.route('/path', methods=[...] | (...)) — the handler is the next
      // `def`, allowing intervening decorators (@login_required) and stacked
      // @x.route() lines. methods may be a list OR a tuple (methods=('GET',)).
      const fromDecorators = extractDecoratorRoutes(filePath, safe, {
        decoratorRegex: /@(\w+)\.route\s*\(\s*['"]([^'"]*)['"](?:\s*,\s*methods\s*=\s*[[(]([^\])]+)[\])])?\s*\)/g,
        defaultMethod: 'GET',
        methodFromGroup: 3,
        pathGroup: 2,
        findHandler: true,
        language: 'python',
      });
      const fromRestful = extractFlaskRestful(filePath, safe);

      return {
        nodes: fromDecorators.nodes.concat(fromRestful.nodes),
        references: fromDecorators.references.concat(fromRestful.references),
      };
    },
  };
  /**
   * Framework resolver for FastAPI projects.
   *
   * - `detect`: true when the word "fastapi" appears in `requirements.txt` or
   *   `pyproject.toml`, or when a root-level `app.py` / `main.py` / `api.py`
   *   contains `FastAPI(`.
   * - `resolve`: links router variables and `get_*` / `Depends*` names to
   *   variable / function nodes, preferring conventional directories.
   * - `extract`: emits route nodes for `@x.<http-method>('/path')` decorators.
   */
  export const fastapiResolver: FrameworkResolver = {
    name: 'fastapi',
    languages: ['python'],

    detect(context) {
      for (const manifest of ['requirements.txt', 'pyproject.toml']) {
        const text = context.readFile(manifest);
        if (text && /\bfastapi\b/i.test(text)) return true;
      }

      return ['app.py', 'main.py', 'api.py'].some((file) => {
        const source = context.readFile(file);
        return source ? source.includes('FastAPI(') : false;
      });
    },

    resolve(ref, context) {
      const refName = ref.referenceName;

      const isRouter = refName.endsWith('_router') || refName === 'router';
      if (isRouter) {
        const routerId = resolveByNameAndKind(refName, VARIABLE_KINDS, ROUTER_DIRS, context);
        if (routerId) return { original: ref, targetNodeId: routerId, confidence: 0.8, resolvedBy: 'framework' };
      }

      // Dependency providers get a slightly lower confidence than routers.
      const isDependency = refName.startsWith('get_') || refName.startsWith('Depends');
      if (isDependency) {
        const dependencyId = resolveByNameAndKind(refName, FUNCTION_KINDS, DEP_DIRS, context);
        if (dependencyId) return { original: ref, targetNodeId: dependencyId, confidence: 0.75, resolvedBy: 'framework' };
      }

      return null;
    },

    extract(filePath, content) {
      if (!filePath.endsWith('.py')) return { nodes: [], references: [] };

      const safe = stripCommentsForRegex(content, 'python');
      // FastAPI: @x.METHOD('/path') -> handler on the next def line. Path may be
      // empty ("") for routes mounted at the router/prefix root.
      return extractDecoratorRoutes(filePath, safe, {
        decoratorRegex: /@(\w+)\.(get|post|put|patch|delete|options|head)\s*\(\s*['"]([^'"]*)['"]/g,
        defaultMethod: '',
        methodGroup: 2,
        pathGroup: 3,
        findHandler: true,
        language: 'python',
      });
    },
  };
  /**
   * Options describing how `extractDecoratorRoutes` reads one framework's route
   * decorators. Group options are capture-group indexes into `decoratorRegex`;
   * because they are checked for truthiness, index 0 behaves like "not set".
   */
  interface DecoratorRouteOpts {
    /** Pattern matched repeatedly against the file content; one route per match. */
    decoratorRegex: RegExp;
    /** Capture group holding the route path. */
    pathGroup: number;
    /** HTTP method used when neither method group yields one ('' for none). */
    defaultMethod: string;
    /** Language recorded on the emitted route nodes. */
    language: 'python';
    /** Capture group whose text IS the method name (upper-cased when used). */
    methodGroup?: number;
    /** Capture group holding a methods=[...] list; its first quoted method is used. */
    methodFromGroup?: number; // methods=[...] list
    /** Capture group holding the handler name directly. */
    handlerGroup?: number;
    /** When true (and no handler group matched), use the next `def` after the decorator. */
    findHandler?: boolean;
  }
  /**
   * Extract route nodes (and handler references) from decorator-style route
   * declarations such as `@app.route('/x')` or `@router.get('/x')`.
   *
   * The caller's regex is never executed directly: a private global copy is
   * scanned instead. This makes the function safe for a regex without the `g`
   * flag (which would otherwise loop forever), ignores any stale `lastIndex`,
   * and leaves the caller's regex state untouched.
   *
   * @param filePath Path recorded on every emitted node and reference.
   * @param content File text to scan (callers in this file pass comment-stripped text).
   * @param opts Which capture groups hold the path / method / handler, plus defaults.
   * @returns One `route` node per decorator match, and one `references` entry per
   *   route for which a handler name was found.
   */
  function extractDecoratorRoutes(filePath: string, content: string, opts: DecoratorRouteOpts): FrameworkExtractionResult {
    const nodes: Node[] = [];
    const references: UnresolvedRef[] = [];
    const now = Date.now();

    // matchAll requires a global regex; add the flag if the caller omitted it.
    const { source, flags } = opts.decoratorRegex;
    const scanner = new RegExp(source, flags.includes('g') ? flags : `${flags}g`);

    for (const match of content.matchAll(scanner)) {
      const start = match.index ?? 0;
      // An unmatched path group is treated as an empty path rather than "undefined".
      const routePath = match[opts.pathGroup] ?? '';

      let method = opts.defaultMethod;
      if (opts.methodGroup && match[opts.methodGroup]) {
        method = match[opts.methodGroup]!.toUpperCase();
      } else if (opts.methodFromGroup && match[opts.methodFromGroup]) {
        // Only the first quoted method of a methods=[...] list is used.
        const firstMethod = match[opts.methodFromGroup]!.match(/['"]([A-Z]+)['"]/i);
        if (firstMethod) method = firstMethod[1]!.toUpperCase();
      }

      // 1-based line of the decorator.
      const line = content.slice(0, start).split('\n').length;
      const name = method ? `${method} ${routePath || '/'}` : (routePath || '/');

      const routeNode: Node = {
        id: `route:${filePath}:${line}:${method}:${routePath}`,
        kind: 'route',
        name,
        qualifiedName: `${filePath}::${method}:${routePath}`,
        filePath,
        startLine: line,
        endLine: line,
        startColumn: 0,
        endColumn: match[0].length,
        language: opts.language,
        updatedAt: now,
      };
      nodes.push(routeNode);

      let handlerName: string | undefined;
      if (opts.handlerGroup && match[opts.handlerGroup]) {
        handlerName = match[opts.handlerGroup];
      } else if (opts.findHandler) {
        // The handler is the first `def` / `async def` that starts a later line,
        // so intervening decorators between the route and the function are skipped.
        const tail = content.slice(start + match[0].length);
        const defMatch = tail.match(/\n\s*(?:async\s+)?def\s+(\w+)/);
        if (defMatch) handlerName = defMatch[1];
      }

      if (handlerName) {
        references.push({
          fromNodeId: routeNode.id,
          referenceName: handlerName,
          referenceKind: 'references',
          line,
          column: 0,
          filePath,
          language: 'python',
        });
      }
    }

    return { nodes, references };
  }
  /**
   * Flask-RESTful: `api.add_resource(ResourceClass, '/path'[, '/path2'])`
   * (and variants like redash's `add_org_resource`). The ResourceClass holds the
   * HTTP-verb methods (get/post/…), so the route references the class — its verb
   * methods resolve as the handlers via the class. Method is ANY (the class
   * decides which verbs it serves).
   *
   * The class argument may be a dotted reference (`resources.UserResource`); only
   * its last segment is used as the reference name.
   *
   * @param filePath Path recorded on every emitted node and reference.
   * @param safe File text to scan (the caller in this file passes comment-stripped text).
   * @returns One `route` node and one `references` entry per registered path.
   */
  function extractFlaskRestful(filePath: string, safe: string): FrameworkExtractionResult {
    const nodes: Node[] = [];
    const references: UnresolvedRef[] = [];
    const now = Date.now();

    // Groups: 1 = (possibly dotted) class reference, 2 = run of quoted path strings.
    const re = /\.add\w*[Rr]esource\s*\(\s*([\w.]+)\s*,\s*((?:['"][^'"]+['"]\s*,?\s*)+)/g;
    let m: RegExpExecArray | null;
    while ((m = re.exec(safe)) !== null) {
      // `pkg.resources.UserResource` → `UserResource`; skip refs made only of dots.
      const className = (m[1] ?? '').split('.').filter(Boolean).pop();
      if (!className) continue;

      const quotedPaths = (m[2] ?? '').match(/['"]([^'"]+)['"]/g) ?? [];
      // 1-based line of the `.add_resource(` call; shared by all of its paths.
      const line = safe.slice(0, m.index).split('\n').length;

      for (const quoted of quotedPaths) {
        const routePath = quoted.slice(1, -1); // strip the surrounding quotes
        const routeNode: Node = {
          id: `route:${filePath}:${line}:ANY:${routePath}`,
          kind: 'route',
          name: `ANY ${routePath}`,
          qualifiedName: `${filePath}::ANY:${routePath}`,
          filePath,
          startLine: line,
          endLine: line,
          startColumn: 0,
          // Length of the matched registration, like the other route extractors.
          endColumn: m[0].length,
          language: 'python',
          updatedAt: now,
        };
        nodes.push(routeNode);
        references.push({
          fromNodeId: routeNode.id,
          referenceName: className,
          referenceKind: 'references',
          line,
          column: 0,
          filePath,
          language: 'python',
        });
      }
    }

    return { nodes, references };
  }
  // Directory patterns: path fragments tested with `filePath.includes(...)` to
  // prefer candidates that live in framework-conventional locations.
  const MODEL_DIRS: string[] = ['models', 'app/models', 'src/models'];
  const VIEW_DIRS: string[] = ['views', 'app/views', 'src/views', 'api/views'];
  const FORM_DIRS: string[] = ['forms', 'app/forms', 'src/forms'];
  const ROUTER_DIRS: string[] = ['/routers/', '/api/', '/routes/', '/endpoints/'];
  const DEP_DIRS: string[] = ['/dependencies/', '/deps/', '/core/'];

  // Node kinds accepted for each category of reference.
  const CLASS_KINDS: Set<string> = new Set(['class']);
  const VIEW_KINDS: Set<string> = new Set(['class', 'function']);
  const VARIABLE_KINDS: Set<string> = new Set(['variable']);
  const FUNCTION_KINDS: Set<string> = new Set(['function']);
  /**
   * Resolve a symbol by name through the context's name index (no file scan).
   *
   * @param name Symbol name to look up.
   * @param kinds Node kinds that count as a match.
   * @param preferredDirPatterns Path fragments; a candidate whose file path
   *   contains any of them is preferred. May be empty.
   * @param context Resolution context providing `getNodesByName`.
   * @returns The id of the first preferred-directory match if there is one,
   *   otherwise the id of the first node of an accepted kind, otherwise `null`.
   */
  function resolveByNameAndKind(
    name: string,
    kinds: Set<string>,
    preferredDirPatterns: string[],
    context: ResolutionContext,
  ): string | null {
    const ofAcceptedKind = context.getNodesByName(name).filter((node) => kinds.has(node.kind));
    if (ofAcceptedKind.length === 0) return null;

    // With no patterns `some` is always false, so this falls through to the first match.
    const conventional = ofAcceptedKind.find((node) =>
      preferredDirPatterns.some((dir) => node.filePath.includes(dir)),
    );

    return (conventional ?? ofAcceptedKind[0]!).id;
  }