python.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. /**
  2. * Python Framework Resolver
  3. *
  4. * Handles Django, Flask, and FastAPI patterns.
  5. */
  6. import { Node } from '../../types';
  7. import { FrameworkResolver, UnresolvedRef, ResolutionContext, FrameworkExtractionResult } from '../types';
  8. import { stripCommentsForRegex } from '../strip-comments';
  9. export const djangoResolver: FrameworkResolver = {
  10. name: 'django',
  11. languages: ['python'],
  12. detect(context) {
  13. const requirements = context.readFile('requirements.txt');
  14. if (requirements && requirements.toLowerCase().includes('django')) return true;
  15. const setup = context.readFile('setup.py');
  16. if (setup && setup.toLowerCase().includes('django')) return true;
  17. const pyproject = context.readFile('pyproject.toml');
  18. if (pyproject && pyproject.toLowerCase().includes('django')) return true;
  19. return context.fileExists('manage.py');
  20. },
  21. resolve(ref, context) {
  22. if (ref.referenceName.endsWith('Model') || /^[A-Z][a-z]+$/.test(ref.referenceName)) {
  23. const result = resolveByNameAndKind(ref.referenceName, CLASS_KINDS, MODEL_DIRS, context);
  24. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  25. }
  26. if (ref.referenceName.endsWith('View') || ref.referenceName.endsWith('ViewSet')) {
  27. const result = resolveByNameAndKind(ref.referenceName, VIEW_KINDS, VIEW_DIRS, context);
  28. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  29. }
  30. if (ref.referenceName.endsWith('Form')) {
  31. const result = resolveByNameAndKind(ref.referenceName, CLASS_KINDS, FORM_DIRS, context);
  32. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  33. }
  34. // ORM dynamic dispatch: QuerySet._fetch_all (and siblings) call
  35. // `self._iterable_class(self)` — a runtime dispatch to the iterable class
  36. // (default ModelIterable) whose __iter__ runs the SQL compiler. Static
  37. // parsing can't resolve an attribute-as-callable, so it leaves an unresolved
  38. // `_iterable_class` ref and a hole in the QuerySet→compiler chain. Bridge it
  39. // to ModelIterable.__iter__ so the flow actually exists in the graph.
  40. if (ref.referenceName === '_iterable_class') {
  41. const target = resolveModelIterableIter(context);
  42. if (target) return { original: ref, targetNodeId: target, confidence: 0.7, resolvedBy: 'framework' };
  43. }
  44. return null;
  45. },
  46. // Let the ORM dynamic-dispatch ref reach resolve() despite no symbol being
  47. // named `_iterable_class` (it's a QuerySet attribute, not a declared method).
  48. claimsReference(name) {
  49. return name === '_iterable_class';
  50. },
  51. extract(filePath, content) {
  52. if (!filePath.endsWith('.py')) return { nodes: [], references: [] };
  53. const nodes: Node[] = [];
  54. const references: UnresolvedRef[] = [];
  55. const now = Date.now();
  56. const safe = stripCommentsForRegex(content, 'python');
  57. // path('url', handler, name=...) / re_path(r'...', handler) / url(r'...', handler)
  58. // Capture groups: 1=function name, 2=url string, 3=handler expr
  59. // Handler expr may contain one balanced () pair (e.g. View.as_view(), include('x.y'))
  60. const routeRegex = /\b(path|re_path|url)\s*\(\s*r?['"]([^'"]+)['"]\s*,\s*([\w.]+(?:\s*\([^)]*\))?)/g;
  61. let match: RegExpExecArray | null;
  62. while ((match = routeRegex.exec(safe)) !== null) {
  63. const [, _fn, urlPath, handlerExpr] = match;
  64. const line = safe.slice(0, match.index).split('\n').length;
  65. const routeNode: Node = {
  66. id: `route:${filePath}:${line}:${urlPath}`,
  67. kind: 'route',
  68. name: urlPath!,
  69. qualifiedName: `${filePath}::route:${urlPath}`,
  70. filePath,
  71. startLine: line,
  72. endLine: line,
  73. startColumn: 0,
  74. endColumn: match[0].length,
  75. language: 'python',
  76. updatedAt: now,
  77. };
  78. nodes.push(routeNode);
  79. const handler = handlerExpr!.trim();
  80. const target = resolveHandlerName(handler);
  81. if (target) {
  82. references.push({
  83. fromNodeId: routeNode.id,
  84. referenceName: target.name,
  85. referenceKind: target.kind,
  86. line,
  87. column: 0,
  88. filePath,
  89. language: 'python',
  90. });
  91. }
  92. }
  93. // DRF router registration: `router.register(r'articles', ArticleViewSet)` →
  94. // route → the ViewSet class (the core CRUD endpoints, which path()/url() miss).
  95. // The STRING first arg separates this from `admin.site.register(Model, Admin)`
  96. // (whose first arg is a model class, not a string); the View/ViewSet suffix on
  97. // the 2nd arg keeps it to DRF viewsets.
  98. const routerRegex = /\.register\s*\(\s*r?['"]([^'"]+)['"]\s*,\s*([\w.]+)/g;
  99. while ((match = routerRegex.exec(safe)) !== null) {
  100. const prefix = match[1]!.replace(/^\^|\/?\$$/g, '');
  101. const viewset = match[2]!.split('.').pop()!;
  102. if (!/View(Set)?$/.test(viewset)) continue;
  103. const line = safe.slice(0, match.index).split('\n').length;
  104. const routeNode: Node = {
  105. id: `route:${filePath}:${line}:VIEWSET:${prefix}`,
  106. kind: 'route',
  107. name: `VIEWSET /${prefix}`,
  108. qualifiedName: `${filePath}::route:${prefix}`,
  109. filePath, startLine: line, endLine: line, startColumn: 0, endColumn: match[0].length,
  110. language: 'python', updatedAt: now,
  111. };
  112. nodes.push(routeNode);
  113. references.push({
  114. fromNodeId: routeNode.id,
  115. referenceName: viewset,
  116. referenceKind: 'references',
  117. line, column: 0, filePath, language: 'python',
  118. });
  119. }
  120. return { nodes, references };
  121. },
  122. };
  123. /**
  124. * Find ModelIterable.__iter__ — the default iterable QuerySet invokes via
  125. * `self._iterable_class(self)`. Its __iter__ statically calls the SQL compiler,
  126. * so linking the dynamic dispatch here closes the QuerySet→SQL call chain.
  127. * (Over-approximates to the default iterable; .values()/.values_list() swap in
  128. * other BaseIterable subclasses, but ModelIterable is the canonical path.)
  129. */
  130. function resolveModelIterableIter(context: ResolutionContext): string | null {
  131. const cls = context.getNodesByName('ModelIterable').find((n) => n.kind === 'class');
  132. if (!cls) return null;
  133. const iter = context.getNodesByName('__iter__').find(
  134. (n) => n.filePath === cls.filePath && n.startLine >= cls.startLine && n.startLine <= cls.endLine
  135. );
  136. return iter ? iter.id : null;
  137. }
  138. /**
  139. * Parse a Django URL handler expression and return the symbol/module to link.
  140. * Returns null for shapes we can't confidently link (e.g. lambdas).
  141. */
  142. function resolveHandlerName(expr: string): { name: string; kind: 'references' | 'imports' } | null {
  143. // include('module.path')
  144. const includeMatch = expr.match(/^include\s*\(\s*['"]([^'"]+)['"]/);
  145. if (includeMatch) return { name: includeMatch[1]!, kind: 'imports' };
  146. // Strip trailing .as_view(...) or .as_view()
  147. let head = expr.replace(/\.as_view\s*\([^)]*\)\s*$/, '');
  148. // Drop any other trailing method call
  149. head = head.replace(/\.\w+\s*\([^)]*\)\s*$/, '');
  150. const dotted = head.split('.').filter(Boolean);
  151. if (dotted.length === 0) return null;
  152. const last = dotted[dotted.length - 1]!;
  153. if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(last)) return null;
  154. return { name: last, kind: 'references' };
  155. }
  156. export const flaskResolver: FrameworkResolver = {
  157. name: 'flask',
  158. languages: ['python'],
  159. detect(context) {
  160. for (const f of ['requirements.txt', 'pyproject.toml', 'Pipfile', 'setup.py']) {
  161. const c = context.readFile(f);
  162. if (c && /\bflask\b/i.test(c)) return true;
  163. }
  164. // Any app entrypoint (root OR subdir, e.g. conduit/app.py) that imports flask
  165. // and instantiates Flask(...) — covers Flask(__name__), Flask(__name__.split…),
  166. // and the app-factory pattern. Bounded to entrypoint-named files.
  167. const entrypoints = context
  168. .getAllFiles()
  169. .filter((f) => /(?:^|\/)(app|application|main|wsgi|__init__)\.py$/.test(f))
  170. .slice(0, 50);
  171. for (const f of entrypoints) {
  172. const c = context.readFile(f);
  173. if (c && /\bFlask\s*\(/.test(c) && /\bimport\s+flask\b|\bfrom\s+flask\b/.test(c)) return true;
  174. }
  175. return false;
  176. },
  177. resolve(ref, context) {
  178. if (ref.referenceName.endsWith('_bp') || ref.referenceName.endsWith('_blueprint')) {
  179. const result = resolveByNameAndKind(ref.referenceName, VARIABLE_KINDS, [], context);
  180. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  181. }
  182. return null;
  183. },
  184. extract(filePath, content) {
  185. if (!filePath.endsWith('.py')) return { nodes: [], references: [] };
  186. const safe = stripCommentsForRegex(content, 'python');
  187. const decorator = extractDecoratorRoutes(filePath, safe, {
  188. // Flask: @x.route('/path', methods=[...] | (...)) — the handler is the next
  189. // `def`, allowing intervening decorators (@login_required) and stacked
  190. // @x.route() lines. methods may be a list OR a tuple (methods=('GET',)).
  191. decoratorRegex: /@(\w+)\.route\s*\(\s*['"]([^'"]*)['"](?:\s*,\s*methods\s*=\s*[[(]([^\])]+)[\])])?\s*\)/g,
  192. defaultMethod: 'GET',
  193. methodFromGroup: 3,
  194. pathGroup: 2,
  195. findHandler: true,
  196. language: 'python',
  197. });
  198. const restful = extractFlaskRestful(filePath, safe);
  199. return {
  200. nodes: [...decorator.nodes, ...restful.nodes],
  201. references: [...decorator.references, ...restful.references],
  202. };
  203. },
  204. };
  205. export const fastapiResolver: FrameworkResolver = {
  206. name: 'fastapi',
  207. languages: ['python'],
  208. detect(context) {
  209. const requirements = context.readFile('requirements.txt');
  210. if (requirements && /\bfastapi\b/i.test(requirements)) return true;
  211. const pyproject = context.readFile('pyproject.toml');
  212. if (pyproject && /\bfastapi\b/i.test(pyproject)) return true;
  213. for (const file of ['app.py', 'main.py', 'api.py']) {
  214. const content = context.readFile(file);
  215. if (content && content.includes('FastAPI(')) return true;
  216. }
  217. return false;
  218. },
  219. resolve(ref, context) {
  220. if (ref.referenceName.endsWith('_router') || ref.referenceName === 'router') {
  221. const result = resolveByNameAndKind(ref.referenceName, VARIABLE_KINDS, ROUTER_DIRS, context);
  222. if (result) return { original: ref, targetNodeId: result, confidence: 0.8, resolvedBy: 'framework' };
  223. }
  224. if (ref.referenceName.startsWith('get_') || ref.referenceName.startsWith('Depends')) {
  225. const result = resolveByNameAndKind(ref.referenceName, FUNCTION_KINDS, DEP_DIRS, context);
  226. if (result) return { original: ref, targetNodeId: result, confidence: 0.75, resolvedBy: 'framework' };
  227. }
  228. return null;
  229. },
  230. extract(filePath, content) {
  231. if (!filePath.endsWith('.py')) return { nodes: [], references: [] };
  232. return extractDecoratorRoutes(filePath, stripCommentsForRegex(content, 'python'), {
  233. // FastAPI: @x.METHOD('/path') -> handler on the next def line. Path may be
  234. // empty ("") for routes mounted at the router/prefix root.
  235. decoratorRegex: /@(\w+)\.(get|post|put|patch|delete|options|head)\s*\(\s*['"]([^'"]*)['"]/g,
  236. defaultMethod: '',
  237. methodGroup: 2,
  238. pathGroup: 3,
  239. findHandler: true,
  240. language: 'python',
  241. });
  242. },
  243. };
  244. interface DecoratorRouteOpts {
  245. decoratorRegex: RegExp;
  246. defaultMethod: string;
  247. methodGroup?: number;
  248. methodFromGroup?: number; // methods=[...] list
  249. pathGroup: number;
  250. handlerGroup?: number;
  251. findHandler?: boolean;
  252. language: 'python';
  253. }
  254. function extractDecoratorRoutes(filePath: string, content: string, opts: DecoratorRouteOpts): FrameworkExtractionResult {
  255. const nodes: Node[] = [];
  256. const references: UnresolvedRef[] = [];
  257. const now = Date.now();
  258. let match: RegExpExecArray | null;
  259. while ((match = opts.decoratorRegex.exec(content)) !== null) {
  260. const routePath = match[opts.pathGroup];
  261. let method = opts.defaultMethod;
  262. if (opts.methodGroup && match[opts.methodGroup]) {
  263. method = match[opts.methodGroup]!.toUpperCase();
  264. } else if (opts.methodFromGroup && match[opts.methodFromGroup]) {
  265. const m = match[opts.methodFromGroup]!.match(/['"]([A-Z]+)['"]/i);
  266. if (m) method = m[1]!.toUpperCase();
  267. }
  268. const line = content.slice(0, match.index).split('\n').length;
  269. const name = method ? `${method} ${routePath || '/'}` : (routePath || '/');
  270. const routeNode: Node = {
  271. id: `route:${filePath}:${line}:${method}:${routePath}`,
  272. kind: 'route',
  273. name,
  274. qualifiedName: `${filePath}::${method}:${routePath}`,
  275. filePath,
  276. startLine: line,
  277. endLine: line,
  278. startColumn: 0,
  279. endColumn: match[0].length,
  280. language: opts.language,
  281. updatedAt: now,
  282. };
  283. nodes.push(routeNode);
  284. let handlerName: string | undefined;
  285. if (opts.handlerGroup && match[opts.handlerGroup]) {
  286. handlerName = match[opts.handlerGroup];
  287. } else if (opts.findHandler) {
  288. const tail = content.slice(match.index + match[0].length);
  289. const defMatch = tail.match(/\n\s*(?:async\s+)?def\s+(\w+)/);
  290. if (defMatch) handlerName = defMatch[1];
  291. }
  292. if (handlerName) {
  293. references.push({
  294. fromNodeId: routeNode.id,
  295. referenceName: handlerName,
  296. referenceKind: 'references',
  297. line,
  298. column: 0,
  299. filePath,
  300. language: 'python',
  301. });
  302. }
  303. }
  304. return { nodes, references };
  305. }
  306. /**
  307. * Flask-RESTful: `api.add_resource(ResourceClass, '/path'[, '/path2'])`
  308. * (and variants like redash's `add_org_resource`). The ResourceClass holds the
  309. * HTTP-verb methods (get/post/…), so the route references the class — its verb
  310. * methods resolve as the handlers via the class. Method is ANY (the class
  311. * decides which verbs it serves).
  312. */
  313. function extractFlaskRestful(filePath: string, safe: string): FrameworkExtractionResult {
  314. const nodes: Node[] = [];
  315. const references: UnresolvedRef[] = [];
  316. const now = Date.now();
  317. const re = /\.add\w*[Rr]esource\s*\(\s*(\w+)\s*,\s*((?:['"][^'"]+['"]\s*,?\s*)+)/g;
  318. let m: RegExpExecArray | null;
  319. while ((m = re.exec(safe)) !== null) {
  320. const className = m[1]!;
  321. const paths = (m[2]!.match(/['"]([^'"]+)['"]/g) || []).map((s) => s.slice(1, -1));
  322. const line = safe.slice(0, m.index).split('\n').length;
  323. for (const routePath of paths) {
  324. const routeNode: Node = {
  325. id: `route:${filePath}:${line}:ANY:${routePath}`,
  326. kind: 'route',
  327. name: `ANY ${routePath}`,
  328. qualifiedName: `${filePath}::ANY:${routePath}`,
  329. filePath,
  330. startLine: line,
  331. endLine: line,
  332. startColumn: 0,
  333. endColumn: 0,
  334. language: 'python',
  335. updatedAt: now,
  336. };
  337. nodes.push(routeNode);
  338. references.push({
  339. fromNodeId: routeNode.id,
  340. referenceName: className,
  341. referenceKind: 'references',
  342. line,
  343. column: 0,
  344. filePath,
  345. language: 'python',
  346. });
  347. }
  348. }
  349. return { nodes, references };
  350. }
  351. // Directory patterns
  352. const MODEL_DIRS = ['models', 'app/models', 'src/models'];
  353. const VIEW_DIRS = ['views', 'app/views', 'src/views', 'api/views'];
  354. const FORM_DIRS = ['forms', 'app/forms', 'src/forms'];
  355. const ROUTER_DIRS = ['/routers/', '/api/', '/routes/', '/endpoints/'];
  356. const DEP_DIRS = ['/dependencies/', '/deps/', '/core/'];
  357. const CLASS_KINDS = new Set(['class']);
  358. const VIEW_KINDS = new Set(['class', 'function']);
  359. const VARIABLE_KINDS = new Set(['variable']);
  360. const FUNCTION_KINDS = new Set(['function']);
  361. /**
  362. * Resolve a symbol by name using indexed queries instead of scanning all files.
  363. */
  364. function resolveByNameAndKind(
  365. name: string,
  366. kinds: Set<string>,
  367. preferredDirPatterns: string[],
  368. context: ResolutionContext,
  369. ): string | null {
  370. const candidates = context.getNodesByName(name);
  371. if (candidates.length === 0) return null;
  372. const kindFiltered = candidates.filter((n) => kinds.has(n.kind));
  373. if (kindFiltered.length === 0) return null;
  374. // Prefer candidates in framework-conventional directories
  375. if (preferredDirPatterns.length > 0) {
  376. const preferred = kindFiltered.filter((n) =>
  377. preferredDirPatterns.some((d) => n.filePath.includes(d))
  378. );
  379. if (preferred.length > 0) return preferred[0]!.id;
  380. }
  381. // Fall back to any match
  382. return kindFiltered[0]!.id;
  383. }