From b39166973bd6a669720ef0d72ac33af22315a529 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Sat, 9 May 2026 03:20:18 -0700 Subject: [PATCH] fix(python): improve taint receiver resolution --- .../rule_config_python.json | 2 +- src/checker/common/rules-basic-handler.ts | 1 + .../python/python-default-taint-checker.ts | 3 +- .../taint/python/python-taint-checker.ts | 3 +- src/engine/analyzer/common/analyzer.ts | 20 ++- src/engine/analyzer/common/source-line.ts | 9 ++ .../analyzer/python/common/python-analyzer.ts | 144 +++++++++++++++++- 7 files changed, 168 insertions(+), 14 deletions(-) diff --git a/resource/example-rule-config/rule_config_python.json b/resource/example-rule-config/rule_config_python.json index 93de362d..4ae24f02 100644 --- a/resource/example-rule-config/rule_config_python.json +++ b/resource/example-rule-config/rule_config_python.json @@ -1,6 +1,6 @@ [ { - "checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input"], + "checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input", "taint_flow_python_tornado_input" ], "sources": { "FuncCallReturnValueTaintSource": [ { diff --git a/src/checker/common/rules-basic-handler.ts b/src/checker/common/rules-basic-handler.ts index a13b1474..c0640ed5 100644 --- a/src/checker/common/rules-basic-handler.ts +++ b/src/checker/common/rules-basic-handler.ts @@ -283,6 +283,7 @@ export function matchField(node: any, marray: string[], i: number): boolean { switch (node.type) { case 'MemberAccess': { if (!matchPrefix(el, node.property.name)) return false + if (i === 0) return true return matchField(node.object, marray, i - 1) } case 'Identifier': { diff --git a/src/checker/taint/python/python-default-taint-checker.ts b/src/checker/taint/python/python-default-taint-checker.ts index 6ff8ca84..96ae42a7 100644 --- a/src/checker/taint/python/python-default-taint-checker.ts +++ b/src/checker/taint/python/python-default-taint-checker.ts @@ -136,7 +136,8 @@ class PythonDefaultTaintChecker extends PythonTaintAbstractChecker { for (const fileEntryPoint of fileEntryPoints) { const fullFilePath = `${Config.maindir}${fileEntryPoint.filePath}`.replace('//', '/') - const fileUuid = fileManager[fullFilePath] + const fileEntry = fileManager[fullFilePath] + const fileUuid = typeof fileEntry === 'string' ? fileEntry : fileEntry?.uuid const file = analyzer.symbolTable.get(fileUuid) if (file?.ast?.node?.type === 'CompileUnit') { const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN) diff --git a/src/checker/taint/python/python-taint-checker.ts b/src/checker/taint/python/python-taint-checker.ts index c95ffeac..79d4bf93 100644 --- a/src/checker/taint/python/python-taint-checker.ts +++ b/src/checker/taint/python/python-taint-checker.ts @@ -183,7 +183,8 @@ class PythonTaintChecker extends PythonTaintAbstractChecker { for (const fileEntryPoint of fileEntryPoints) { const fullFilePath = `${Config.maindir}${fileEntryPoint.filePath}`.replace('//', '/') - const fileUuid = fileManager[fullFilePath] + const fileEntry = fileManager[fullFilePath] + const fileUuid = typeof fileEntry === 'string' ? fileEntry : fileEntry?.uuid const file = analyzer.symbolTable.get(fileUuid) if (file?.ast?.node?.type === 'CompileUnit') { const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN) diff --git a/src/engine/analyzer/common/analyzer.ts b/src/engine/analyzer/common/analyzer.ts index b02bd017..6561e825 100644 --- a/src/engine/analyzer/common/analyzer.ts +++ b/src/engine/analyzer/common/analyzer.ts @@ -3499,7 +3499,6 @@ class Analyzer extends BaseAnalyzer { ) { const argvalues = getLegacyArgValues(callInfo) if (logger.isTraceEnabled()) logger.trace(`\nprocessCall: function: ${this.formatScope(fdecl?.id?.name)}`) - // 进入函数调用时重置 inRange,避免 for-range body 中调用函数时嵌套 for-range 被错误抑制 const savedInRange = this.inRange this.inRange = false @@ -3659,6 +3658,21 @@ class Analyzer extends BaseAnalyzer { } }) + const pythonReceiverFallback = callInfo?.callArgs?.receiver || fclos?.getThisObj?.() + if ( + Config.language === 'python' && + pythonReceiverFallback && + fscope?.value && + !Object.prototype.hasOwnProperty.call(fscope.value, 'self') + ) { + this.saveVarInCurrentScope( + fscope, + { type: 'Identifier', name: 'self', loc: fdecl?.loc }, + pythonReceiverFallback, + new_state + ) + } + let objectVal if (node?.callee?.type === 'MemberAccess') { // objectVal = this.processInstruction(scope, node.callee.object, state) @@ -3935,10 +3949,10 @@ class Analyzer extends BaseAnalyzer { const oldThisFClos = this.thisFClos this.thisFClos = obj ctorClos._this = obj - // __new__ 的第一个参数是 cls,需要设置 receiver 使 bindReceiverParam 正确跳过 cls let ctorCallInfo = callInfo - if (ctorClos.__isNewMethod && callInfo?.callArgs) { + if (callInfo?.callArgs) { ctorCallInfo = { + ...callInfo, callArgs: { ...callInfo.callArgs, receiver: obj, diff --git a/src/engine/analyzer/common/source-line.ts b/src/engine/analyzer/common/source-line.ts index b0273dd0..c9938f50 100644 --- a/src/engine/analyzer/common/source-line.ts +++ b/src/engine/analyzer/common/source-line.ts @@ -95,6 +95,9 @@ function addSrcLineInfo(val: any, node: any, sourcefile: any, tag: any, affected const newVal = buildNewCopiedWithTag(globalAnalyzer, val, sig) // @ts-ignore newVal.value = val.value + if ((val as any)?._this) { + ;(newVal as any)._this = (val as any)._this + } for (const eachVal of newVal) { const start_line = node.loc.start?.line const end_line = node.loc.end?.line @@ -124,6 +127,9 @@ function addSrcLineInfo(val: any, node: any, sourcefile: any, tag: any, affected } else { newVal = buildNewCopiedWithTag(globalAnalyzer, val, sig) newVal.value = val.value + if (val?._this) { + newVal._this = val._this + } } // CRITICAL: If traceItem exists and val has tags, add it to val FIRST // This handles the case where val itself has tags (first call where val === res) @@ -136,6 +142,9 @@ function addSrcLineInfo(val: any, node: any, sourcefile: any, tag: any, affected } const newVal = buildNewCopiedWithTag(globalAnalyzer, val, sig) newVal.value = val.value + if (val?._this) { + newVal._this = val._this + } // Pass traceItem to processFieldAndArguments for delayed addition processFieldAndArguments(newVal, newVal, 0, [], node, traceItem) diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index 13aa764a..8ec8b816 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -38,7 +38,7 @@ const CheckerManager = require('../../common/checker-manager') const BasicRuleHandler = require('../../../../checker/common/rules-basic-handler') const Parser = require('../../../parser/parser') const { - ValueUtil: { Scoped, PrimitiveValue, UndefinedValue, UnionValue, SymbolValue, VoidValue }, + ValueUtil: { ObjectValue, Scoped, PrimitiveValue, UndefinedValue, UnionValue, SymbolValue, VoidValue }, } = require('../../../util/value-util') const logger: import('../../../../util/logger').Logger = require('../../../../util/logger')(__filename) const Config = require('../../../../config') @@ -178,8 +178,10 @@ class PythonAnalyzer extends Analyzer { const fileFullPath = assembleFullPath(entryPoint.filePath, Config.maindir) const sourceNameList = getSourceNameList() + const fileEntry = this.topScope.context.files[fileFullPath] + const fileUuid = typeof fileEntry === 'string' ? fileEntry : fileEntry?.uuid this.refreshCtx(this.topScope.context.modules.members.get(fileFullPath)?.value, sourceNameList) - this.refreshCtx(this.symbolTable.get(this.topScope.context.files[fileFullPath])?.value, sourceNameList) + this.refreshCtx(this.symbolTable.get(fileUuid)?.value, sourceNameList) this.refreshCtx(this.topScope.context.packages.members.get(fileFullPath), sourceNameList) const { filePath } = entryPoint @@ -219,8 +221,10 @@ class PythonAnalyzer extends Analyzer { ) const fileFullPath = assembleFullPath(entryPoint.filePath, Config.maindir) const sourceNameList = getSourceNameList() + const fileEntry = this.topScope.context.files[fileFullPath] + const fileUuid = typeof fileEntry === 'string' ? fileEntry : fileEntry?.uuid this.refreshCtx(this.topScope.context.modules.members.get(fileFullPath)?.value, sourceNameList) - this.refreshCtx(this.symbolTable.get(this.topScope.context.files[fileFullPath])?.value, sourceNameList) + this.refreshCtx(this.symbolTable.get(fileUuid)?.value, sourceNameList) this.refreshCtx(this.topScope.context.packages.members.get(fileFullPath), sourceNameList) this.checkerManager.checkAtSymbolInterpretOfEntryPointBefore(this, null, null, null, null) @@ -320,6 +324,117 @@ class PythonAnalyzer extends Analyzer { return result } + isPythonTypeFactoryInstantiation(node: CallExpression): boolean { + const innerCall: any = node?.callee + const innerCallee = innerCall?.callee + return ( + innerCall?.type === 'CallExpression' && + innerCallee?.type === 'Identifier' && + innerCallee?.name === 'type' + ) + } + + buildPythonTypeFactoryObject(scope: Scope, node: CallExpression): Value { + const innerCall: any = node.callee + const nameArg = innerCall?.arguments?.[0] + const className = + nameArg?.value ?? + nameArg?.raw_value ?? + nameArg?.raw ?? + nameArg?.name ?? + `type_factory_${node.loc?.start?.line ?? 'unknown'}_${node.loc?.start?.column ?? 'unknown'}` + const obj = new ObjectValue(scope.qid, { + sid: String(className), + qid: `${scope.qid}.${String(className)}`, + parent: scope, + ast: node, + }) as Value + ;(obj as any)._this = obj + return obj + } + + recoverFunctionReceiverFromQid(fclos: any): void { + if (!fclos || fclos.vtype !== 'fclos') return + const currentThis = fclos._this || fclos.getThisObj?.() + if (currentThis && currentThis !== this.topScope && currentThis?.sid !== '') return + const qid = typeof fclos.qid === 'string' ? fclos.qid : '' + const lastDot = qid.lastIndexOf('.') + if (lastDot <= 0) return + const receiver = this.resolveByQidFromSymbolTable(qid.substring(0, lastDot)) + if (receiver) { + fclos._this = receiver + } + } + + recoverMemberCallClosure( + scope: Scope, + node: CallExpression, + state: State, + fclos: any, + callInfo: CallInfo + ): any { + if (node?.callee?.type !== 'MemberAccess') return null + const member: any = node.callee + const prop = member.property + if (!prop) return null + + const receiver = this.processInstruction(scope, member.object, state) + const candidates = [ + receiver, + this.resolveByQidFromSymbolTable(receiver?.qid), + this.resolveByQidFromSymbolTable(receiver?.sid), + ].filter(Boolean) + + for (const recv of candidates) { + const method = this.getMemberValue(recv, prop, state) + if (method?.vtype === 'fclos') { + method._this = recv + if (callInfo?.callArgs) callInfo.callArgs.receiver = recv + return method + } + } + + if (fclos?.vtype === 'fclos' && fclos._this && fclos._this !== this.topScope) { + if (callInfo?.callArgs) callInfo.callArgs.receiver = fclos._this + return fclos + } + + return null + } + + resolveByQidFromSymbolTable(qid: any): any { + if (!qid || typeof qid !== 'string') return null + const symbolMap = this.symbolTable?.getMap?.() + if (!symbolMap) return null + + for (const val of symbolMap.values()) { + if (val?.qid === qid || val?.sid === qid) return val + } + + const parts = qid.split('.').filter(Boolean) + for (let i = parts.length - 1; i > 0; i--) { + const baseQid = parts.slice(0, i).join('.') + let current: any = null + for (const val of symbolMap.values()) { + if (val?.qid === baseQid || val?.sid === baseQid) { + current = val + break + } + } + if (!current) continue + for (const fieldName of parts.slice(i)) { + current = + current?.value?.[fieldName] || + current?.members?.get?.(fieldName) || + current?.getFieldValue?.(fieldName, false) + if (!current) break + } + if (current) return current + } + + return null + } + /** * * @param scope @@ -333,8 +448,13 @@ class PythonAnalyzer extends Analyzer { einfo: state.einfo, }) - const fclos = this.processInstruction(scope, node.callee, state) + if (this.isPythonTypeFactoryInstantiation(node)) { + return this.buildPythonTypeFactoryObject(scope, node) + } + + let fclos = this.processInstruction(scope, node.callee, state) if (!fclos) return new UndefinedValue() + this.recoverFunctionReceiverFromQid(fclos) const argvalues: any[] = [] // 参数按原始顺序处理,由 buildPythonCallArgs 标记 kind,bindCallArgs 负责绑定 @@ -349,6 +469,10 @@ class PythonAnalyzer extends Analyzer { // 构建结构化 callInfo,携带 keyword/spread/kwspread 信息 const callInfo: CallInfo = { callArgs: this.buildPythonCallArgs(collectedArgs, argvalues, fclos, node) } + const recoveredFclos = this.recoverMemberCallClosure(scope, node, state, fclos, callInfo) + if (recoveredFclos) { + fclos = recoveredFclos + } if (argvalues && this.checkerManager) { this.checkerManager.checkAtFunctionCallBefore(this, scope, node, state, { @@ -481,10 +605,14 @@ class PythonAnalyzer extends Analyzer { argvalues: Value[], callInfo: CallInfo ): Value { - // 有 __init__ 或 __new__:走完整 buildNewObject(执行构造函数) - // 不含 fclos.ast?.cdef 条件——无 __init__ 的类走 processLibArgToRet 避免 OOM - if (fclos.members?.has('_CTOR_') || fclos.value?.['__new__']) { - const res = this.buildNewObject(fclos.ast.cdef, fclos, state, node, scope, callInfo) + const classAst: any = fclos?.ast?.cdef || fclos?.ast?.fdef || fclos?.ast + const isPythonClass = fclos?.vtype === 'class' && classAst?.type === 'ClassDefinition' + + // Python classes without explicit __init__ still need an instance object. + // Falling back to processLibArgToRet() drops class members/methods and breaks + // chained instance-method resolution (for example b = B(); b.predict(...)). + if (isPythonClass || fclos.members?.has('_CTOR_') || fclos.value?.['__new__']) { + const res = this.buildNewObject(classAst, fclos, state, node, scope, callInfo) if (res && this.checkerManager?.checkAtFunctionCallAfter) { this.checkerManager.checkAtFunctionCallAfter(this, scope, node, state, { callInfo,