Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion resource/example-rule-config/rule_config_python.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[
{
"checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input"],
"checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input", "taint_flow_python_tornado_input" ],
"sources": {
"FuncCallReturnValueTaintSource": [
{
Expand Down
1 change: 1 addition & 0 deletions src/checker/common/rules-basic-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ export function matchField(node: any, marray: string[], i: number): boolean {
switch (node.type) {
case 'MemberAccess': {
if (!matchPrefix(el, node.property.name)) return false
if (i === 0) return true

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This change, which allows suffix matching on member-access chains, appears to break an existing test case in `test/rules-basic-handler/test-match-field.ts`. The test named `fsig "c" 不匹配 x().c` (i.e. "fsig "c" does not match x().c") expects `matches(member(call(id('x')), 'c'), 'c')` to be false, but with this change it will return true. Either this is a regression, or the matching logic was changed intentionally and the test suite has not been updated to reflect it. Please update the tests to align with the new behavior, or reconsider the change if it has unintended consequences.

return matchField(node.object, marray, i - 1)
}
case 'Identifier': {
Expand Down
3 changes: 2 additions & 1 deletion src/checker/taint/python/python-default-taint-checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ class PythonDefaultTaintChecker extends PythonTaintAbstractChecker {

for (const fileEntryPoint of fileEntryPoints) {
const fullFilePath = `${Config.maindir}${fileEntryPoint.filePath}`.replace('//', '/')
const fileUuid = fileManager[fullFilePath]
const fileEntry = fileManager[fullFilePath]
const fileUuid = typeof fileEntry === 'string' ? fileEntry : fileEntry?.uuid
const file = analyzer.symbolTable.get(fileUuid)
if (file?.ast?.node?.type === 'CompileUnit') {
const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN)
Expand Down
3 changes: 2 additions & 1 deletion src/checker/taint/python/python-taint-checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ class PythonTaintChecker extends PythonTaintAbstractChecker {

for (const fileEntryPoint of fileEntryPoints) {
const fullFilePath = `${Config.maindir}${fileEntryPoint.filePath}`.replace('//', '/')
const fileUuid = fileManager[fullFilePath]
const fileEntry = fileManager[fullFilePath]
const fileUuid = typeof fileEntry === 'string' ? fileEntry : fileEntry?.uuid
const file = analyzer.symbolTable.get(fileUuid)
if (file?.ast?.node?.type === 'CompileUnit') {
const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN)
Expand Down
20 changes: 17 additions & 3 deletions src/engine/analyzer/common/analyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3499,7 +3499,6 @@ class Analyzer extends BaseAnalyzer {
) {
const argvalues = getLegacyArgValues(callInfo)
if (logger.isTraceEnabled()) logger.trace(`\nprocessCall: function: ${this.formatScope(fdecl?.id?.name)}`)

// 进入函数调用时重置 inRange,避免 for-range body 中调用函数时嵌套 for-range 被错误抑制
const savedInRange = this.inRange
this.inRange = false
Expand Down Expand Up @@ -3659,6 +3658,21 @@ class Analyzer extends BaseAnalyzer {
}
})

const pythonReceiverFallback = callInfo?.callArgs?.receiver || fclos?.getThisObj?.()
if (
Config.language === 'python' &&
pythonReceiverFallback &&
fscope?.value &&
!Object.prototype.hasOwnProperty.call(fscope.value, 'self')
) {
this.saveVarInCurrentScope(
fscope,
{ type: 'Identifier', name: 'self', loc: fdecl?.loc },
pythonReceiverFallback,
new_state
)
}

let objectVal
if (node?.callee?.type === 'MemberAccess') {
// objectVal = this.processInstruction(scope, node.callee.object, state)
Expand Down Expand Up @@ -3935,10 +3949,10 @@ class Analyzer extends BaseAnalyzer {
const oldThisFClos = this.thisFClos
this.thisFClos = obj
ctorClos._this = obj
// __new__ 的第一个参数是 cls,需要设置 receiver 使 bindReceiverParam 正确跳过 cls
let ctorCallInfo = callInfo
if (ctorClos.__isNewMethod && callInfo?.callArgs) {
if (callInfo?.callArgs) {
ctorCallInfo = {
...callInfo,
callArgs: {
...callInfo.callArgs,
receiver: obj,
Expand Down
9 changes: 9 additions & 0 deletions src/engine/analyzer/common/source-line.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ function addSrcLineInfo(val: any, node: any, sourcefile: any, tag: any, affected
const newVal = buildNewCopiedWithTag(globalAnalyzer, val, sig)
// @ts-ignore
newVal.value = val.value
if ((val as any)?._this) {
;(newVal as any)._this = (val as any)._this
}
for (const eachVal of newVal) {
const start_line = node.loc.start?.line
const end_line = node.loc.end?.line
Expand Down Expand Up @@ -124,6 +127,9 @@ function addSrcLineInfo(val: any, node: any, sourcefile: any, tag: any, affected
} else {
newVal = buildNewCopiedWithTag(globalAnalyzer, val, sig)
newVal.value = val.value
if (val?._this) {
newVal._this = val._this
}
}
// CRITICAL: If traceItem exists and val has tags, add it to val FIRST
// This handles the case where val itself has tags (first call where val === res)
Expand All @@ -136,6 +142,9 @@ function addSrcLineInfo(val: any, node: any, sourcefile: any, tag: any, affected
}
const newVal = buildNewCopiedWithTag(globalAnalyzer, val, sig)
newVal.value = val.value
if (val?._this) {
newVal._this = val._this
}

// Pass traceItem to processFieldAndArguments for delayed addition
processFieldAndArguments(newVal, newVal, 0, [], node, traceItem)
Expand Down
144 changes: 136 additions & 8 deletions src/engine/analyzer/python/common/python-analyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const CheckerManager = require('../../common/checker-manager')
const BasicRuleHandler = require('../../../../checker/common/rules-basic-handler')
const Parser = require('../../../parser/parser')
const {
ValueUtil: { Scoped, PrimitiveValue, UndefinedValue, UnionValue, SymbolValue, VoidValue },
ValueUtil: { ObjectValue, Scoped, PrimitiveValue, UndefinedValue, UnionValue, SymbolValue, VoidValue },
} = require('../../../util/value-util')
const logger: import('../../../../util/logger').Logger = require('../../../../util/logger')(__filename)
const Config = require('../../../../config')
Expand Down Expand Up @@ -178,8 +178,10 @@ class PythonAnalyzer extends Analyzer {

const fileFullPath = assembleFullPath(entryPoint.filePath, Config.maindir)
const sourceNameList = getSourceNameList()
const fileEntry = this.topScope.context.files[fileFullPath]
const fileUuid = typeof fileEntry === 'string' ? fileEntry : fileEntry?.uuid
this.refreshCtx(this.topScope.context.modules.members.get(fileFullPath)?.value, sourceNameList)
this.refreshCtx(this.symbolTable.get(this.topScope.context.files[fileFullPath])?.value, sourceNameList)
this.refreshCtx(this.symbolTable.get(fileUuid)?.value, sourceNameList)
this.refreshCtx(this.topScope.context.packages.members.get(fileFullPath), sourceNameList)

const { filePath } = entryPoint
Expand Down Expand Up @@ -219,8 +221,10 @@ class PythonAnalyzer extends Analyzer {
)
const fileFullPath = assembleFullPath(entryPoint.filePath, Config.maindir)
const sourceNameList = getSourceNameList()
const fileEntry = this.topScope.context.files[fileFullPath]
const fileUuid = typeof fileEntry === 'string' ? fileEntry : fileEntry?.uuid
this.refreshCtx(this.topScope.context.modules.members.get(fileFullPath)?.value, sourceNameList)
this.refreshCtx(this.symbolTable.get(this.topScope.context.files[fileFullPath])?.value, sourceNameList)
this.refreshCtx(this.symbolTable.get(fileUuid)?.value, sourceNameList)
this.refreshCtx(this.topScope.context.packages.members.get(fileFullPath), sourceNameList)

this.checkerManager.checkAtSymbolInterpretOfEntryPointBefore(this, null, null, null, null)
Expand Down Expand Up @@ -320,6 +324,117 @@ class PythonAnalyzer extends Analyzer {
return result
}

/**
 * Reports whether `node` has the shape `type(...)(...)`: a call whose callee
 * is itself a call expression targeting the bare identifier `type`.
 *
 * Only the callee shape is inspected; the arguments passed to `type` are not
 * looked at.
 * NOTE(review): as written this also matches the one-argument form
 * `type(obj)()` — confirm that is intended.
 *
 * @param node - call expression under inspection
 * @returns true when the callee of `node` is a direct call to `type`
 */
isPythonTypeFactoryInstantiation(node: CallExpression): boolean {
  const calleeCall: any = node?.callee
  if (calleeCall?.type !== 'CallExpression') return false
  const target = calleeCall.callee
  return target?.type === 'Identifier' && target?.name === 'type'
}

/**
 * Creates the object value that stands in for the result of a
 * `type(...)(...)` call expression.
 *
 * The object is named after the first argument of the inner `type(...)` call,
 * using the first non-nullish of its `value`, `raw_value`, `raw` and `name`
 * properties. When none of them is present, a synthetic name of the form
 * `type_factory_<line>_<column>` is derived from the start location of `node`
 * (each part becomes `unknown` if the location is missing).
 *
 * @param scope - scope the object is created under; its `qid` prefixes the new object's `qid`
 * @param node - the outer call expression; stored as the object's `ast`
 * @returns the new object value, whose `_this` refers to itself
 */
buildPythonTypeFactoryObject(scope: Scope, node: CallExpression): Value {
  const factoryCall: any = node.callee
  const firstArg = factoryCall?.arguments?.[0]
  const explicitName = [firstArg?.value, firstArg?.raw_value, firstArg?.raw, firstArg?.name].find(
    (candidate) => candidate != null
  )
  const start = node.loc?.start
  const label = String(explicitName ?? `type_factory_${start?.line ?? 'unknown'}_${start?.column ?? 'unknown'}`)

  const instance = new ObjectValue(scope.qid, {
    sid: label,
    qid: `${scope.qid}.${label}`,
    parent: scope,
    ast: node,
  }) as Value
  // The object acts as its own receiver.
  ;(instance as any)._this = instance
  return instance
}

/**
 * Tries to give a function closure a receiver when it has none.
 *
 * Nothing happens unless `fclos.vtype` is `'fclos'`. A closure counts as
 * unbound when its current receiver (`_this`, else `getThisObj()`) is missing,
 * is the analyzer's top scope, or has the sid `'<global>'`. For an unbound
 * closure, the part of its `qid` before the last dot is looked up through
 * `resolveByQidFromSymbolTable`; if something is found it is stored in
 * `fclos._this`.
 *
 * @param fclos - candidate function closure; mutated in place when a receiver is found
 */
recoverFunctionReceiverFromQid(fclos: any): void {
  if (fclos?.vtype !== 'fclos') return

  const boundThis = fclos._this || fclos.getThisObj?.()
  const isUnbound = !boundThis || boundThis === this.topScope || boundThis.sid === '<global>'
  if (!isUnbound) return

  const ownQid = fclos.qid
  if (typeof ownQid !== 'string') return
  const cut = ownQid.lastIndexOf('.')
  // No dot, or a leading dot, leaves no owner prefix to look up.
  if (cut <= 0) return

  const owner = this.resolveByQidFromSymbolTable(ownQid.slice(0, cut))
  if (owner) fclos._this = owner
}

/**
 * Re-resolves the callee of a member call (`obj.method(...)`) against its
 * receiver.
 *
 * The receiver expression is evaluated, and up to three candidates are tried
 * in order: the evaluated receiver itself, the symbol-table entry found for
 * its `qid`, and the one found for its `sid`. The first candidate whose member
 * named by the call's property has vtype `'fclos'` wins: that closure gets
 * `_this` set to the candidate, `callInfo.callArgs.receiver` (when `callArgs`
 * exists) is set to the same candidate, and the closure is returned.
 *
 * If no candidate yields a closure, the originally resolved `fclos` is
 * returned when it is a closure whose `_this` is set and is not the top
 * scope; its `_this` is then also written to `callInfo.callArgs.receiver`.
 *
 * @param scope - scope used to evaluate the receiver expression
 * @param node - call expression being processed
 * @param state - analysis state passed through to evaluation and member lookup
 * @param fclos - closure already resolved for the callee; used only as fallback
 * @param callInfo - call info whose `callArgs.receiver` may be overwritten
 * @returns the closure to call, or null when the callee is not a member access,
 *          has no property, or nothing could be recovered
 */
recoverMemberCallClosure(
  scope: Scope,
  node: CallExpression,
  state: State,
  fclos: any,
  callInfo: CallInfo
): any {
  const access: any = node?.callee
  if (access?.type !== 'MemberAccess') return null
  const memberName = access.property
  if (!memberName) return null

  // All three lookups happen up front, before any member resolution.
  const evaluated = this.processInstruction(scope, access.object, state)
  const byQid = this.resolveByQidFromSymbolTable(evaluated?.qid)
  const bySid = this.resolveByQidFromSymbolTable(evaluated?.sid)

  for (const owner of [evaluated, byQid, bySid]) {
    if (!owner) continue
    const resolved = this.getMemberValue(owner, memberName, state)
    if (resolved?.vtype !== 'fclos') continue
    resolved._this = owner
    if (callInfo?.callArgs) callInfo.callArgs.receiver = owner
    return resolved
  }

  // Fall back to the closure the caller already had, if it is bound to something
  // other than the top scope.
  const hasUsableThis = fclos?.vtype === 'fclos' && fclos._this && fclos._this !== this.topScope
  if (!hasUsableThis) return null
  if (callInfo?.callArgs) callInfo.callArgs.receiver = fclos._this
  return fclos
}

/**
 * Looks up a symbol-table entry by qualified name.
 *
 * Resolution order:
 *  1. The first entry (in symbol-map iteration order) whose `qid` or `sid`
 *     equals `qid`.
 *  2. Otherwise, progressively shorter dot-separated prefixes of `qid` are
 *     tried, longest first. For the first prefix that names an entry, the
 *     remaining segments are walked as fields, each taken from `value[name]`,
 *     then `members.get(name)`, then `getFieldValue(name, false)`. The first
 *     walk that reaches a truthy value is returned.
 *
 * The symbol map is scanned once to build a first-match index, so prefix
 * lookups no longer rescan the whole table for every prefix.
 * NOTE(review): the index is a snapshot taken at the start of the call; this
 * assumes the field lookups in step 2 do not add symbol-table entries that a
 * later prefix should see — confirm.
 *
 * @param qid - qualified name to resolve; anything that is not a non-empty string yields null
 * @returns the resolved entry or field value, or null when nothing matches
 */
resolveByQidFromSymbolTable(qid: any): any {
  if (!qid || typeof qid !== 'string') return null
  const symbolMap = this.symbolTable?.getMap?.()
  if (!symbolMap) return null

  // First entry wins for each name, mirroring a front-to-back linear search
  // that compares both `qid` and `sid`.
  const firstByName = new Map<string, any>()
  for (const val of symbolMap.values()) {
    const entryQid = val?.qid
    if (typeof entryQid === 'string' && !firstByName.has(entryQid)) firstByName.set(entryQid, val)
    const entrySid = val?.sid
    if (typeof entrySid === 'string' && !firstByName.has(entrySid)) firstByName.set(entrySid, val)
  }

  if (firstByName.has(qid)) return firstByName.get(qid)

  const parts = qid.split('.').filter(Boolean)
  for (let i = parts.length - 1; i > 0; i--) {
    let current: any = firstByName.get(parts.slice(0, i).join('.'))
    if (!current) continue
    for (const fieldName of parts.slice(i)) {
      current =
        current?.value?.[fieldName] ||
        current?.members?.get?.(fieldName) ||
        current?.getFieldValue?.(fieldName, false)
      if (!current) break
    }
    if (current) return current
  }

  return null
}
Comment on lines +405 to +436

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The function resolveByQidFromSymbolTable contains a nested loop structure that iterates over all values in symbolMap within another loop. This can lead to significant performance degradation, especially when the symbol table is large, as the complexity would be O(N*M) where N is the number of parts in the qid and M is the number of symbols. This function is also called multiple times in recoverMemberCallClosure, potentially amplifying the performance impact. Consider optimizing this by using a more direct lookup method if available, or by creating an index for qid/sid to avoid iterating through all symbols repeatedly.


/**
*
* @param scope
Expand All @@ -333,8 +448,13 @@ class PythonAnalyzer extends Analyzer {
einfo: state.einfo,
})

const fclos = this.processInstruction(scope, node.callee, state)
if (this.isPythonTypeFactoryInstantiation(node)) {
return this.buildPythonTypeFactoryObject(scope, node)
}

let fclos = this.processInstruction(scope, node.callee, state)
if (!fclos) return new UndefinedValue()
this.recoverFunctionReceiverFromQid(fclos)

const argvalues: any[] = []
// 参数按原始顺序处理,由 buildPythonCallArgs 标记 kind,bindCallArgs 负责绑定
Expand All @@ -349,6 +469,10 @@ class PythonAnalyzer extends Analyzer {

// 构建结构化 callInfo,携带 keyword/spread/kwspread 信息
const callInfo: CallInfo = { callArgs: this.buildPythonCallArgs(collectedArgs, argvalues, fclos, node) }
const recoveredFclos = this.recoverMemberCallClosure(scope, node, state, fclos, callInfo)
if (recoveredFclos) {
fclos = recoveredFclos
}

if (argvalues && this.checkerManager) {
this.checkerManager.checkAtFunctionCallBefore(this, scope, node, state, {
Expand Down Expand Up @@ -481,10 +605,14 @@ class PythonAnalyzer extends Analyzer {
argvalues: Value[],
callInfo: CallInfo
): Value {
// 有 __init__ 或 __new__:走完整 buildNewObject(执行构造函数)
// 不含 fclos.ast?.cdef 条件——无 __init__ 的类走 processLibArgToRet 避免 OOM
if (fclos.members?.has('_CTOR_') || fclos.value?.['__new__']) {
const res = this.buildNewObject(fclos.ast.cdef, fclos, state, node, scope, callInfo)
const classAst: any = fclos?.ast?.cdef || fclos?.ast?.fdef || fclos?.ast
const isPythonClass = fclos?.vtype === 'class' && classAst?.type === 'ClassDefinition'

// Python classes without explicit __init__ still need an instance object.
// Falling back to processLibArgToRet() drops class members/methods and breaks
// chained instance-method resolution (for example b = B(); b.predict(...)).
if (isPythonClass || fclos.members?.has('_CTOR_') || fclos.value?.['__new__']) {
const res = this.buildNewObject(classAst, fclos, state, node, scope, callInfo)
if (res && this.checkerManager?.checkAtFunctionCallAfter) {
this.checkerManager.checkAtFunctionCallAfter(this, scope, node, state, {
callInfo,
Expand Down
Loading