Skip to content

Commit 32ec328

Browse files
committed
feat(protobuf): ✨ extract protobuf descriptors from APKs
Add functionality to extract Google protobuf FileDescriptorProto definitions from APK files containing DEX files. This enables asynchronous and parallel extraction of protobuf descriptors by analyzing DEX bytecode, supporting efficient reconstruction of proto definitions for further processing. Uses AlphaOmega.Debug for APK/DEX parsing and Google.Protobuf.Reflection for descriptor handling.
1 parent 6f6b341 commit 32ec328

1 file changed

Lines changed: 181 additions & 0 deletions

File tree

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
using AlphaOmega.Debug;
2+
using Google.Protobuf.Reflection;
3+
using System.Runtime.CompilerServices;
4+
using System.Text;
5+
using System.Threading.Channels;
6+
7+
namespace EcoFlow.Mqtt.Api.Protobuf.Extraction;
8+
9+
public static class ProtosReader
10+
{
11+
/// <summary>
/// Collects every <see cref="FileDescriptorProto"/> produced by
/// <see cref="Enumerate(string, CancellationToken)"/> for the given APK into a single
/// <see cref="FileDescriptorSet"/>.
/// </summary>
/// <param name="apkFileName">Path of the APK file to scan.</param>
/// <param name="cancellationToken">Token forwarded to the underlying enumeration.</param>
/// <returns>A set containing the descriptors in the order the enumeration produced them.</returns>
public static async Task<FileDescriptorSet> GetProtoSetAsync(string apkFileName, CancellationToken cancellationToken = default)
{
    var descriptorSet = new FileDescriptorSet();
    var descriptors = Enumerate(apkFileName, cancellationToken);

    await foreach (var descriptor in descriptors)
    {
        descriptorSet.File.Add(descriptor);
    }

    return descriptorSet;
}
20+
21+
/// <summary>
/// Streams the protobuf file descriptors found in every <c>.dex</c> entry of an APK.
/// The DEX entries are processed in parallel by a background producer that pushes results
/// through a bounded channel, so the order of the returned descriptors is not deterministic
/// across DEX files.
/// </summary>
/// <param name="apkFileName">Path of the APK file to scan.</param>
/// <param name="cancellationToken">Token that cancels both the producer and the consumer side.</param>
/// <returns>An asynchronous stream of parsed <see cref="FileDescriptorProto"/> instances.</returns>
/// <remarks>
/// A failure in the producer is surfaced to the caller when the channel is read.
/// If the caller stops enumerating early, the producer is cancelled and awaited before the
/// enumerator finishes disposing, so no background work is left blocked on the full channel.
/// </remarks>
public static async IAsyncEnumerable<FileDescriptorProto> Enumerate(string apkFileName, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    var outputChannel = Channel.CreateBounded<FileDescriptorProto>(new BoundedChannelOptions(100)
    {
        SingleWriter = false,
        SingleReader = true
    });

    // The producer gets its own token so it can be stopped when the consumer abandons the
    // enumeration (break / exception) even though the caller's token was never cancelled.
    using var producerCancellation = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
    var producerToken = producerCancellation.Token;

    // CancellationToken.None: the delegate must always run so that the channel is always
    // completed; cancellation is observed inside via producerToken.
    var writingTask = Task.Run(async () =>
    {
        try
        {
            var dexFiles = ZipReader.EnumerateFilesRecursively(apkFileName, fileName => fileName.EndsWith(".dex", StringComparison.OrdinalIgnoreCase));
            var parallelOptions = new ParallelOptions { CancellationToken = producerToken };

            await Parallel.ForEachAsync(dexFiles, parallelOptions, async (value, token) =>
            {
                var (_, fileStream) = value;
                using var dexFile = new DexFile(new StreamLoader(fileStream));

                foreach (var fileDescriptorProto in Enumerate(dexFile))
                    await outputChannel.Writer.WriteAsync(fileDescriptorProto, token);
            });

            outputChannel.Writer.TryComplete();
        }
        catch (Exception exception)
        {
            // Hand the failure (including cancellation) to the reader instead of losing it.
            outputChannel.Writer.TryComplete(exception);
        }
    }, CancellationToken.None);

    try
    {
        await foreach (var fileDescriptorProto in outputChannel.Reader.ReadAllAsync(cancellationToken))
            yield return fileDescriptorProto;
    }
    finally
    {
        // Runs on normal completion, on failure, and when the consumer disposes the
        // enumerator early. Cancelling unblocks a producer waiting on the full channel.
        producerCancellation.Cancel();

        // The producer delegate catches everything, so this await does not throw.
        await writingTask;
    }
}
58+
59+
/// <summary>
/// Scans a DEX file for classes that declare a static field named <c>descriptor</c> of type
/// <c>com.google.protobuf.Descriptors$FileDescriptor;</c> and reconstructs the serialized
/// descriptor from the string constants loaded in the class's <c>&lt;clinit&gt;</c> method.
/// </summary>
/// <param name="dex">The parsed DEX file to inspect.</param>
/// <returns>
/// One <see cref="FileDescriptorProto"/> per matching <c>&lt;clinit&gt;</c> in which a
/// <c>filled-new-array</c> or <c>filled-new-array/range</c> instruction is found; only the
/// first such instruction of each method is used.
/// </returns>
/// <remarks>
/// The bytecode scan is a heuristic: it advances one 16-bit code unit at a time and only skips
/// the operands of the opcodes it handles, so an operand of any other instruction may be
/// mistaken for an opcode. Units that cannot be a well-formed instruction (argument count
/// above five, operands past the end of the method) are skipped instead of throwing.
/// NOTE(review): <c>const-string/jumbo</c> (0x1B) is not tracked — confirm whether it is needed.
/// </remarks>
public static IEnumerable<FileDescriptorProto> Enumerate(DexFile dex)
{
    // A register operand is a full byte (v0..v255), so 256 slots are required.
    const int RegisterFileSize = byte.MaxValue + 1;

    // filled-new-array (format 35c) can name at most five argument registers.
    const int MaxInlineArguments = 5;

    // Index the class definitions once instead of searching all of them for every type id.
    // The lookup keeps insertion order per key, so taking the first element matches a linear
    // first-match search.
    var classDefsByDescriptor = dex.CLASS_DEF_ITEM.ToLookup(classDefRow => classDefRow.class_idx.TypeDescriptor);

    foreach (var typeIdRow in dex.TYPE_ID_ITEM)
    {
        var classDefRow = classDefsByDescriptor[typeIdRow.TypeDescriptor].FirstOrDefault();

        if (classDefRow == null)
            continue;

        if (classDefRow.class_data_off is not { } classDataRow)
            continue;

        // NOTE(review): field_idx_diff / method_idx_diff are used as absolute indexes here;
        // confirm that the DEX reader already resolves the delta encoding.
        if (!classDataRow.static_fields.Any(encodedFieldRow => dex.FIELD_ID_ITEM[encodedFieldRow.field_idx_diff] is { name_idx.data: "descriptor", type_idx.TypeDescriptor: "com.google.protobuf.Descriptors$FileDescriptor;" }))
            continue;

        foreach (var encodedMethodRow in classDataRow.direct_methods)
        {
            var methodIdRow = dex.METHOD_ID_ITEM[encodedMethodRow.method_idx_diff];

            if (methodIdRow.name_idx.data is not "<clinit>")
                continue;

            var instructions = encodedMethodRow.code_off.insns;

            // Last string constant loaded into each register by const-string.
            var registers = new string[RegisterFileSize];

            for (int index = 0; index < instructions.Length; index++)
            {
                var instruction = instructions[index];
                var opcode = (byte)instruction;
                var highByte = (byte)(instruction >> 8);

                switch (opcode)
                {
                    case 0x1A: // const-string vAA, string@BBBB
                    {
                        if (index + 1 >= instructions.Length)
                            break; // truncated: no operand unit left

                        var stringIndex = instructions[++index];
                        registers[highByte] = dex.STRING_DATA_ITEM[stringIndex].data;
                        break;
                    }

                    case 0x24: // filled-new-array {vC, vD, vE, vF, vG}, type@BBBB
                    {
                        var registerCount = highByte >> 4;

                        // Not a well-formed instruction here; keep scanning.
                        if (registerCount > MaxInlineArguments || index + 2 >= instructions.Length)
                            break;

                        // Unit index + 1 is the array type index, which is not needed.
                        var argumentRegistersEncoded = instructions[index + 2];

                        int[] registerIndexes =
                        {
                            argumentRegistersEncoded & 0x0F,
                            (argumentRegistersEncoded >> 4) & 0x0F,
                            (argumentRegistersEncoded >> 8) & 0x0F,
                            (argumentRegistersEncoded >> 12) & 0x0F,
                            highByte & 0x0F
                        };

                        yield return BuildDescriptor(registers, registerIndexes.Take(registerCount));

                        // Only the first array construction of the method is used.
                        index = instructions.Length;
                        break;
                    }

                    case 0x25: // filled-new-array/range {vCCCC .. vNNNN}, type@BBBB
                    {
                        if (index + 2 >= instructions.Length)
                            break; // truncated: operand units missing

                        // Unit index + 1 is the array type index, which is not needed.
                        var startRegisterIndex = instructions[index + 2];

                        yield return BuildDescriptor(registers, Enumerable.Range(startRegisterIndex, highByte));

                        // Only the first array construction of the method is used.
                        index = instructions.Length;
                        break;
                    }
                }
            }
        }
    }
}

/// <summary>
/// Concatenates the tracked string constants of the given registers, in order, and parses the
/// result as a serialized descriptor. Registers outside the tracked range or without a tracked
/// string are ignored.
/// </summary>
private static FileDescriptorProto BuildDescriptor(string[] registers, IEnumerable<int> registerIndexes)
{
    var stringBuilder = new StringBuilder();

    foreach (var registerIndex in registerIndexes)
    {
        if (registerIndex < registers.Length && registers[registerIndex] is { } registerContent)
            stringBuilder.Append(registerContent);
    }

    return ParseStringBuilder(stringBuilder);
}

/// <summary>
/// Encodes the accumulated text as Latin-1 bytes and parses those bytes as a
/// <see cref="FileDescriptorProto"/>.
/// </summary>
private static FileDescriptorProto ParseStringBuilder(StringBuilder stringBuilder)
{
    var latin1bytes = Encoding.Latin1.GetBytes(stringBuilder.ToString());
    return FileDescriptorProto.Parser.ParseFrom(latin1bytes);
}
181+
}

0 commit comments

Comments
 (0)