-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.js
More file actions
121 lines (103 loc) · 2.59 KB
/
parser.js
File metadata and controls
121 lines (103 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"use strict";
import { execFileSync } from "child_process";
import * as fs from "fs";
import * as path from "path";
import { stringifyCsv } from "./csv.js";
import {
formatCents,
makeTxFingerprint,
parseEurToCents,
parseStatementText,
} from "./parser-core.js";
/**
 * Run the external `pdftotext` program on a PDF and return its output.
 *
 * The tool is invoked with `-layout` and a trailing `-`; per pdftotext's
 * documentation that selects layout-preserving extraction and sends the
 * text to stdout, which is what this function returns.
 *
 * @param {string} pdfPath - Path of the PDF passed to `pdftotext`.
 * @returns {string} The child's stdout decoded as UTF-8.
 * @throws {Error} Whatever `execFileSync` throws (e.g. the executable is
 *   missing or exits with a non-zero status).
 */
function pdftotextLayout(pdfPath) {
  const commandArgs = ["-layout", pdfPath, "-"];
  const spawnOptions = { encoding: "utf8" };
  return execFileSync("pdftotext", commandArgs, spawnOptions);
}
/**
 * Parse a single statement PDF.
 *
 * Extracts the PDF's text with `pdftotextLayout`, then hands that text and
 * the file's base name (directory stripped) to `parseStatementText`.
 *
 * @param {string} pdfPath - Path to the statement PDF.
 * @returns {*} Whatever `parseStatementText` returns; callers in this file
 *   read `.rows` and `.validation` from it.
 */
function parseStatement(pdfPath) {
  // Extract first so a failing conversion surfaces before anything else runs.
  const extractedText = pdftotextLayout(pdfPath);
  const sourceName = path.basename(pdfPath);
  return parseStatementText(extractedText, sourceName);
}
/**
 * Parse several statement PDFs and merge their results.
 *
 * Every file contributes its `validation` entry. Rows are concatenated in
 * input order; when de-duplication is on, a row is kept only the first time
 * its `tx_fingerprint` is seen, and the fingerprint set is shared across all
 * files so duplicates between statements are dropped too.
 *
 * @param {Iterable<string>} pdfPaths - Paths of the PDFs to parse, in order.
 * @param {{ dedupe?: boolean }} [options] - De-duplication is enabled unless
 *   `dedupe` is exactly `false`.
 * @returns {{ rows: Array, validations: Array }} Merged rows and the
 *   per-file validation entries.
 */
function parseStatements(pdfPaths, options = {}) {
  const shouldDedupe = options.dedupe !== false;
  const collectedRows = [];
  const collectedValidations = [];
  const knownFingerprints = new Set();

  for (const pdfPath of pdfPaths) {
    const parsed = parseStatement(pdfPath);
    collectedValidations.push(parsed.validation);

    if (!shouldDedupe) {
      collectedRows.push(...parsed.rows);
      continue;
    }

    for (const row of parsed.rows) {
      const fingerprint = row.tx_fingerprint;
      if (!knownFingerprints.has(fingerprint)) {
        knownFingerprints.add(fingerprint);
        collectedRows.push(row);
      }
    }
  }

  return { rows: collectedRows, validations: collectedValidations };
}
/**
 * Serialize rows to CSV and write them to disk.
 *
 * The column list is taken from the keys of the first row only and passed,
 * together with all rows, to `stringifyCsv`; the result is written as UTF-8.
 *
 * @param {string} filePath - Destination file.
 * @param {Array<Object>} rows - Records to write; must not be empty.
 * @throws {Error} "No rows to write." when `rows` has no elements.
 */
function writeCsv(filePath, rows) {
  const hasRows = Boolean(rows.length);
  if (!hasRows) {
    throw new Error("No rows to write.");
  }

  const columnNames = Object.keys(rows[0]);
  fs.writeFileSync(filePath, stringifyCsv(rows, columnNames), "utf8");
}
/**
 * Parse the command-line arguments of the statement parser.
 *
 * Recognized options:
 *   --out <file>             path of the transactions CSV
 *   --validation-out <file>  path of the validation CSV
 *   --no-dedupe              disable de-duplication
 * Every argument that does not start with "-" is collected as a PDF path.
 *
 * @param {string[]} argv - Arguments without the runtime/script prefix.
 * @returns {{ pdfs: string[], out: string, validationOut: string, dedupe: boolean }}
 *   Parsed settings, with defaults for anything not given.
 * @throws {Error} On an unrecognized option, or when `--out` /
 *   `--validation-out` is the last argument and therefore has no value.
 */
function parseArgs(argv) {
  const args = {
    pdfs: [],
    out: "pbz_statements.csv",
    validationOut: "pbz_validation.csv",
    dedupe: true,
  };
  // Flags that consume the following argument, mapped to the key they fill.
  const valueOptions = new Map([
    ["--out", "out"],
    ["--validation-out", "validationOut"],
  ]);

  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];
    const targetKey = valueOptions.get(arg);
    if (targetKey !== undefined) {
      const value = argv[i + 1];
      // Without this check a trailing flag would store `undefined` and only
      // fail much later, inside the file write, with an unrelated message.
      if (value === undefined) {
        throw new Error(`Missing value for option: ${arg}`);
      }
      args[targetKey] = value;
      i += 1; // skip the value that was just consumed
    } else if (arg === "--no-dedupe") {
      args.dedupe = false;
    } else if (arg.startsWith("-")) {
      throw new Error(`Unknown option: ${arg}`);
    } else {
      args.pdfs.push(arg);
    }
  }
  return args;
}
/**
 * CLI entry point: parse arguments, parse every PDF, write both CSV files.
 *
 * Arguments are taken from `Bun.argv` starting at index 2. The transaction
 * rows go to the `out` path and the validation entries to the
 * `validationOut` path returned by `parseArgs`.
 *
 * NOTE(review): the usage text does not mention `--no-dedupe`, although
 * `parseArgs` accepts it.
 *
 * @throws {Error} With the usage text when no PDF paths were given, or with
 *   "No transactions parsed." when parsing produced no rows.
 */
function main() {
  const cliArgs = Bun.argv.slice(2);
  const { pdfs, out, validationOut, dedupe } = parseArgs(cliArgs);
  if (!pdfs.length) {
    throw new Error("Usage: bun parser.js <pdfs...> [--out file] [--validation-out file]");
  }

  const parsed = parseStatements(pdfs, { dedupe });
  if (!parsed.rows.length) {
    throw new Error("No transactions parsed.");
  }

  writeCsv(out, parsed.rows);
  writeCsv(validationOut, parsed.validations);
}
// Run the CLI only when this module is the program's entry point; importing
// it from another module must not trigger any work.
if (import.meta.main) {
  try {
    main();
  } catch (failure) {
    // Print just the message when there is one, otherwise the thrown value,
    // then signal failure to the calling shell.
    const report = failure.message || failure;
    console.error(report);
    process.exit(1);
  }
}
// Public API of this module. `parseStatement` and `parseStatements` are
// defined in this file; the remaining names are re-exported unchanged from
// "./parser-core.js". The CLI helpers (`pdftotextLayout`, `writeCsv`,
// `parseArgs`, `main`) are not exported.
export {
parseStatementText,
parseStatement,
parseStatements,
makeTxFingerprint,
parseEurToCents,
formatCents,
};