Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 90 additions & 11 deletions client/electron/backend/convertController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { ensureTempDir } from "./utils/pathHelper";
import { convertFileViaService } from "./utils/fileConversion";
import { configManager } from "../configManager";
import { httpGetJson } from "./utils/httpClient";
import { checkPandocAvailable, getPandocFormats, convertFileWithPandoc, mapToPandocFormat } from "./utils/pandocConverter";

type FormatsData = {
inputs: string[];
Expand Down Expand Up @@ -246,12 +247,56 @@ function normalizeFormats(list: unknown): string[] {
return normalized;
}

/**
 * Build the formats payload for local mode by asking the configured pandoc
 * executable which formats it supports.
 *
 * @returns The normalized input/output format lists, their sorted union, and
 *   the default output directory returned by `ensureTempDir()`.
 * @throws ServiceError with code `PANDOC_NOT_AVAILABLE` when the availability
 *   probe reports that pandoc cannot be used.
 */
async function loadLocalPandocFormats(): Promise<FormatsData> {
  const { pandocPath } = configManager.getConfig();

  // Guard clause: nothing below makes sense without a working pandoc.
  if (!(await checkPandocAvailable(pandocPath))) {
    const unavailable: ServiceError = new Error("pandoc_not_available");
    unavailable.code = "PANDOC_NOT_AVAILABLE";
    throw unavailable;
  }

  // Ask pandoc for its format lists and normalize both directions.
  const reported = await getPandocFormats(pandocPath);
  const inputs = normalizeFormats(reported.inputs);
  const outputs = normalizeFormats(reported.outputs);

  // De-duplicated, sorted union of every format seen in either direction.
  const union = new Set<string>([...inputs, ...outputs]);
  const combined = Array.from(union).sort();

  const defaultDir = await ensureTempDir();

  // Flag is derived purely from whether a markdown writer is listed.
  const hasMarkdownOutput = outputs.includes("md") || outputs.includes("markdown");

  return {
    inputs,
    outputs,
    input_formats: inputs,
    output_formats: outputs,
    combined,
    // Local mode has no remote endpoint to report.
    service_endpoint: null,
    default_output_directory: defaultDir,
    pandoc_available: true,
    markitdown_available: hasMarkdownOutput,
  };
}

async function loadServiceFormats(): Promise<FormatsData> {
if (cachedFormats && Date.now() - cachedFormats.ts < CACHE_TTL_MS) {
return cachedFormats.data;
}

const cfg = configManager.getConfig();
const mode = cfg.fileConvertMode || 'remote';

// Use local pandoc mode
if (mode === 'local') {
const data = await loadLocalPandocFormats();
cachedFormats = { data, ts: Date.now() };
return data;
}

// Use remote service mode
const baseRaw = (cfg.fileConvertEndpoint || "").trim();
if (!baseRaw) {
const err: ServiceError = new Error("converter_service_not_configured");
Expand Down Expand Up @@ -308,22 +353,32 @@ export function registerConversionRoutes(appExp: Express): void {
} catch (err) {
const serviceErr = err as ServiceError;
const code = serviceErr.code || "REMOTE_FETCH_FAILED";
const status = code === "SERVICE_NOT_CONFIGURED" ? 503 : 502;
let status = 502;
let message = "fetch_failed";
let errorMessage = "Failed to retrieve formats from converter service.";

if (code === "SERVICE_NOT_CONFIGURED") {
status = 503;
message = "service_not_configured";
errorMessage = "File converter service endpoint not configured.";
} else if (code === "PANDOC_NOT_AVAILABLE") {
status = 503;
message = "pandoc_not_available";
errorMessage = "Pandoc is not available. Please install pandoc or configure the correct path.";
}

logger.error("/api/files/convert/formats failed", {
code,
status,
error: String(serviceErr?.message || err),
});
res.status(status).json({
success: false,
message: code === "SERVICE_NOT_CONFIGURED" ? "service_not_configured" : "fetch_failed",
message,
data: null,
error: {
code,
message:
code === "SERVICE_NOT_CONFIGURED"
? "File converter service endpoint not configured."
: "Failed to retrieve formats from converter service.",
message: errorMessage,
details: serviceErr?.status ?? null,
},
timestamp: new Date().toISOString(),
Expand Down Expand Up @@ -579,13 +634,26 @@ export function registerConversionRoutes(appExp: Express): void {

let finalOut = "";
try {
const cfg = configManager.getConfig();
const mode = cfg.fileConvertMode || 'remote';
const srcExt = path.extname(filePath).replace(/^\./, "").toLowerCase() || "txt";
const tempResultPath = await convertFileViaService(filePath, srcExt, targetFormat);
await fsp.copyFile(tempResultPath, outPath);
finalOut = outPath;

if (mode === 'local') {
// Use local pandoc
const srcFmt = mapToPandocFormat(srcExt);
const tgtFmt = mapToPandocFormat(targetFormat);
const tempResultPath = await convertFileWithPandoc(filePath, srcFmt, tgtFmt, cfg.pandocPath);
await fsp.copyFile(tempResultPath, outPath);
finalOut = outPath;
} else {
// Use remote service
const tempResultPath = await convertFileViaService(filePath, srcExt, targetFormat);
await fsp.copyFile(tempResultPath, outPath);
finalOut = outPath;
}
} catch (e) {
const messageText = e instanceof Error ? e.message : String(e);
logger.error("/api/files/convert conversion service failed", {
logger.error("/api/files/convert conversion failed", {
source: filePath,
targetFormat,
error: messageText,
Expand All @@ -601,11 +669,22 @@ export function registerConversionRoutes(appExp: Express): void {
});
return;
}
if (messageText.toLowerCase().includes("pandoc")) {
res.status(503).json({
success: false,
message: "pandoc_not_available",
data: null,
error: { code: "PANDOC_NOT_AVAILABLE", message: "Pandoc is not available or not configured correctly.", details: messageText },
timestamp: new Date().toISOString(),
request_id: "",
});
return;
}
res.status(502).json({
success: false,
message: "conversion_failed",
data: null,
error: { code: "CONVERSION_FAILED", message: "Failed to convert file via remote service", details: messageText },
error: { code: "CONVERSION_FAILED", message: "Failed to convert file", details: messageText },
timestamp: new Date().toISOString(),
request_id: "",
});
Expand Down
205 changes: 205 additions & 0 deletions client/electron/backend/utils/pandocConverter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import { spawn } from "child_process";
import { promises as fsp } from "fs";
import path from "path";
import { logger } from "../../logger";
import { configManager } from "../../configManager";
import { ensureTempDir } from "./pathHelper";

/**
 * Check if pandoc is available at the specified path or in system PATH.
 *
 * Runs `<pandoc> --version` and treats the probe as successful only when the
 * process exits with code 0 and wrote something to stdout. This function never
 * rejects: every failure mode is reported as `false`.
 *
 * @param pandocPath - Optional path to the pandoc executable; when empty or
 *   omitted the bare command `pandoc` is used.
 * @returns `true` when the probe succeeds; `false` on a spawn error, a
 *   non-zero exit code, empty output, or when the probe exceeds 5 seconds.
 */
export async function checkPandocAvailable(pandocPath?: string): Promise<boolean> {
  const cmd = pandocPath || "pandoc";

  return new Promise<boolean>((resolve) => {
    let settled = false;
    let hasOutput = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Single exit point: resolve once and always release the timeout so it
    // neither keeps the event loop alive nor kills an already-finished probe.
    const finish = (available: boolean): void => {
      if (settled) {
        return;
      }
      settled = true;
      if (timer !== undefined) {
        clearTimeout(timer);
      }
      resolve(available);
    };

    try {
      // No `shell: true`: the executable is launched directly, so a path that
      // contains spaces or shell metacharacters is passed through verbatim.
      const proc = spawn(cmd, ["--version"]);

      proc.stdout.on("data", () => {
        hasOutput = true;
      });

      // Fired when the executable cannot be started (e.g. it does not exist).
      proc.on("error", () => {
        finish(false);
      });

      proc.on("close", (code) => {
        finish(code === 0 && hasOutput);
      });

      // Timeout after 5 seconds
      timer = setTimeout(() => {
        proc.kill();
        finish(false);
      }, 5000);
    } catch {
      // spawn() can throw synchronously on invalid arguments.
      finish(false);
    }
  });
}

/**
 * Run `<cmd> <flag>` and return the trimmed, non-empty lines it printed to stdout.
 *
 * @param cmd - Pandoc executable to launch.
 * @param flag - Listing flag, e.g. `--list-input-formats`.
 * @returns One entry per non-blank output line.
 * @throws Error when the process cannot be started or exits with a non-zero code.
 */
function listPandocFormats(cmd: string, flag: string): Promise<string[]> {
  return new Promise<string[]>((resolve, reject) => {
    // No `shell: true`: the executable is launched directly, so a path that
    // contains spaces or shell metacharacters is passed through verbatim.
    const proc = spawn(cmd, [flag]);

    let stdout = "";
    let stderr = "";

    proc.stdout.on("data", (data) => {
      stdout += data.toString();
    });

    proc.stderr.on("data", (data) => {
      stderr += data.toString();
    });

    // Launch failures (e.g. missing executable) are reported in the same
    // message shape as a non-zero exit.
    proc.on("error", (err) => {
      reject(new Error(`pandoc ${flag} failed: ${err.message}`));
    });

    proc.on("close", (code) => {
      if (code !== 0) {
        reject(new Error(`pandoc ${flag} failed: ${stderr}`));
        return;
      }

      const formats = stdout
        .split("\n")
        // trim() also removes a trailing "\r" from CRLF output.
        .map((line) => line.trim())
        .filter((line) => line.length > 0);

      resolve(formats);
    });
  });
}

/**
 * Get list of supported formats from pandoc.
 *
 * Queries `--list-input-formats` and `--list-output-formats`. The two listings
 * are independent, so they are run concurrently.
 *
 * @param pandocPath - Optional path to the pandoc executable; when empty or
 *   omitted the bare command `pandoc` is used.
 * @returns The reader (`inputs`) and writer (`outputs`) format names as printed by pandoc.
 * @throws Error when either listing cannot be started or exits with a non-zero code.
 */
export async function getPandocFormats(pandocPath?: string): Promise<{ inputs: string[]; outputs: string[] }> {
  const cmd = pandocPath || "pandoc";

  const [inputs, outputs] = await Promise.all([
    listPandocFormats(cmd, "--list-input-formats"),
    listPandocFormats(cmd, "--list-output-formats"),
  ]);

  return { inputs, outputs };
}

/** Outcome of one pandoc invocation; `error` is set only when the process could not be started. */
type PandocRunResult = { code: number | null; stderr: string; error: Error | null };

/**
 * Launch pandoc with the given arguments and wait for it to finish.
 * Always resolves (exactly once); failures are described by the result object.
 */
function runPandocProcess(cmd: string, args: string[]): Promise<PandocRunResult> {
  return new Promise<PandocRunResult>((resolve) => {
    let settled = false;
    let stderr = "";

    // "error" may be followed by "close"; only the first outcome is reported.
    const finish = (result: PandocRunResult): void => {
      if (settled) {
        return;
      }
      settled = true;
      resolve(result);
    };

    // No `shell: true`: arguments are handed to pandoc as-is, so file names
    // with spaces or shell metacharacters are neither split nor interpreted.
    const proc = spawn(cmd, args);

    proc.stderr.on("data", (data) => {
      stderr += data.toString();
    });

    proc.on("error", (err) => {
      finish({ code: null, stderr, error: err });
    });

    proc.on("close", (code) => {
      finish({ code, stderr, error: null });
    });
  });
}

/**
 * Convert a file using local pandoc.
 *
 * The result is written to a timestamped file inside the directory returned by
 * `ensureTempDir()`; the caller is responsible for copying it elsewhere.
 *
 * @param filePath - Path of the document to convert.
 * @param sourceFormat - Pandoc reader name passed to `-f`.
 * @param targetFormat - Pandoc writer name passed to `-t`; also used as the
 *   output file extension (`markdown` becomes `md`).
 * @param pandocPath - Optional path to the pandoc executable; when empty or
 *   omitted the bare command `pandoc` is used.
 * @returns Path of the converted file.
 * @throws Error when pandoc cannot be started, exits with a non-zero code, or
 *   finishes without producing the output file.
 */
export async function convertFileWithPandoc(
  filePath: string,
  sourceFormat: string,
  targetFormat: string,
  pandocPath?: string
): Promise<string> {
  const cmd = pandocPath || "pandoc";

  // Determine output extension
  const outExt = targetFormat === "markdown" ? "md" : targetFormat;

  // Create output path in temp directory
  const tempDir = await ensureTempDir();
  const baseName = path.basename(filePath, path.extname(filePath));
  const outputPath = path.join(tempDir, `${baseName}_${Date.now()}.${outExt}`);

  // Build pandoc command
  const args = [
    "-f", sourceFormat,
    "-t", targetFormat,
    "-o", outputPath,
    filePath
  ];

  logger.info("Converting file with pandoc", {
    filePath,
    sourceFormat,
    targetFormat,
    outputPath,
    cmd,
    args
  });

  const result = await runPandocProcess(cmd, args);

  if (result.error) {
    logger.error("Pandoc process error", { err: result.error, filePath });
    // Keep "Pandoc" in the message so callers that classify failures by
    // message text can still recognise a launcher problem.
    throw new Error(`Pandoc process failed to start: ${result.error.message}`);
  }

  if (result.code !== 0) {
    logger.error("Pandoc conversion failed", { code: result.code, stderr: result.stderr, filePath });
    throw new Error(`Pandoc conversion failed: ${result.stderr}`);
  }

  // Verify output file exists
  try {
    await fsp.access(outputPath);
  } catch {
    logger.error("Output file not found after conversion", { outputPath });
    throw new Error("Pandoc conversion completed but output file not found");
  }

  logger.info("Pandoc conversion successful", { outputPath });
  return outputPath;
}

/**
 * Aliases (file extensions / alternative names) that differ from the pandoc
 * format identifier. Names that already are pandoc identifiers (docx, odt,
 * rtf, epub, rst, ...) need no entry because unknown keys pass through as-is.
 * A Map is used instead of an object literal so that inherited property names
 * such as "constructor" or "__proto__" are never treated as entries.
 */
const PANDOC_FORMAT_ALIASES: ReadonlyMap<string, string> = new Map<string, string>([
  ["md", "markdown"],
  ["txt", "plain"],
  ["htm", "html"],
  ["xhtml", "html"],
  ["tex", "latex"],
]);

/**
 * Map common format names to pandoc format identifiers.
 *
 * @param fmt - File extension or format name; matched case-insensitively.
 * @returns The pandoc identifier for a known alias, otherwise the lower-cased
 *   input unchanged (an empty string for empty input).
 */
export function mapToPandocFormat(fmt: string): string {
  const key = (fmt || "").toLowerCase();
  return PANDOC_FORMAT_ALIASES.get(key) ?? key;
}
6 changes: 6 additions & 0 deletions client/electron/configManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ export interface AppConfig {
bailianVisionModel?: string;
/** Optional HTTP endpoint for third-party file conversion service */
fileConvertEndpoint?: string;
/** File conversion mode: 'local' uses local pandoc, 'remote' uses remote service */
fileConvertMode?: 'local' | 'remote';
/** Path to local pandoc executable (only used when fileConvertMode is 'local') */
pandocPath?: string;
/** Relative or absolute path to the local SQLite database file */
sqliteDbPath: string;
/** UI language preference, e.g., 'en' | 'zh' */
Expand Down Expand Up @@ -241,6 +245,8 @@ const DEFAULT_CONFIG: AppConfig = {
llamacppServerArgs: undefined,
},
fileConvertEndpoint: "https://converter.pegamob.com",
fileConvertMode: 'remote',
pandocPath: undefined,
// Default to repository-standard SQLite location; can be overridden in config.json
sqliteDbPath: "database/files.db",
// App defaults (override in config.json)
Expand Down
Loading