2222 per-run ``agentic_prompt_version`` table and the child's ``parent_version``
2323 resolves through it. ``project`` is bound to the target id.
2424 - **schemas**: bound to the target ``project`` and created.
25+ - **documents**: source uploads re-uploaded to the target project (skipping
26+ filenames already present); they live in their own store, not the Prompt
27+ Studio document table.
28+ - **verified data**: ground-truth rows re-pointed to the cloned document by
29+ filename. Extracted/comparison data is regenerable and not cloned.
25303. **Registry**: if the project has an active schema + prompt, republish its
2631 registry entry via the ``export`` action (mirror of custom_tool) and
2732 record an ``agentic_studio_registry`` remap. Projects with no source
3540from __future__ import annotations
3641
3742import logging
43+ import mimetypes
3844import threading
3945from typing import Any
4046
@@ -170,6 +176,8 @@ def _clone_project(
170176 self ._replicate_share (src , name , tgt_project_id , result , lock )
171177 self ._clone_prompt_versions (name , src_project_id , tgt_project_id , result , lock )
172178 self ._clone_schemas (name , src_project_id , tgt_project_id , result , lock )
179+ self ._clone_documents (name , src_project_id , tgt_project_id , result , lock )
180+ self ._clone_verified_data (name , src_project_id , tgt_project_id , result , lock )
173181 self ._republish_registry (name , src_project_id , tgt_project_id , result , lock )
174182
175183 def _build_project_payload (
@@ -422,6 +430,201 @@ def _clone_schemas(
422430 with lock :
423431 result .created += 1
424432
433+ # ----- documents -----
434+
435+ def _clone_documents (
436+ self ,
437+ name : str ,
438+ src_project_id : str ,
439+ tgt_project_id : str ,
440+ result : PhaseResult ,
441+ lock : threading .Lock ,
442+ ) -> None :
443+ try :
444+ src_docs = self .ctx .source .list_agentic_documents (src_project_id )
445+ except Exception as e :
446+ logger .exception ("agentic '%s': document listing failed: %s" , name , e )
447+ with lock :
448+ result .failed += 1
449+ result .errors .append (f"agentic { name } list documents: { e } " )
450+ return
451+ if not src_docs :
452+ return
453+
454+ # Honour the file strategy: 'skip' means no binary transfer, like the
455+ # files and lookups phases. Operator re-uploads on target.
456+ if self .ctx .options .file_strategy == "skip" :
457+ with lock :
458+ result .skipped += len (src_docs )
459+ result .warnings .append (
460+ f"agentic { name } : { len (src_docs )} document(s) not copied "
461+ "(file_strategy=skip) — upload them manually on target"
462+ )
463+ return
464+
465+ try :
466+ tgt_docs = self .ctx .target .list_agentic_documents (tgt_project_id )
467+ except Exception as e :
468+ logger .warning (
469+ "agentic '%s': target document listing failed "
470+ "(re-run may duplicate): %s" ,
471+ name ,
472+ e ,
473+ )
474+ tgt_docs = []
475+ target_names = {d .get ("original_filename" ) for d in tgt_docs }
476+
477+ for src in src_docs :
478+ file_name = src .get ("original_filename" )
479+ src_doc_id = src .get ("id" )
480+ if not file_name or not src_doc_id :
481+ continue
482+ if file_name in target_names :
483+ with lock :
484+ result .skipped += 1
485+ logger .info (
486+ "agentic '%s': document '%s' already on target — skipping" ,
487+ name ,
488+ file_name ,
489+ )
490+ continue
491+ self ._clone_one_document (
492+ name , tgt_project_id , src_doc_id , file_name , result , lock
493+ )
494+
495+ def _clone_one_document (
496+ self ,
497+ name : str ,
498+ tgt_project_id : str ,
499+ src_doc_id : str ,
500+ file_name : str ,
501+ result : PhaseResult ,
502+ lock : threading .Lock ,
503+ ) -> None :
504+ try :
505+ raw = self .ctx .source .download_agentic_document (src_doc_id )
506+ except Exception as e :
507+ logger .exception (
508+ "agentic '%s': document '%s' download failed: %s" , name , file_name , e
509+ )
510+ with lock :
511+ result .failed += 1
512+ result .errors .append (f"agentic { name } download { file_name } : { e } " )
513+ return
514+
515+ if len (raw ) > self .ctx .options .max_file_size :
516+ with lock :
517+ result .skipped += 1
518+ result .warnings .append (
519+ f"agentic { name } : document { file_name } exceeds size cap — "
520+ "upload it manually on target"
521+ )
522+ return
523+
524+ mime = mimetypes .guess_type (file_name )[0 ] or "application/pdf"
525+ try :
526+ self .ctx .target .upload_agentic_document (
527+ tgt_project_id , file_name , raw , mime
528+ )
529+ except Exception as e :
530+ logger .exception (
531+ "agentic '%s': document '%s' upload failed: %s" , name , file_name , e
532+ )
533+ with lock :
534+ result .failed += 1
535+ result .errors .append (f"agentic { name } upload { file_name } : { e } " )
536+ return
537+ with lock :
538+ result .created += 1
539+ logger .info ("agentic '%s': uploaded document '%s'" , name , file_name )
540+
541+ # ----- verified (ground-truth) data -----
542+
543+ def _clone_verified_data (
544+ self ,
545+ name : str ,
546+ src_project_id : str ,
547+ tgt_project_id : str ,
548+ result : PhaseResult ,
549+ lock : threading .Lock ,
550+ ) -> None :
551+ try :
552+ src_rows = self .ctx .source .list_agentic_verified_data (src_project_id )
553+ except Exception as e :
554+ logger .exception ("agentic '%s': verified-data listing failed: %s" , name , e )
555+ with lock :
556+ result .failed += 1
557+ result .errors .append (f"agentic { name } list verified-data: { e } " )
558+ return
559+ if not src_rows :
560+ return
561+
562+ # Verified data FKs a document; under 'skip' no documents are copied,
563+ # so skip the rows too — matching the plan and _clone_documents.
564+ if self .ctx .options .file_strategy == "skip" :
565+ with lock :
566+ result .skipped += len (src_rows )
567+ result .warnings .append (
568+ f"agentic { name } : { len (src_rows )} verified-data row(s) not "
569+ "copied (file_strategy=skip)"
570+ )
571+ return
572+
573+ # Verified data FKs a document; map source rows to target docs by
574+ # filename, the only identity stable across orgs.
575+ try :
576+ tgt_docs = self .ctx .target .list_agentic_documents (tgt_project_id )
577+ tgt_existing = self .ctx .target .list_agentic_verified_data (tgt_project_id )
578+ except Exception as e :
579+ logger .warning (
580+ "agentic '%s': target verified-data lookup failed "
581+ "(re-run may duplicate): %s" ,
582+ name ,
583+ e ,
584+ )
585+ tgt_docs , tgt_existing = [], []
586+ doc_id_by_name = {d .get ("original_filename" ): d .get ("id" ) for d in tgt_docs }
587+ verified_doc_ids = {r .get ("document" ) for r in tgt_existing }
588+
589+ for src in src_rows :
590+ file_name = src .get ("document_name" )
591+ tgt_doc_id = doc_id_by_name .get (file_name )
592+ if not tgt_doc_id :
593+ with lock :
594+ result .skipped += 1
595+ result .warnings .append (
596+ f"agentic { name } : verified data for '{ file_name } ' skipped — "
597+ "document not on target"
598+ )
599+ continue
600+ if tgt_doc_id in verified_doc_ids :
601+ with lock :
602+ result .skipped += 1
603+ continue
604+ payload = {
605+ "project" : tgt_project_id ,
606+ "document" : tgt_doc_id ,
607+ "data" : src .get ("data" ),
608+ }
609+ try :
610+ self .ctx .target .create_agentic_verified_data (payload )
611+ except Exception as e :
612+ logger .exception (
613+ "agentic '%s': verified data for '%s' create failed: %s" ,
614+ name ,
615+ file_name ,
616+ e ,
617+ )
618+ with lock :
619+ result .failed += 1
620+ result .errors .append (
621+ f"agentic { name } create verified-data { file_name } : { e } "
622+ )
623+ continue
624+ with lock :
625+ result .created += 1
626+ logger .info ("agentic '%s': cloned verified data for '%s'" , name , file_name )
627+
425628 # ----- registry -----
426629
427630 def _republish_registry (
@@ -507,8 +710,9 @@ def _plan_children(
507710 result : PhaseResult ,
508711 lock : threading .Lock ,
509712 ) -> None :
510- """Dry-run: count source prompt versions + schemas as planned and
511- record planned prompt-version ids so downstream resolves don't miss.
713+ """Dry-run: count source prompt versions + schemas + documents as
714+ planned and record planned prompt-version ids so downstream resolves
715+ don't miss.
512716 """
513717 try :
514718 src_versions = self .ctx .source .list_agentic_prompt_versions (
@@ -522,11 +726,27 @@ def _plan_children(
522726 )
523727 except Exception :
524728 src_schemas = []
729+ try :
730+ src_docs = self .ctx .source .list_agentic_documents (src_project_id )
731+ except Exception :
732+ src_docs = []
733+ try :
734+ src_verified = self .ctx .source .list_agentic_verified_data (src_project_id )
735+ except Exception :
736+ src_verified = []
525737 with lock :
526738 for v in src_versions :
527739 self .ctx .remap .record_planned ("agentic_prompt_version" , v ["id" ])
528740 result .created += 1
529741 result .created += len (src_schemas )
742+ # Documents move only when the file strategy copies binaries.
743+ # Verified data FKs a document, so skipping files strands it too.
744+ if self .ctx .options .file_strategy == "skip" :
745+ result .skipped += len (src_docs )
746+ result .skipped += len (src_verified )
747+ else :
748+ result .created += len (src_docs )
749+ result .created += len (src_verified )
530750
531751 # ----- settings -----
532752
0 commit comments