Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 138 additions & 37 deletions lib/Extended_ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,22 @@
open Ocamlformat_parser_extended
include Parsetree

let equal_core_type : core_type -> core_type -> bool = Poly.equal

type use_file = toplevel_phrase list

type repl_file = repl_phrase list

module Std_parsetree = Ocamlformat_parser_standard.Parsetree

type ('a, 'b) paired = {extended: 'a; std: 'b}

type 'a t =
| Structure : structure t
| Signature : signature t
| Use_file : use_file t
| Core_type : core_type t
| Module_type : module_type t
| Expression : expression t
| Pattern : pattern t
| Structure : (structure, Std_parsetree.structure) paired t
| Signature : (signature, Std_parsetree.signature) paired t
| Use_file : (use_file, Std_parsetree.toplevel_phrase list) paired t
| Core_type : (core_type, Std_parsetree.core_type) paired t
| Module_type : (module_type, Std_parsetree.module_type) paired t
| Expression : (expression, Std_parsetree.expression) paired t
| Pattern : (pattern, Std_parsetree.pattern) paired t
| Repl_file : repl_file t
| Documentation : Ocamlformat_odoc_parser.Ast.t t

Expand All @@ -42,17 +44,16 @@ let of_syntax = function
| Repl_file -> Any Repl_file
| Documentation -> Any Documentation

let equal (type a) (_ : a t) : a -> a -> bool = Poly.equal

let map (type a) (x : a t) (m : Ast_mapper.mapper) : a -> a =
match x with
| Structure -> m.structure m
| Signature -> m.signature m
| Use_file -> List.map ~f:(m.toplevel_phrase m)
| Core_type -> m.typ m
| Module_type -> m.module_type m
| Expression -> m.expr m
| Pattern -> m.pat m
| Structure -> fun v -> {v with extended= m.structure m v.extended}
| Signature -> fun v -> {v with extended= m.signature m v.extended}
| Use_file ->
fun v -> {v with extended= List.map ~f:(m.toplevel_phrase m) v.extended}
| Core_type -> fun v -> {v with extended= m.typ m v.extended}
| Module_type -> fun v -> {v with extended= m.module_type m v.extended}
| Expression -> fun v -> {v with extended= m.expr m v.extended}
| Pattern -> fun v -> {v with extended= m.pat m v.extended}
| Repl_file -> List.map ~f:(m.repl_phrase m)
| Documentation -> Fn.id

Expand Down Expand Up @@ -346,23 +347,51 @@ module Parse = struct

let ast (type a) (fg : a t) ~ocaml_version ~preserve_beginend
~prefer_let_puns ~input_name str : a =
map fg
(normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns)
@@
let nm =
normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns
in
let lexbuf = Lexing.from_string str in
let ocaml_version =
let ocaml_version_pair =
Some Ocaml_version.(major ocaml_version, minor ocaml_version)
in
Location.init_info lexbuf input_name ;
let parse_std std_fg =
(* Suppress warnings during raw std parse to avoid duplicate w50
warnings — w50 handling is done separately by parse_ocaml *)
Warning.with_warning_filter
~filter_warning:(fun _loc _warn -> false)
~filter_alert:(fun _loc _alert -> false)
~f:(fun () -> Std_ast.Parse.ast std_fg ~ocaml_version ~input_name str)
in
let paired normalize parse_ext std_fg =
let extended =
normalize nm (parse_ext ~ocaml_version:ocaml_version_pair lexbuf)
in
{extended; std= parse_std std_fg}
in
match fg with
| Structure -> Parse.implementation ~ocaml_version lexbuf
| Signature -> Parse.interface ~ocaml_version lexbuf
| Use_file -> Parse.use_file ~ocaml_version lexbuf
| Core_type -> Parse.core_type ~ocaml_version lexbuf
| Module_type -> Parse.module_type ~ocaml_version lexbuf
| Expression -> Parse.expression ~ocaml_version lexbuf
| Pattern -> Parse.pattern ~ocaml_version lexbuf
| Repl_file -> Toplevel_lexer.repl_file ~ocaml_version lexbuf
| Structure ->
paired
(fun nm -> nm.structure nm)
Parse.implementation Std_ast.Structure
| Signature ->
paired (fun nm -> nm.signature nm) Parse.interface Std_ast.Signature
| Use_file ->
paired
(fun nm -> List.map ~f:(nm.toplevel_phrase nm))
Parse.use_file Std_ast.Use_file
| Core_type ->
paired (fun nm -> nm.typ nm) Parse.core_type Std_ast.Core_type
| Module_type ->
paired
(fun nm -> nm.module_type nm)
Parse.module_type Std_ast.Module_type
| Expression ->
paired (fun nm -> nm.expr nm) Parse.expression Std_ast.Expression
| Pattern -> paired (fun nm -> nm.pat nm) Parse.pattern Std_ast.Pattern
| Repl_file ->
List.map ~f:(nm.repl_phrase nm)
(Toplevel_lexer.repl_file ~ocaml_version:ocaml_version_pair lexbuf)
| Documentation ->
let pos = (Location.curr lexbuf).loc_start in
let pos = {pos with pos_fname= input_name} in
Expand All @@ -377,13 +406,13 @@ module Printast = struct
let repl_file = Format.pp_print_list repl_phrase

let ast (type a) : a t -> _ -> a -> _ = function
| Structure -> implementation
| Signature -> interface
| Use_file -> use_file
| Core_type -> core_type
| Module_type -> module_type
| Expression -> expression
| Pattern -> pattern
| Structure -> fun fmt v -> implementation fmt v.extended
| Signature -> fun fmt v -> interface fmt v.extended
| Use_file -> fun fmt v -> use_file fmt v.extended
| Core_type -> fun fmt v -> core_type fmt v.extended
| Module_type -> fun fmt v -> module_type fmt v.extended
| Expression -> fun fmt v -> expression fmt v.extended
| Pattern -> fun fmt v -> pattern fmt v.extended
| Repl_file -> repl_file
| Documentation -> Docstring.dump
end
Expand All @@ -395,3 +424,75 @@ module Asttypes = struct

let is_recursive = function Recursive -> true | Nonrecursive -> false
end

type std_value = Std_value : 'a Std_ast.t * 'a -> std_value

let get_std (type a) (fg : a t) (v : a) : std_value option =
match fg with
| Structure -> Some (Std_value (Structure, v.std))
| Signature -> Some (Std_value (Signature, v.std))
| Use_file -> Some (Std_value (Use_file, v.std))
| Core_type -> Some (Std_value (Core_type, v.std))
| Module_type -> Some (Std_value (Module_type, v.std))
| Expression -> Some (Std_value (Expression, v.std))
| Pattern -> Some (Std_value (Pattern, v.std))
| Repl_file -> None
| Documentation -> None

type std_pair = Std_pair : 'a Std_ast.t * 'a * 'a -> std_pair

let get_std_pair (type a) (fg : a t) (v1 : a) (v2 : a) : std_pair option =
match fg with
| Structure -> Some (Std_pair (Structure, v1.std, v2.std))
| Signature -> Some (Std_pair (Signature, v1.std, v2.std))
| Use_file -> Some (Std_pair (Use_file, v1.std, v2.std))
| Core_type -> Some (Std_pair (Core_type, v1.std, v2.std))
| Module_type -> Some (Std_pair (Module_type, v1.std, v2.std))
| Expression -> Some (Std_pair (Expression, v1.std, v2.std))
| Pattern -> Some (Std_pair (Pattern, v1.std, v2.std))
| Repl_file -> None
| Documentation -> None

let dump (type a) (fg : a t) fmt (v : a) =
match get_std fg v with
| Some (Std_value (std_fg, std_v)) -> Std_ast.Printast.ast std_fg fmt std_v
| None -> Printast.ast fg fmt v

let dump_normalized (type a) (fg : a t) ~normalize_code conf fmt (v : a) =
match get_std fg v with
| Some (Std_value (std_fg, std_v)) ->
Std_ast.Printast.ast std_fg fmt
(Normalize_std_ast.ast std_fg ~normalize_code conf std_v)
| None -> Printast.ast fg fmt v

type ast_check_result =
| Ast_preserved
| Docstrings_moved of Cmt.error list
| Ast_changed

let equivalent (type a) (fg : a t) ~normalize_code conf (old_v : a)
(new_v : a) : ast_check_result =
match get_std_pair fg old_v new_v with
| None ->
(* TODO: Repl_file and Documentation have no std AST, so we skip the
equivalence check.

- Repl_file: could validate each toplevel phrase individually.

- Documentation: could check each formatted code block for AST
preservation. *)
Ast_preserved
| Some (Std_pair (std_fg, old_std, new_std)) ->
if
Normalize_std_ast.equal std_fg ~normalize_code
~ignore_doc_comments:(not conf.Conf.opr_opts.comment_check.v)
conf old_std new_std
then Ast_preserved
else if
Normalize_std_ast.equal std_fg ~normalize_code
~ignore_doc_comments:true conf old_std new_std
then
Docstrings_moved
(Normalize_std_ast.moved_docstrings ~normalize_code std_fg conf
old_std new_std )
else Ast_changed
78 changes: 59 additions & 19 deletions lib/Extended_ast.mli
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,39 @@ type use_file = toplevel_phrase list

type repl_file = repl_phrase list

module Std_parsetree = Ocamlformat_parser_standard.Parsetree

(** Pairs an extended AST (['a]) with its standard-library counterpart
(['b]). Parsing produces both ASTs so that the extended one can be used
for formatting while the standard one is used for equivalence checking. *)
type ('a, 'b) paired = {extended: 'a; std: 'b}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure to understand what this type provides. Do we want to keep the two AST paired anywhere outside of Translation_unit ?
What are the benefits from moving the code from Parse_with_comments into Extended_ast ?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parse_with_comments.parse should only be used for "ocaml code", code parsed by the ocaml parser/lexer. It should not be used for Documentation for instance (it's wrong in the main branch).

We need to dispatch on the syntax to call it only when appropriate, the dispatch is already done in Extended_ast, it feels natural to move it there.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure to understand what this type provides. Do we want to keep the two AST paired anywhere outside of Translation_unit ?

I've been trying to minimize behavior changes in the PR.
We could probably parse the Std_ast only when needed in equivalent, saving the parsing for files already properly formatted. If we go that direction, we would need to have access to the source as string so that we can parse the std_ast.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do have the source as string in Translation_unit, both before and after formatting. Perhaps Parse_with_comments could be extended with the dispatch ? (and possibly renamed)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can possible move back some parsing logic into Parse_with_comments. But I'm not sure it gives much benefit.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added back Parse_with_comments.. Let me know what you think

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure to see the point of keeping both Parse_with_comments and the paired type. I think I prefer to keep Parse_with_comments because otherwise, we just add a lot of code into Extended_ast that in fact work with something else than the extended ast.

I don't think the equivalent function make much sense. It should be 'a Std_ast.t -> ... -> 'a -> 'a -> ast_check_result and not work with the extended AST at all.
It also can reasonably be string -> string -> .. and we could unify Std_ast.t and Extended_ast.t:

val ext_parse : ('ext, 'std) t -> string -> 'ext
val std_parse : ('ext, 'std) t -> string -> 'std

val equivalent :
     ('ext, 'std) t
  -> ...
  -> string
  -> string
  (* or 'std -> 'std -> *)
  -> ast_check_result

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The equivalent signature you propose forces to parse the same string multiple time when iterating to find a fix point. This is something Ive optimized long time ago in the early days of ocamlformat.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • equivalent can't take a 'std because not all format have one.
  • Extracting comment is very much linked to the implementation of the parser. It seems unnatural to have the logic spread across to files.
  • Parse_with_comment and paired are orthogonal.

because otherwise, we just add a lot of code into Extended_ast that in fact work with something else than the extended ast.

The Std_ast is pretty much an implementation detail of the check of some of the file format

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Julow, think more about this. I wonder if we should inline fields from the "with_comment" record into Extended_ast.t.

  • the prefix is only relevent for use_file (and structure maybe)
  • comments is not relevent for documentation I think
  • Source is a bit weird because it deal with "ocaml" token. which we don't exactly have/want for mll mly files for instance.

The change would mean that an Extended_ast.t contains every thing needed for printing and checking.


(** Fragment types. OCaml fragments carry both extended and standard ASTs.
[Repl_file] has no standard parser counterpart. [Documentation] uses
the odoc parser directly and does not need a paired representation. *)
type 'a t =
| Structure : structure t
| Signature : signature t
| Use_file : use_file t
| Core_type : core_type t
| Module_type : module_type t
| Expression : expression t
| Pattern : pattern t
| Structure : (structure, Std_parsetree.structure) paired t
| Signature : (signature, Std_parsetree.signature) paired t
| Use_file : (use_file, Std_parsetree.toplevel_phrase list) paired t
| Core_type : (core_type, Std_parsetree.core_type) paired t
| Module_type : (module_type, Std_parsetree.module_type) paired t
| Expression : (expression, Std_parsetree.expression) paired t
| Pattern : (pattern, Std_parsetree.pattern) paired t
| Repl_file : repl_file t
| Documentation : Ocamlformat_odoc_parser.Ast.t t

type any_t = Any : 'a t -> any_t [@@unboxed]

val of_syntax : Syntax.t -> any_t

val map : 'a t -> Ast_mapper.mapper -> 'a -> 'a

module Printast : sig
include module type of Printast

val ast : 'a t -> Format.formatter -> 'a -> unit
end

module Parse : sig
val ast :
'a t
Expand All @@ -43,22 +61,44 @@ module Parse : sig
-> 'a
end

val equal_core_type : core_type -> core_type -> bool

val equal : 'a t -> 'a -> 'a -> bool

val map : 'a t -> Ast_mapper.mapper -> 'a -> 'a

module Printast : sig
include module type of Printast

val ast : 'a t -> Format.formatter -> 'a -> unit
end

module Asttypes : sig
include module type of Asttypes

val is_override : override_flag -> bool

val is_recursive : rec_flag -> bool
end

type std_value = Std_value : 'a Std_ast.t * 'a -> std_value

val get_std : 'a t -> 'a -> std_value option
(** Extract the std AST with its [Std_ast.t] witness, or [None] for
[Repl_file] and [Documentation]. *)

val dump : 'a t -> Format.formatter -> 'a -> unit
(** Print the std AST for debug output. Falls back to extended AST for
[Repl_file] and [Documentation]. *)

val dump_normalized :
'a t
-> normalize_code:(string -> string)
-> Conf.t
-> Format.formatter
-> 'a
-> unit
(** Print the normalized std AST for debug output. *)

type ast_check_result =
| Ast_preserved
| Docstrings_moved of Cmt.error list
| Ast_changed

val equivalent :
'a t
-> normalize_code:(string -> string)
-> Conf.t
-> 'a
-> 'a
-> ast_check_result
(** Check whether formatting preserved the standard AST. For [Repl_file]
and [Documentation], always returns [Ast_preserved]. *)
20 changes: 12 additions & 8 deletions lib/Fmt_ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5101,18 +5101,22 @@ let fmt_file (type a) ~ctx ~fmt_code ~debug (fragment : a Extended_ast.t)
source cmts conf (itms : a) =
let c = {source; cmts; conf; debug; fmt_code} in
match (fragment, itms) with
| Structure, [] | Signature, [] | Use_file, [] ->
| Structure, {extended= []; _}
|Signature, {extended= []; _}
|Use_file, {extended= []; _} ->
Cmts.fmt_after ~pro:noop c Location.none
| Structure, l -> Chunk.split_and_fmt Structure c ctx l
| Signature, l -> Chunk.split_and_fmt Signature c ctx l
| Use_file, l -> Chunk.split_and_fmt Use_file c ctx l
| Core_type, ty -> fmt_core_type c (sub_typ ~ctx:(Pld (PTyp ty)) ty)
| Module_type, mty ->
| Structure, {extended; _} -> Chunk.split_and_fmt Structure c ctx extended
| Signature, {extended; _} -> Chunk.split_and_fmt Signature c ctx extended
| Use_file, {extended; _} -> Chunk.split_and_fmt Use_file c ctx extended
| Core_type, {extended= ty; _} ->
fmt_core_type c (sub_typ ~ctx:(Pld (PTyp ty)) ty)
| Module_type, {extended= mty; _} ->
compose_module ~f:Fn.id
(fmt_module_type c (sub_mty ~ctx:(Mty mty) mty))
| Expression, e ->
| Expression, {extended= e; _} ->
fmt_expression c (sub_exp ~ctx:(Str (Ast_helper.Str.eval e)) e)
| Pattern, p -> fmt_pattern c (sub_pat ~ctx:(Pld (PPat (p, None))) p)
| Pattern, {extended= p; _} ->
fmt_pattern c (sub_pat ~ctx:(Pld (PPat (p, None))) p)
| Repl_file, l -> fmt_repl_file c ctx l
| Documentation, d ->
(* TODO: [source] and [cmts] should have never been computed when
Expand Down
12 changes: 1 addition & 11 deletions lib/Normalize_extended_ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ let normalize_code ~normalize_cmt conf (m : Ast_mapper.mapper) txt =
let normalize_cmt = normalize_cmt conf in
match Parse_with_comments.parse_toplevel conf ~input_name ~source:txt with
| First {ast; comments; _} ->
normalize_parse_result ~normalize_cmt Use_file
(List.map ~f:(m.toplevel_phrase m) ast)
normalize_parse_result ~normalize_cmt Use_file (map Use_file m ast)
comments
| Second {ast; comments; _} ->
normalize_parse_result ~normalize_cmt Repl_file
Expand Down Expand Up @@ -186,11 +185,6 @@ let normalize_code (conf : Conf.t) code =
let n = normalize_cmt conf in
n#code code

let ast fragment ~ignore_doc_comments c =
let normalize_cmt = normalize_cmt c in
map fragment
(make_mapper ~ignore_doc_comments ~normalize_doc:normalize_cmt#doc)

module Normalized_cmt = struct
type t =
{ cmt_kind: [`Comment | `Doc_comment]
Expand Down Expand Up @@ -244,7 +238,3 @@ let diff_cmts (conf : Conf.t) x y =
Set.of_list (module Normalized_cmt.Comparator) (List.map ~f z)
in
diff ~f x y

let equal fragment ~ignore_doc_comments c ast1 ast2 =
let map = ast fragment c ~ignore_doc_comments in
equal fragment (map ast1) (map ast2)
3 changes: 0 additions & 3 deletions lib/Normalize_extended_ast.mli
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ type 'a t = 'a Extended_ast.t
val dedup_cmts : 'a t -> 'a -> Cmt.t list -> Cmt.t list
(** Remove comments that duplicate docstrings (or other comments). *)

val equal : 'a t -> ignore_doc_comments:bool -> Conf.t -> 'a -> 'a -> bool
(** Compare fragments for equality up to normalization. *)

val diff_cmts :
Conf.t -> Cmt.t list -> Cmt.t list -> (unit, Cmt.error list) Result.t
(** Difference between two lists of comments. *)
Expand Down
Loading
Loading