From ff9a53d556437484636c3857c15f3ebfa1ca894a Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 1 Oct 2018 17:23:49 +0000 Subject: [PATCH 01/32] Use explicit types for some integer constants The following constant expression: 1024 * 1024 should be evaluated by the compiler at runtime as though it were: (int)1024 * (int)1024 and one would expect the result to be: (int)1048576 However, strictly speaking, an `int' is only required to be able to represent at least the number 32767. However, a `long int' must be able to represent at least `2147483647'. Thus, strictly speaking, proper C code should specify that at least one of the integer constants is to be represented as a `long int': 1024L * 1024 Consider, also, the following program: #include // UINT_MAX #include // printf #define PRINT_SIZE_OF(type) \ printf("sizeof(" #type ") = %u\n", (unsigned int)sizeof(type)) #define PRINT_UNSIGNED_LONG_LONG_INT(expr) \ printf(#expr " = %llu\n", (unsigned long long int)(expr)) int main() { PRINT_SIZE_OF(int); PRINT_SIZE_OF(long long int); PRINT_UNSIGNED_LONG_LONG_INT(UINT_MAX + 1); PRINT_UNSIGNED_LONG_LONG_INT(UINT_MAX + 1ULL); PRINT_UNSIGNED_LONG_LONG_INT(65536U * 65536); PRINT_UNSIGNED_LONG_LONG_INT(65536ULL * 65536); } That program might produce the following output: sizeof(int) = 4 sizeof(long long int) = 8 UINT_MAX + 1 = 0 UINT_MAX + 1ULL = 4294967296 65536U * 65536 = 0 65536ULL * 65536 = 4294967296 As you can see, specifying the type of an integer constant can be important. Take, for example, one of the above calculations: 65536U * 65536 Without the "U" suffix to specify that the integer constant has an unsigned type, the compiler would compute: (int)65536 * (int)65536 On the same machine, this would result in overflow of a signed integer type, which invokes undefined behavior; a good compiler would probably warn about it, or even produce an error. --- lib/cfg.c | 4 ++-- lib/formats/progressbar.c | 2 +- lib/hash-utility.c | 4 ++-- lib/md-scheduler.c | 4 ++-- lib/session.c | 2 +- lib/shredder.c | 8 ++++---- lib/utilities.c | 6 +++--- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/cfg.c b/lib/cfg.c index 4724f47db..46f75100b 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -87,8 +87,8 @@ void rm_cfg_set_default(RmCfg *cfg) { * 32k => 15.8 seconds */ cfg->read_buf_len = 16 * 1024; - cfg->total_mem = (RmOff)1024 * 1024 * 1024; - cfg->sweep_size = 1024 * 1024 * 1024; + cfg->total_mem = 1024L * 1024 * 1024; + cfg->sweep_size = 1024L * 1024 * 1024; cfg->sweep_count = 1024 * 16; cfg->skip_start_factor = 0.0; diff --git a/lib/formats/progressbar.c b/lib/formats/progressbar.c index 57e9a1ae0..16d2dc3ca 100644 --- a/lib/formats/progressbar.c +++ b/lib/formats/progressbar.c @@ -132,7 +132,7 @@ static char *rm_fmt_progress_get_cached_eta( gint64 now = g_get_real_time(); if(self->last_eta[0] != 0) { - if(ABS(now - self->last_eta_update) <= 500 * 1000) { + if(ABS(now - self->last_eta_update) <= 500L * 1000) { return self->last_eta; } } diff --git a/lib/hash-utility.c b/lib/hash-utility.c index 68395b159..f79b9cd30 100644 --- a/lib/hash-utility.c +++ b/lib/hash-utility.c @@ -119,7 +119,7 @@ int rm_hasher_main(int argc, const char **argv) { /* Digest type */ tag.digest_type = RM_DEFAULT_DIGEST; gint threads = 8; - gint64 buffer_mbytes = 256; + guint64 buffer_mbytes = 256; guint64 increment = 4096; ////////////// Option Parsing /////////////// @@ -213,7 +213,7 @@ int rm_hasher_main(int argc, const char **argv) { threads, FALSE, increment, - 1024 * 1024 * buffer_mbytes, + 1024L * 1024 * buffer_mbytes, (RmHasherCallback)rm_hasher_callback, &tag); diff --git a/lib/md-scheduler.c b/lib/md-scheduler.c index 5cb44bf10..d6ec4bb88 100644 --- a/lib/md-scheduler.c +++ b/lib/md-scheduler.c @@ -30,9 +30,9 @@ * debug messages by continually recycling back to the joiner. */ #if _RM_MDS_DEBUG -#define MDS_EMPTYQUEUE_SLEEP_US (60 * 1000 * 1000) /* 60 seconds */ +#define MDS_EMPTYQUEUE_SLEEP_US (60L * 1000 * 1000) /* 60 seconds */ #else -#define MDS_EMPTYQUEUE_SLEEP_US (50 * 1000) /* 0.05 second */ +#define MDS_EMPTYQUEUE_SLEEP_US (50L * 1000) /* 0.05 second */ #endif /////////////////////////////////////// diff --git a/lib/session.c b/lib/session.c index 3ccaa33ea..0bc716ea3 100644 --- a/lib/session.c +++ b/lib/session.c @@ -230,7 +230,7 @@ int rm_session_dedupe_main(RmCfg *cfg) { /* clang-format on */ /* a poorly-documented limit for dedupe ioctl's */ - static const gint64 max_dedupe_chunk = 16 * 1024 * 1024; + static const gint64 max_dedupe_chunk = 16L * 1024 * 1024; /* how fine a resolution to use once difference detected; * use btrfs default node size (16k): */ diff --git a/lib/shredder.c b/lib/shredder.c index 0371f4ff2..73840d3de 100644 --- a/lib/shredder.c +++ b/lib/shredder.c @@ -275,14 +275,14 @@ #define SHRED_PAGE_SIZE (sysconf(_SC_PAGESIZE)) #define SHRED_MAX_READ_FACTOR \ - ((256 * 1024 * 1024) / SHRED_BALANCED_PAGES / SHRED_PAGE_SIZE) + ((256L * 1024 * 1024) / SHRED_BALANCED_PAGES / SHRED_PAGE_SIZE) /* Maximum increment size for paranoid digests. This is smaller than for other * digest types due to memory management issues. * 16MB should be big enough buffer size to make seek time fairly insignificant * relative to sequential read time, eg 16MB read at typical 100 MB/s read * rate = 160ms read vs typical seek time 10ms*/ -#define SHRED_PARANOID_BYTES (16 * 1024 * 1024) +#define SHRED_PARANOID_BYTES (16L * 1024 * 1024) /* When paranoid hashing, if a file increments is larger * than SHRED_PREMATCH_THRESHOLD, we take a guess at the likely @@ -296,7 +296,7 @@ #define SHRED_AVERAGE_MEM_PER_FILE (100) /* Maximum number of bytes before worth_waiting becomes false */ -#define SHRED_TOO_MANY_BYTES_TO_WAIT (64 * 1024 * 1024) +#define SHRED_TOO_MANY_BYTES_TO_WAIT (64L * 1024 * 1024) /////////////////////////////////////////////////////////////////////// // INTERNAL STRUCTURES, WITH THEIR INITIALISERS AND DESTROYERS // @@ -1742,7 +1742,7 @@ void rm_shred_run(RmSession *session) { /* estimate mem used for RmFiles and allocate any leftovers to read buffer and/or * paranoid mem */ RmOff mem_used = SHRED_AVERAGE_MEM_PER_FILE * session->shred_files_remaining; - RmOff read_buffer_mem = MAX(1024 * 1024, (gint64)cfg->total_mem - (gint64)mem_used); + RmOff read_buffer_mem = MAX(1024L * 1024, cfg->total_mem - mem_used); if(cfg->checksum_type == RM_DIGEST_PARANOID) { /* allocate any spare mem for paranoid hashing */ diff --git a/lib/utilities.c b/lib/utilities.c index bf302ccfe..164b5dd0a 100644 --- a/lib/utilities.c +++ b/lib/utilities.c @@ -374,9 +374,9 @@ char *rm_util_get_groupname(void) { void rm_util_size_to_human_readable(RmOff num, char *in, gsize len) { if(num < 512) { snprintf(in, len, "%" LLU " B", num); - } else if(num < 512 * 1024) { + } else if(num < 512L * 1024) { snprintf(in, len, "%.2f KB", num / 1024.0); - } else if(num < 512 * 1024 * 1024) { + } else if(num < 512L * 1024 * 1024) { snprintf(in, len, "%.2f MB", num / (1024.0 * 1024.0)); } else { snprintf(in, len, "%.2f GB", num / (1024.0 * 1024.0 * 1024.0)); @@ -1342,7 +1342,7 @@ bool rm_iso8601_format(time_t stamp, char *buf, gsize buf_size) { return false; } -#define SECONDS_PER_DAY (24 * 60 * 60) +#define SECONDS_PER_DAY (24L * 60 * 60) #define SECONDS_PER_HOUR (60 * 60) #define SECONDS_PER_MINUTE (60) From d085572abae216a105356b9a6417c32d103bd29a Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 15 Oct 2018 16:57:11 +0000 Subject: [PATCH 02/32] lib/cmdline.c: Fix error message The main problem was that the error message used `Negativ' rather than `Negative'. However, the message was also a little too playful; this commit changes the entire message to something a little more dour. --- lib/cmdline.c | 2 +- po/de.po | 2 +- po/es.po | 2 +- po/fr.po | 2 +- po/rmlint.pot | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 2998da559..d67ef6c90 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -284,7 +284,7 @@ static RmOff rm_cmd_size_string_to_bytes(const char *size_spec, GError **error) g_set_error(error, RM_ERROR_QUARK, 0, _("This does not look like a number")); return 0; } else if(decimal < 0) { - g_set_error(error, RM_ERROR_QUARK, 0, _("Negativ sizes are no good idea")); + g_set_error(error, RM_ERROR_QUARK, 0, _("A size must be non-negative")); return 0; } else if(*format) { format = g_strstrip(format); diff --git a/po/de.po b/po/de.po index 3cc46a098..98ae4afd0 100644 --- a/po/de.po +++ b/po/de.po @@ -535,7 +535,7 @@ msgid "This does not look like a number" msgstr "Das schaut nicht wie eine Zahl aus" #: lib/cmdline.c -msgid "Negativ sizes are no good idea" +msgid "A size must be non-negative" msgstr "Negative Größen sind keine gute Idee" #: lib/cmdline.c diff --git a/po/es.po b/po/es.po index 7b6dd5c36..fe042d37f 100644 --- a/po/es.po +++ b/po/es.po @@ -525,7 +525,7 @@ msgid "This does not look like a number" msgstr "Estto no parece un número" #: lib/cmdline.c -msgid "Negativ sizes are no good idea" +msgid "A size must be non-negative" msgstr "Tamaños negativos no son una buena idea" #: lib/cmdline.c diff --git a/po/fr.po b/po/fr.po index 229eee5c7..df80c47ad 100644 --- a/po/fr.po +++ b/po/fr.po @@ -513,7 +513,7 @@ msgid "This does not look like a number" msgstr "Cela ne semble pas correspondre à un chiffre" #: lib/cmdline.c -msgid "Negativ sizes are no good idea" +msgid "A size must be non-negative" msgstr "Une taille négative n'est pas une bonne idée" #: lib/cmdline.c diff --git a/po/rmlint.pot b/po/rmlint.pot index eda2e2022..a57e4de21 100644 --- a/po/rmlint.pot +++ b/po/rmlint.pot @@ -503,7 +503,7 @@ msgid "This does not look like a number" msgstr "" #: lib/cmdline.c -msgid "Negativ sizes are no good idea" +msgid "A size must be non-negative" msgstr "" #: lib/cmdline.c From a3e9d5c9c335a2f1f19255b9fde800a49648b4cb Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 3 Oct 2018 15:28:12 +0000 Subject: [PATCH 03/32] lib/cfg.*: Clean up `rm_cfg_free_paths()' This simply introduces `const' discipline. Also, `g_assert(cfg);' has been inserted. --- lib/cfg.c | 3 ++- lib/cfg.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/cfg.c b/lib/cfg.c index 46f75100b..566b0c4a2 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -140,7 +140,8 @@ guint rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path) { return 1; } -void rm_cfg_free_paths(RmCfg *cfg) { +void rm_cfg_free_paths(RmCfg *const cfg) { + g_assert(cfg); g_slist_free_full(cfg->paths, (GDestroyNotify)rm_path_free); cfg->paths = NULL; g_slist_free_full(cfg->json_paths, (GDestroyNotify)rm_path_free); diff --git a/lib/cfg.h b/lib/cfg.h index 5b1e7e4aa..0d70b10db 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -199,6 +199,6 @@ guint rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path); /** * @brief free all data associated with cfg->paths. */ -void rm_cfg_free_paths(RmCfg *cfg); +void rm_cfg_free_paths(RmCfg *const cfg); #endif /* end of include guard */ From 93541fc4e60a27bbc5d2bac32aa597fdeb1e573f Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Sat, 29 Sep 2018 21:00:04 +0000 Subject: [PATCH 04/32] lib/cfg.*: `rm_cfg_add_path()' now returns `bool' It was already returning values as though it did. --- lib/cfg.c | 10 +++++----- lib/cfg.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/cfg.c b/lib/cfg.c index 566b0c4a2..5818fbd95 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -103,7 +103,7 @@ void rm_cfg_set_default(RmCfg *cfg) { rm_trie_init(&cfg->file_trie); } -guint rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path) { +bool rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path) { int rc = 0; #if HAVE_FACCESSAT @@ -115,14 +115,14 @@ guint rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path) { if(rc != 0) { rm_log_warning_line(_("Can't open directory or file \"%s\": %s"), path, strerror(errno)); - return 0; + return false; } char *real_path = realpath(path, NULL); if(real_path == NULL) { rm_log_warning_line(_("Can't get real path for directory or file \"%s\": %s"), path, strerror(errno)); - return 0; + return false; } RmPath *rmpath = g_slice_new(RmPath); @@ -133,11 +133,11 @@ guint rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path) { if(cfg->replay && g_str_has_suffix(rmpath->path, ".json")) { cfg->json_paths = g_slist_prepend(cfg->json_paths, rmpath); - return 1; + return true; } cfg->paths = g_slist_prepend(cfg->paths, rmpath); - return 1; + return true; } void rm_cfg_free_paths(RmCfg *const cfg) { diff --git a/lib/cfg.h b/lib/cfg.h index 0d70b10db..95e9430ed 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -194,7 +194,7 @@ void rm_cfg_set_default(RmCfg *cfg); /** * @brief check and add a path to cfg->paths. */ -guint rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path); +bool rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path); /** * @brief free all data associated with cfg->paths. From 9c51e9d0d95a4d67f607d3c1f63fa6c3e8729eb3 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Sun, 30 Sep 2018 01:28:31 +0000 Subject: [PATCH 05/32] lib/cmdline.c: return the status of `rm_cfg_add_path()' Before, `rm_cmd_parse_replay()' simply ignored whether there was a failure; now, it both returns a status and sets an error message. --- lib/cmdline.c | 20 +++++++++++++++++--- po/de.po | 7 ++++++- po/es.po | 7 ++++++- po/fr.po | 7 ++++++- po/rmlint.pot | 9 +++++++-- 5 files changed, 42 insertions(+), 8 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index d67ef6c90..d86e39bed 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1097,11 +1097,25 @@ static gboolean rm_cmd_parse_rankby(_UNUSED const char *option_name, static gboolean rm_cmd_parse_replay(_UNUSED const char *option_name, const gchar *json_path, RmSession *session, - _UNUSED GError **error) { + GError **error) { + g_assert(session); + g_assert(session->cfg); session->cfg->replay = true; session->cfg->cache_file_structs = true; - rm_cfg_add_path(session->cfg, false, json_path); - return true; + + if(rm_cfg_add_path(session->cfg, false, json_path)) { + return true; + } + + g_assert(error); + g_assert(*error == NULL); + + g_set_error( + error, RM_ERROR_QUARK, 0, + _("Failed to include this replay file: %s"), json_path + ); + + return false; } static gboolean rm_cmd_parse_equal(_UNUSED const char *option_name, diff --git a/po/de.po b/po/de.po index 98ae4afd0..b4acccbe2 100644 --- a/po/de.po +++ b/po/de.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: rmlint 2.0.0\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2018-10-04 18:46+0000\n" +"POT-Creation-Date: 2019-01-21 21:40+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -1028,6 +1028,11 @@ msgstr "" msgid "No stamp file at `%s`, will create one after this run." msgstr "" +#: lib/cmdline.c +#, c-format +msgid "Failed to include this replay file: %s" +msgstr "" + #~ msgid "%s%15" #~ msgstr "%s%15" diff --git a/po/es.po b/po/es.po index fe042d37f..2dde9d564 100644 --- a/po/es.po +++ b/po/es.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: rmlint 2.4.0\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2018-10-04 18:46+0000\n" +"POT-Creation-Date: 2019-01-21 21:40+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -1005,6 +1005,11 @@ msgstr "" msgid "No stamp file at `%s`, will create one after this run." msgstr "" +#: lib/cmdline.c +#, c-format +msgid "Failed to include this replay file: %s" +msgstr "" + #~ msgid "caching is not supported due to missing json-glib library." #~ msgstr "" #~ "el cacheo no es soportado debido a la librería inexistente json-glib" diff --git a/po/fr.po b/po/fr.po index df80c47ad..fb446e2b4 100644 --- a/po/fr.po +++ b/po/fr.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: rmlint 2.0.0\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2018-10-04 18:46+0000\n" +"POT-Creation-Date: 2019-01-21 21:40+0000\n" "PO-Revision-Date: 2014-12-02 13:37+0100\n" "Last-Translator: F. \n" "Language-Team: none\n" @@ -1001,6 +1001,11 @@ msgstr "" msgid "No stamp file at `%s`, will create one after this run." msgstr "" +#: lib/cmdline.c +#, c-format +msgid "Failed to include this replay file: %s" +msgstr "" + #~ msgid "caching is not supported due to missing json-glib library." #~ msgstr "Cache non supporté, librairie json-glib manquante." diff --git a/po/rmlint.pot b/po/rmlint.pot index a57e4de21..eed65cc06 100644 --- a/po/rmlint.pot +++ b/po/rmlint.pot @@ -6,9 +6,9 @@ #, fuzzy msgid "" msgstr "" -"Project-Id-Version: rmlint 2.6.2\n" +"Project-Id-Version: rmlint 2.7.0\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2018-10-04 18:46+0000\n" +"POT-Creation-Date: 2019-01-21 21:40+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -949,3 +949,8 @@ msgstr "" #, c-format msgid "No stamp file at `%s`, will create one after this run." msgstr "" + +#: lib/cmdline.c +#, c-format +msgid "Failed to include this replay file: %s" +msgstr "" From f8629f1db4132577b65d992100d94ca4d9d59288 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 1 Oct 2018 15:25:05 +0000 Subject: [PATCH 06/32] lib/cfg.*: Refactor `rm_cfg_add_path()' This commit introduces a number of finer-grained functions related to resolving and verifying paths, and then uses those functions not only to refactor `rm_cfg_add_path()', but also to introduce a new function, `rm_cfg_prepend_json()'. The finer-grained functions are defined in a new source file: lib/path-funcs.h Similarly, `RmPath' has been moved to its own header: lib/path.h Additionally, some of the `#include' directives have been cleaned up as part of this refactoring; if source file `X' uses an identifier whose declaration comes from source file `Y', then `X' *always* includes `Y' explicitly (even if `Y' is included indirectly through some other source file), and each `#include' directive is associated with a comment that lists each such identifier that it provides. --- lib/cfg.c | 80 ++++++++++++------------ lib/cfg.h | 48 +++++--------- lib/cmdline.c | 7 ++- lib/formats/_equal.c | 1 + lib/path-funcs.h | 146 +++++++++++++++++++++++++++++++++++++++++++ lib/path.h | 38 +++++++++++ lib/replay.c | 1 + lib/session.c | 1 + lib/traverse.c | 1 + lib/treemerge.c | 1 + po/de.po | 13 +++- po/es.po | 13 +++- po/fr.po | 13 +++- po/rmlint.pot | 13 +++- 14 files changed, 294 insertions(+), 82 deletions(-) create mode 100644 lib/path-funcs.h create mode 100644 lib/path.h diff --git a/lib/cfg.c b/lib/cfg.c index 5818fbd95..b4651e3ae 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -22,16 +22,15 @@ * Hosted on http://github.com/sahib/rmlint **/ -#include -#include -#include +#include // memset +#include // bool, true, false +#include // PATH_MAX (maybe) +#include // G_MAXUINT64, g_strdup, G_LOG_LEVEL_INFO, g_slist_free_full -#include "cfg.h" - -static void rm_path_free(RmPath *rmpath) { - free(rmpath->path); - g_slice_free(RmPath, rmpath); -} +#include "config.h" // RM_DEFAULT_DIGEST, PATH_MAX (maybe), RmOff +#include "cfg.h" // RmCfg +#include "pathtricia.h" // rm_trie_init +#include "path-funcs.h" // rm_path_free, rm_path_is_valid, rm_path_is_json, rm_path_prepend /* Options not specified by commandline get a default option - * this is usually called before rm_cmd_parse_args */ @@ -103,41 +102,42 @@ void rm_cfg_set_default(RmCfg *cfg) { rm_trie_init(&cfg->file_trie); } -bool rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path) { - int rc = 0; - -#if HAVE_FACCESSAT - rc = faccessat(AT_FDCWD, path, R_OK, AT_EACCESS); -#else - rc = access(path, R_OK); -#endif - - if(rc != 0) { - rm_log_warning_line(_("Can't open directory or file \"%s\": %s"), path, - strerror(errno)); - return false; - } - - char *real_path = realpath(path, NULL); - if(real_path == NULL) { - rm_log_warning_line(_("Can't get real path for directory or file \"%s\": %s"), - path, strerror(errno)); - return false; +bool rm_cfg_prepend_json( + RmCfg *const cfg, + const char *const path +) { + g_assert(cfg); + char *real_path; + if(rm_path_is_valid(path, &real_path) && rm_path_is_json(real_path)) { + rm_path_prepend( + &cfg->json_paths, + real_path, + cfg->path_count++, + false /* not preferred */ + ); + return true; } + return false; +} - RmPath *rmpath = g_slice_new(RmPath); - rmpath->path = real_path; - rmpath->is_prefd = is_prefd; - rmpath->idx = cfg->path_count++; - rmpath->treat_as_single_vol = strncmp(path, "//", 2) == 0; - - if(cfg->replay && g_str_has_suffix(rmpath->path, ".json")) { - cfg->json_paths = g_slist_prepend(cfg->json_paths, rmpath); +bool rm_cfg_add_path( + RmCfg *const cfg, + const bool preferred, + const char *const path +) { + g_assert(cfg); + char *real_path; + if(rm_path_is_valid(path, &real_path)) { + rm_path_prepend( + (cfg->replay && rm_path_is_json(path)) ? + &cfg->json_paths : &cfg->paths, + real_path, + cfg->path_count++, + preferred + ); return true; } - - cfg->paths = g_slist_prepend(cfg->paths, rmpath); - return true; + return false; } void rm_cfg_free_paths(RmCfg *const cfg) { diff --git a/lib/cfg.h b/lib/cfg.h index 95e9430ed..b577de3f1 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -22,29 +22,15 @@ * Hosted on http://github.com/sahib/rmlint **/ -#ifndef RM_SETTINGS_H -#define RM_SETTINGS_H +#ifndef RM_CFG_H +#define RM_CFG_H -#include +#include // bool +#include // gboolean, gdouble, gint, GSList, guint -#include "checksum.h" -#include "pathtricia.h" -#include "utilities.h" - -/* Struct for paths passed to rmlint from command line (or stdin) */ -typedef struct RmPath { - /* the RealPath of the passed string */ - char *path; - - /* index number (command line order) */ - guint idx; - - /* whether path was tagged as preferred path */ - bool is_prefd; - - /* whether to treat all files under path as one filesystem */ - bool treat_as_single_vol; -} RmPath; +#include "config.h" // RmOff +#include "pathtricia.h" // RmTrie +#include "checksum.h" // RmDigestType /* Storage struct for all options settable in cmdline. */ typedef struct RmCfg { @@ -186,19 +172,19 @@ typedef struct RmCfg { } RmCfg; -/** - * @brief Reset RmCfg to default cfg and all other vars to 0. - */ void rm_cfg_set_default(RmCfg *cfg); -/** - * @brief check and add a path to cfg->paths. - */ -bool rm_cfg_add_path(RmCfg *cfg, bool is_prefd, const char *path); +bool rm_cfg_prepend_json( + RmCfg *const cfg, + const char *const path +); + +bool rm_cfg_add_path( + RmCfg *const cfg, + const bool preferred, + const char *const path +); -/** - * @brief free all data associated with cfg->paths. - */ void rm_cfg_free_paths(RmCfg *const cfg); #endif /* end of include guard */ diff --git a/lib/cmdline.c b/lib/cmdline.c index d86e39bed..2f09076f3 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -49,6 +49,7 @@ #include "traverse.h" #include "treemerge.h" #include "utilities.h" +#include "path.h" /* define paranoia levels */ static const RmDigestType RM_PARANOIA_LEVELS[] = {RM_DIGEST_METRO, @@ -1096,14 +1097,14 @@ static gboolean rm_cmd_parse_rankby(_UNUSED const char *option_name, } static gboolean rm_cmd_parse_replay(_UNUSED const char *option_name, - const gchar *json_path, RmSession *session, + const gchar *path, RmSession *session, GError **error) { g_assert(session); g_assert(session->cfg); session->cfg->replay = true; session->cfg->cache_file_structs = true; - if(rm_cfg_add_path(session->cfg, false, json_path)) { + if(rm_cfg_prepend_json(session->cfg, path)) { return true; } @@ -1112,7 +1113,7 @@ static gboolean rm_cmd_parse_replay(_UNUSED const char *option_name, g_set_error( error, RM_ERROR_QUARK, 0, - _("Failed to include this replay file: %s"), json_path + _("Failed to include this replay file: %s"), path ); return false; diff --git a/lib/formats/_equal.c b/lib/formats/_equal.c index b67f26634..a06612681 100644 --- a/lib/formats/_equal.c +++ b/lib/formats/_equal.c @@ -26,6 +26,7 @@ #include "../formats.h" #include "../utilities.h" #include "../preprocess.h" +#include "../path.h" #include #include diff --git a/lib/path-funcs.h b/lib/path-funcs.h new file mode 100644 index 000000000..76ec98a93 --- /dev/null +++ b/lib/path-funcs.h @@ -0,0 +1,146 @@ +/** +* This file is part of rmlint. +* +* rmlint is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* rmlint is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with rmlint. If not, see . +* +* Authors: +* +* - Christopher Pahl 2010-2017 (https://github.com/sahib) +* - Daniel T. 2014-2017 (https://github.com/SeeSpotRun) +* - Michael Witten 2018-2018 +* +* Hosted on http://github.com/sahib/rmlint +**/ + +#ifndef RM_PATH_FUNCS_H +#define RM_PATH_FUNCS_H + +#include // errno +#include // strerror +#include // bool, true, false +#include // free, realpath +#include // access, faccessat, R_OK +#include // struct stat, stat, S_ISREG +#include // g_assert, g_str_has_suffix, GSList, + // g_slist_prepend, GDestroyNotify +#if HAVE_FACCESSAT + #include // AT_FDCWD, AT_EACCESS +#endif + +#include "path.h" // RmPath +#include "config.h" // _, rm_log_warning_line + +static INLINE +void rm_path_free(RmPath *const p) { + g_assert(p); + free(p->path); + g_slice_free(RmPath, p); +} + +static INLINE +bool rm_path_is_real( + const char *const path, + char **const real_path +) { + g_assert(path); + g_assert(real_path); + + if((*real_path = realpath(path, 0))) { + return true; + } + + rm_log_warning_line( + _("Can't get real path for directory or file \"%s\": %s"), + path, strerror(errno) + ); + + return false; +} + +#if HAVE_FACCESSAT + #define NOT_ACCESSIBLE(path) faccessat(AT_FDCWD, path, R_OK, AT_EACCESS) +#else + #define NOT_ACCESSIBLE(path) access(path, R_OK) +#endif + +static INLINE +bool rm_path_is_accessible(const char *const path) { + g_assert(path); + if(NOT_ACCESSIBLE(path)) { + rm_log_warning_line( + _("Can't open directory or file \"%s\": %s"), + path, strerror(errno) + ); + return false; + } + return true; +} + +#undef NOT_ACCESSIBLE + +static INLINE +bool rm_path_is_valid( + const char *const path, + char **const real_path +) { + g_assert(path); + g_assert(real_path); + + if(rm_path_is_real(path, real_path)) { + return rm_path_is_accessible(*real_path); + } + + rm_log_warning_line(_("Invalid path \"%s\""), path); + return false; +} + +static INLINE +bool rm_path_is_file(const char *const path) { + g_assert(path); + struct stat s; + if(stat(path, &s)) { + rm_log_warning_line( + _("Could not get metadata for path \"%s\": %s"), + path, strerror(errno) + ); + return false; + } + return S_ISREG(s.st_mode); +} + +static INLINE +bool rm_path_is_json(const char *const path) { + g_assert(path); + return rm_path_is_file(path) && g_str_has_suffix(path, ".json"); +} + +static INLINE +void rm_path_prepend( + GSList **const list, + char *const path, + const unsigned int index, + const bool preferred +) { + g_assert(path); + + RmPath *p = g_slice_new(RmPath); + p->path = path; + p->idx = index; + p->is_prefd = preferred; + p->treat_as_single_vol = (path[0] == '/') && (path[1] == '/'); + + *list = g_slist_prepend(*list, p); +} + +#endif /* end of include guard */ diff --git a/lib/path.h b/lib/path.h new file mode 100644 index 000000000..6bff264a2 --- /dev/null +++ b/lib/path.h @@ -0,0 +1,38 @@ +/** +* This file is part of rmlint. +* +* rmlint is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* rmlint is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with rmlint. If not, see . +* +* Authors: +* +* - Christopher Pahl 2010-2017 (https://github.com/sahib) +* - Daniel T. 2014-2017 (https://github.com/SeeSpotRun) +* - Michael Witten 2018-2018 +* +* Hosted on http://github.com/sahib/rmlint +**/ + +#ifndef RM_PATH_H +#define RM_PATH_H + +#include // bool + +typedef struct RmPath { + char *path; // result of `realpath()' of + unsigned int idx; // command line order, followed by stdin order + bool is_prefd; // whether path was tagged as preferred path + bool treat_as_single_vol; // treat this directory as one file system +} RmPath; + +#endif /* end of include guard */ diff --git a/lib/replay.c b/lib/replay.c index fad162941..c83e92523 100644 --- a/lib/replay.c +++ b/lib/replay.c @@ -31,6 +31,7 @@ #include "preprocess.h" #include "session.h" #include "shredder.h" +#include "path.h" /* External libraries */ #include diff --git a/lib/session.c b/lib/session.c index 0bc716ea3..bf4c4712f 100644 --- a/lib/session.c +++ b/lib/session.c @@ -32,6 +32,7 @@ #include "preprocess.h" #include "session.h" #include "traverse.h" +#include "path.h" #if HAVE_BTRFS_H #include diff --git a/lib/traverse.c b/lib/traverse.c index 73f4b9f2b..70e69665d 100644 --- a/lib/traverse.c +++ b/lib/traverse.c @@ -38,6 +38,7 @@ #include "preprocess.h" #include "utilities.h" #include "xattr.h" +#include "path.h" #include "fts/fts.h" diff --git a/lib/treemerge.c b/lib/treemerge.c index d7818a823..ab0bd7f91 100644 --- a/lib/treemerge.c +++ b/lib/treemerge.c @@ -82,6 +82,7 @@ #include "preprocess.h" #include "shredder.h" #include "treemerge.h" +#include "path.h" #include "fts/fts.h" diff --git a/po/de.po b/po/de.po index b4acccbe2..8690df9a6 100644 --- a/po/de.po +++ b/po/de.po @@ -343,12 +343,12 @@ msgstr "Unbekanntes fts_info flag %d für Datei %s" msgid "'%s': fts_read failed on %s" msgstr "'%s': fts_read schlug fehl bei Pfad %s" -#: lib/cfg.c lib/hash-utility.c +#: lib/hash-utility.c lib/path-funcs.h #, c-format msgid "Can't open directory or file \"%s\": %s" msgstr "Kann Verzeichnis oder Datei \"%s\" nicht öffnen: %s" -#: lib/cfg.c +#: lib/path-funcs.h #, fuzzy, c-format msgid "Can't get real path for directory or file \"%s\": %s" msgstr "Kann Verzeichnis oder Datei \"%s\" nicht öffnen: %s" @@ -1031,6 +1031,15 @@ msgstr "" #: lib/cmdline.c #, c-format msgid "Failed to include this replay file: %s" + +#: lib/path-funcs.h +#, c-format +msgid "Invalid path \"%s\"" +msgstr "" + +#: lib/path-funcs.h +#, c-format +msgid "Could not get metadata for path \"%s\": %s" msgstr "" #~ msgid "%s%15" diff --git a/po/es.po b/po/es.po index 2dde9d564..dcf513d0d 100644 --- a/po/es.po +++ b/po/es.po @@ -339,12 +339,12 @@ msgstr "Bandera fts_info %d desconocida para archivo %s" msgid "'%s': fts_read failed on %s" msgstr "'%s': fts_read falló en %s" -#: lib/cfg.c lib/hash-utility.c +#: lib/hash-utility.c lib/path-funcs.h #, c-format msgid "Can't open directory or file \"%s\": %s" msgstr "No se puede abrir el directorio o archivo \"%s\": %s" -#: lib/cfg.c +#: lib/path-funcs.h #, fuzzy, c-format msgid "Can't get real path for directory or file \"%s\": %s" msgstr "No se puede abrir el directorio o archivo \"%s\": %s" @@ -1008,6 +1008,15 @@ msgstr "" #: lib/cmdline.c #, c-format msgid "Failed to include this replay file: %s" + +#: lib/path-funcs.h +#, c-format +msgid "Invalid path \"%s\"" +msgstr "" + +#: lib/path-funcs.h +#, c-format +msgid "Could not get metadata for path \"%s\": %s" msgstr "" #~ msgid "caching is not supported due to missing json-glib library." diff --git a/po/fr.po b/po/fr.po index fb446e2b4..7e3fd82ab 100644 --- a/po/fr.po +++ b/po/fr.po @@ -337,12 +337,12 @@ msgstr "Drapeau fts_info %d inconnu pour le fichier %s" msgid "'%s': fts_read failed on %s" msgstr "'%s': échec de fts_read pour %s" -#: lib/cfg.c lib/hash-utility.c +#: lib/hash-utility.c lib/path-funcs.h #, fuzzy, c-format msgid "Can't open directory or file \"%s\": %s" msgstr "Impossible d'ouvrir le répertoire ou le fichier \"%s\": %s\n" -#: lib/cfg.c +#: lib/path-funcs.h #, fuzzy, c-format msgid "Can't get real path for directory or file \"%s\": %s" msgstr "Impossible d'ouvrir le répertoire ou le fichier \"%s\": %s\n" @@ -1004,6 +1004,15 @@ msgstr "" #: lib/cmdline.c #, c-format msgid "Failed to include this replay file: %s" + +#: lib/path-funcs.h +#, c-format +msgid "Invalid path \"%s\"" +msgstr "" + +#: lib/path-funcs.h +#, c-format +msgid "Could not get metadata for path \"%s\": %s" msgstr "" #~ msgid "caching is not supported due to missing json-glib library." diff --git a/po/rmlint.pot b/po/rmlint.pot index eed65cc06..124a1ada5 100644 --- a/po/rmlint.pot +++ b/po/rmlint.pot @@ -334,12 +334,12 @@ msgstr "" msgid "'%s': fts_read failed on %s" msgstr "" -#: lib/cfg.c lib/hash-utility.c +#: lib/hash-utility.c lib/path-funcs.h #, c-format msgid "Can't open directory or file \"%s\": %s" msgstr "" -#: lib/cfg.c +#: lib/path-funcs.h #, c-format msgid "Can't get real path for directory or file \"%s\": %s" msgstr "" @@ -953,4 +953,13 @@ msgstr "" #: lib/cmdline.c #, c-format msgid "Failed to include this replay file: %s" + +#: lib/path-funcs.h +#, c-format +msgid "Invalid path \"%s\"" +msgstr "" + +#: lib/path-funcs.h +#, c-format +msgid "Could not get metadata for path \"%s\": %s" msgstr "" From 3539636f14a640ae417e906205ba666934410ca3 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 1 Oct 2018 15:25:05 +0000 Subject: [PATCH 07/32] lib/cfg.*: Refactor `rm_cfg_add_path()' This commit introduces a number of finer-grained functions related to resolving and verifying paths, and then uses those functions not only to refactor `rm_cfg_add_path()', but also to introduce a new function, `rm_cfg_prepend_json()'. The finer-grained functions are defined in a new source file: lib/path-funcs.h Similarly, `RmPath' has been moved to its own header: lib/path.h Additionally, some of the `#include' directives have been cleaned up as part of this refactoring; if source file `X' uses an identifier whose declaration comes from source file `Y', then `X' *always* includes `Y' explicitly (even if `Y' is included indirectly through some other source file), and each `#include' directive is associated with a comment that lists each such identifier that it provides. --- po/de.po | 1 + po/es.po | 1 + po/fr.po | 1 + po/rmlint.pot | 1 + 4 files changed, 4 insertions(+) diff --git a/po/de.po b/po/de.po index 8690df9a6..04afb44e2 100644 --- a/po/de.po +++ b/po/de.po @@ -1031,6 +1031,7 @@ msgstr "" #: lib/cmdline.c #, c-format msgid "Failed to include this replay file: %s" +msgstr "" #: lib/path-funcs.h #, c-format diff --git a/po/es.po b/po/es.po index dcf513d0d..7532bd6bc 100644 --- a/po/es.po +++ b/po/es.po @@ -1008,6 +1008,7 @@ msgstr "" #: lib/cmdline.c #, c-format msgid "Failed to include this replay file: %s" +msgstr "" #: lib/path-funcs.h #, c-format diff --git a/po/fr.po b/po/fr.po index 7e3fd82ab..f281d8865 100644 --- a/po/fr.po +++ b/po/fr.po @@ -1004,6 +1004,7 @@ msgstr "" #: lib/cmdline.c #, c-format msgid "Failed to include this replay file: %s" +msgstr "" #: lib/path-funcs.h #, c-format diff --git a/po/rmlint.pot b/po/rmlint.pot index 124a1ada5..465923c3a 100644 --- a/po/rmlint.pot +++ b/po/rmlint.pot @@ -953,6 +953,7 @@ msgstr "" #: lib/cmdline.c #, c-format msgid "Failed to include this replay file: %s" +msgstr "" #: lib/path-funcs.h #, c-format From fb1ba2deaa3c7c7d386cbad2738733a9dcafa612 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 4 Oct 2018 20:56:33 +0000 Subject: [PATCH 08/32] lib/cmdline.c: Move `rm_cmd_read_paths_from_stdin()' Move it closer to `rm_cmd_set_paths()'. --- lib/cmdline.c | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index fc4c42f16..6d1a0771d 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -360,31 +360,6 @@ static GLogLevelFlags VERBOSITY_TO_LOG_LEVEL[] = {[0] = G_LOG_LEVEL_CRITICAL, [3] = G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO, [4] = G_LOG_LEVEL_DEBUG}; -static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, - bool null_separated) { - char delim = null_separated ? 0 : '\n'; - - size_t buf_len = PATH_MAX; - char *path_buf = malloc(buf_len * sizeof(char)); - - bool all_paths_read = true; - - int path_len; - - /* Still read all paths on errors, so the user knows all paths that failed */ - while((path_len = getdelim(&path_buf, &buf_len, delim, stdin)) >= 0) { - if(path_len > 0) { - /* replace returned delimiter with null */ - if (path_buf[path_len - 1] == delim) { - path_buf[path_len - 1] = 0; - } - all_paths_read &= rm_cfg_prepend_path(session->cfg, path_buf, is_prefd); - } - } - - free(path_buf); - return all_paths_read; -} static bool rm_cmd_parse_output_pair(RmSession *session, const char *pair, GError **error) { @@ -1183,6 +1158,32 @@ static bool rm_cmd_set_cmdline(RmCfg *cfg, int argc, char **argv) { return true; } +static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, + bool null_separated) { + char delim = null_separated ? 0 : '\n'; + + size_t buf_len = PATH_MAX; + char *path_buf = malloc(buf_len * sizeof(char)); + + bool all_paths_read = true; + + int path_len; + + /* Still read all paths on errors, so the user knows all paths that failed */ + while((path_len = getdelim(&path_buf, &buf_len, delim, stdin)) >= 0) { + if(path_len > 0) { + /* replace returned delimiter with null */ + if(path_buf[path_len - 1] == delim) { + path_buf[path_len - 1] = 0; + } + all_paths_read &= rm_cfg_prepend_path(session->cfg, path_buf, is_prefd); + } + } + + free(path_buf); + return all_paths_read; +} + static bool rm_cmd_set_paths(RmSession *session, char **paths) { bool is_prefd = false; bool all_paths_valid = true; From ee9b9a04a1e1c49263abb3c52a580c9e9887c4c5 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Tue, 16 Oct 2018 07:39:02 +0000 Subject: [PATCH 09/32] lib/cmdline.c: Simplify size in call to `malloc()' By definition, `sizeof(char)' evaluates to 1. --- lib/cmdline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 6d1a0771d..43f2fac7d 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1163,7 +1163,7 @@ static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, char delim = null_separated ? 0 : '\n'; size_t buf_len = PATH_MAX; - char *path_buf = malloc(buf_len * sizeof(char)); + char *path_buf = malloc(buf_len); bool all_paths_read = true; From f8cea9f09b20675bb8b13c447d61b216c7f269f4 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Sat, 29 Sep 2018 17:12:22 +0000 Subject: [PATCH 10/32] lib/cmdline.c: Replace `RmSession' with `RmCfg' in `rm_cmd_set_paths*()' A pointer to an `RmSession' object was being passed around, but only a pointer to its associated `RmCfg' object is needed. --- lib/cmdline.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 43f2fac7d..dc203ea6f 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1158,8 +1158,12 @@ static bool rm_cmd_set_cmdline(RmCfg *cfg, int argc, char **argv) { return true; } -static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, - bool null_separated) { +static INLINE +bool rm_cmd_set_paths_from_stdin( + RmCfg *const cfg, + const bool is_prefd, + const bool null_separated +) { char delim = null_separated ? 0 : '\n'; size_t buf_len = PATH_MAX; @@ -1176,7 +1180,7 @@ static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, if(path_buf[path_len - 1] == delim) { path_buf[path_len - 1] = 0; } - all_paths_read &= rm_cfg_prepend_path(session->cfg, path_buf, is_prefd); + all_paths_read &= rm_cfg_prepend_path(cfg, path_buf, is_prefd); } } @@ -1184,13 +1188,13 @@ static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, return all_paths_read; } -static bool rm_cmd_set_paths(RmSession *session, char **paths) { +static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { + g_assert(cfg); + bool is_prefd = false; bool all_paths_valid = true; bool stdin_paths_preferred = false; - RmCfg *cfg = session->cfg; - /* Check the directory to be valid */ for(int i = 0; paths && paths[i]; ++i) { if(strcmp(paths[i], "-") == 0) { @@ -1209,13 +1213,14 @@ static bool rm_cmd_set_paths(RmSession *session, char **paths) { if(cfg->read_stdin || cfg->read_stdin0) { /* option '-' means read paths from stdin */ - all_paths_valid &= - rm_cmd_read_paths_from_stdin(session, stdin_paths_preferred, cfg->read_stdin0); + all_paths_valid &= rm_cmd_set_paths_from_stdin( + cfg, stdin_paths_preferred, cfg->read_stdin0 + ); } if(g_slist_length(cfg->paths) == 0 && all_paths_valid) { /* Still no path set? - use `pwd` */ - rm_cfg_prepend_path(session->cfg, cfg->iwd, is_prefd); + rm_cfg_prepend_path(cfg, cfg->iwd, is_prefd); } /* Only return success if everything is fine. */ @@ -1480,7 +1485,7 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { goto cleanup; } - if(!rm_cmd_set_paths(session, paths)) { + if(!rm_cmd_set_paths(cfg, paths)) { error = g_error_new(RM_ERROR_QUARK, 0, _("Not all given paths are valid. Aborting")); goto cleanup; } From 25db8f6f1594dc1289db2a90e2a53fd6d90a0e1b Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Tue, 16 Oct 2018 04:14:12 +0000 Subject: [PATCH 11/32] lib/cmdline.c: Abstract out struct `rm_cmd_set_paths_vars' Variables that need to be shared across `rm_cmd_set_paths*()' may be placed in an instance of this struct. --- lib/cmdline.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index dc203ea6f..fccadde2f 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1158,12 +1158,20 @@ static bool rm_cmd_set_cmdline(RmCfg *cfg, int argc, char **argv) { return true; } +typedef struct RmCmdSetPathVars { + RmCfg *const cfg; + bool stdin_paths_preferred; +} RmCmdSetPathVars; + static INLINE -bool rm_cmd_set_paths_from_stdin( - RmCfg *const cfg, - const bool is_prefd, - const bool null_separated -) { +bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { + g_assert(v); + g_assert(v->cfg); + + RmCfg *const cfg = v->cfg; + const bool is_prefd = v->stdin_paths_preferred; + const bool null_separated = cfg->read_stdin0; + char delim = null_separated ? 0 : '\n'; size_t buf_len = PATH_MAX; @@ -1193,14 +1201,16 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { bool is_prefd = false; bool all_paths_valid = true; - bool stdin_paths_preferred = false; + RmCmdSetPathVars v = { + .cfg = cfg, + }; /* Check the directory to be valid */ for(int i = 0; paths && paths[i]; ++i) { if(strcmp(paths[i], "-") == 0) { cfg->read_stdin = true; /* remember whether to treat stdin paths as preferred paths */ - stdin_paths_preferred = is_prefd; + v.stdin_paths_preferred = is_prefd; } else if(strcmp(paths[i], "//") == 0) { /* the '//' separator separates non-preferred paths from preferred */ is_prefd = !is_prefd; @@ -1213,9 +1223,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { if(cfg->read_stdin || cfg->read_stdin0) { /* option '-' means read paths from stdin */ - all_paths_valid &= rm_cmd_set_paths_from_stdin( - cfg, stdin_paths_preferred, cfg->read_stdin0 - ); + all_paths_valid &= rm_cmd_set_paths_from_stdin(&v); } if(g_slist_length(cfg->paths) == 0 && all_paths_valid) { From 1e81e4126f176c732d415080abbc059deeff0af1 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 15 Oct 2018 04:53:46 +0000 Subject: [PATCH 12/32] lib/cmdline.c: Add error handling to `rm_cmd_set_paths_from_stdin()' Before, it was just ignoring failures in `malloc()' and `getdelim()'. Also, one of the error messages in `lib/treemerge.c' has been set to match the error message introduced here. --- lib/cmdline.c | 29 ++++++++++++++++++++--------- lib/treemerge.c | 2 +- po/de.po | 12 ++++++++++-- po/es.po | 12 ++++++++++-- po/fr.po | 12 ++++++++++-- po/rmlint.pot | 12 ++++++++++-- 6 files changed, 61 insertions(+), 18 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index fccadde2f..b636f242f 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1161,6 +1161,7 @@ static bool rm_cmd_set_cmdline(RmCfg *cfg, int argc, char **argv) { typedef struct RmCmdSetPathVars { RmCfg *const cfg; bool stdin_paths_preferred; + bool all_paths_valid; } RmCmdSetPathVars; static INLINE @@ -1176,8 +1177,10 @@ bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { size_t buf_len = PATH_MAX; char *path_buf = malloc(buf_len); - - bool all_paths_read = true; + if(!path_buf) { + rm_log_perror(_("Failed to allocate memory")); + return false; + } int path_len; @@ -1188,21 +1191,26 @@ bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { if(path_buf[path_len - 1] == delim) { path_buf[path_len - 1] = 0; } - all_paths_read &= rm_cfg_prepend_path(cfg, path_buf, is_prefd); + v->all_paths_valid &= rm_cfg_prepend_path(cfg, path_buf, is_prefd); } } + if(ferror(stdin)) { + rm_log_perror(_("Failed to read stdin")) + return false; + } + free(path_buf); - return all_paths_read; + return true; } static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { g_assert(cfg); bool is_prefd = false; - bool all_paths_valid = true; RmCmdSetPathVars v = { .cfg = cfg, + .all_paths_valid = true }; /* Check the directory to be valid */ @@ -1215,7 +1223,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { /* the '//' separator separates non-preferred paths from preferred */ is_prefd = !is_prefd; } else { - all_paths_valid &= rm_cfg_prepend_path(cfg, paths[i], is_prefd); + v.all_paths_valid &= rm_cfg_prepend_path(cfg, paths[i], is_prefd); } } @@ -1223,16 +1231,19 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { if(cfg->read_stdin || cfg->read_stdin0) { /* option '-' means read paths from stdin */ - all_paths_valid &= rm_cmd_set_paths_from_stdin(&v); + if(!rm_cmd_set_paths_from_stdin(&v)) { + rm_log_error_line(_("Could not process path arguments")); + return false; + } } - if(g_slist_length(cfg->paths) == 0 && all_paths_valid) { + if(g_slist_length(cfg->paths) == 0 && v.all_paths_valid) { /* Still no path set? - use `pwd` */ rm_cfg_prepend_path(cfg, cfg->iwd, is_prefd); } /* Only return success if everything is fine. */ - return all_paths_valid; + return v.all_paths_valid; } static bool rm_cmd_set_outputs(RmSession *session, GError **error) { diff --git a/lib/treemerge.c b/lib/treemerge.c index ab0bd7f91..282a34626 100644 --- a/lib/treemerge.c +++ b/lib/treemerge.c @@ -189,7 +189,7 @@ static bool rm_tm_count_files(RmTrie *count_tree, const RmCfg *const cfg) { const char **const path_vec = malloc(sizeof(*path_vec) * (path_count + 1)); if(!path_vec) { - rm_log_error(_("Failed to allocate memory. Out of memory?")); + rm_log_perror(_("Failed to allocate memory")); return false; } diff --git a/po/de.po b/po/de.po index 04afb44e2..e4d234979 100644 --- a/po/de.po +++ b/po/de.po @@ -1015,8 +1015,8 @@ msgstr "" msgid "Failed to complete setup for merging directories" msgstr "" -#: lib/treemerge.c -msgid "Failed to allocate memory. Out of memory?" +#: lib/treemerge.c lib/cmdline.c +msgid "Failed to allocate memory" msgstr "" #: lib/cmdline.c @@ -1043,6 +1043,14 @@ msgstr "" msgid "Could not get metadata for path \"%s\": %s" msgstr "" +#: lib/cmdline.c +msgid "Failed to read stdin" +msgstr "" + +#: lib/cmdline.c +msgid "Could not process path arguments" +msgstr "" + #~ msgid "%s%15" #~ msgstr "%s%15" diff --git a/po/es.po b/po/es.po index 7532bd6bc..fd81c42db 100644 --- a/po/es.po +++ b/po/es.po @@ -992,8 +992,8 @@ msgstr "" msgid "Failed to complete setup for merging directories" msgstr "" -#: lib/treemerge.c -msgid "Failed to allocate memory. Out of memory?" +#: lib/treemerge.c lib/cmdline.c +msgid "Failed to allocate memory" msgstr "" #: lib/cmdline.c @@ -1020,6 +1020,14 @@ msgstr "" msgid "Could not get metadata for path \"%s\": %s" msgstr "" +#: lib/cmdline.c +msgid "Failed to read stdin" +msgstr "" + +#: lib/cmdline.c +msgid "Could not process path arguments" +msgstr "" + #~ msgid "caching is not supported due to missing json-glib library." #~ msgstr "" #~ "el cacheo no es soportado debido a la librería inexistente json-glib" diff --git a/po/fr.po b/po/fr.po index f281d8865..ea5b2db47 100644 --- a/po/fr.po +++ b/po/fr.po @@ -988,8 +988,8 @@ msgstr "" msgid "Failed to complete setup for merging directories" msgstr "" -#: lib/treemerge.c -msgid "Failed to allocate memory. Out of memory?" +#: lib/treemerge.c lib/cmdline.c +msgid "Failed to allocate memory" msgstr "" #: lib/cmdline.c @@ -1016,6 +1016,14 @@ msgstr "" msgid "Could not get metadata for path \"%s\": %s" msgstr "" +#: lib/cmdline.c +msgid "Failed to read stdin" +msgstr "" + +#: lib/cmdline.c +msgid "Could not process path arguments" +msgstr "" + #~ msgid "caching is not supported due to missing json-glib library." #~ msgstr "Cache non supporté, librairie json-glib manquante." diff --git a/po/rmlint.pot b/po/rmlint.pot index 465923c3a..348f86fad 100644 --- a/po/rmlint.pot +++ b/po/rmlint.pot @@ -937,8 +937,8 @@ msgstr "" msgid "Failed to complete setup for merging directories" msgstr "" -#: lib/treemerge.c -msgid "Failed to allocate memory. Out of memory?" +#: lib/treemerge.c lib/cmdline.c +msgid "Failed to allocate memory" msgstr "" #: lib/cmdline.c @@ -964,3 +964,11 @@ msgstr "" #, c-format msgid "Could not get metadata for path \"%s\": %s" msgstr "" + +#: lib/cmdline.c +msgid "Failed to read stdin" +msgstr "" + +#: lib/cmdline.c +msgid "Could not process path arguments" +msgstr "" From fb31ce0307bb373728c940690aeedd0f4910d12b Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Sat, 29 Sep 2018 18:01:59 +0000 Subject: [PATCH 13/32] lib/cfg.h: Remove member `read_stdin' It's only used locally in one function, so it can just be a local variable in that one function. --- lib/cfg.h | 1 - lib/cmdline.c | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/cfg.h b/lib/cfg.h index cb0e4909e..94e2e1daf 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -73,7 +73,6 @@ typedef struct RmCfg { gboolean progress_enabled; gboolean list_mounts; gboolean replay; - gboolean read_stdin; gboolean read_stdin0; gboolean no_backup; diff --git a/lib/cmdline.c b/lib/cmdline.c index b636f242f..8791b83f1 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1207,6 +1207,7 @@ bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { g_assert(cfg); + bool read_stdin = false; bool is_prefd = false; RmCmdSetPathVars v = { .cfg = cfg, @@ -1216,7 +1217,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { /* Check the directory to be valid */ for(int i = 0; paths && paths[i]; ++i) { if(strcmp(paths[i], "-") == 0) { - cfg->read_stdin = true; + read_stdin = true; /* remember whether to treat stdin paths as preferred paths */ v.stdin_paths_preferred = is_prefd; } else if(strcmp(paths[i], "//") == 0) { @@ -1229,7 +1230,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { g_strfreev(paths); - if(cfg->read_stdin || cfg->read_stdin0) { + if(read_stdin || cfg->read_stdin0) { /* option '-' means read paths from stdin */ if(!rm_cmd_set_paths_from_stdin(&v)) { rm_log_error_line(_("Could not process path arguments")); From d4f78059c07b5f8adacf3436a93e1908d647b9db Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Sat, 29 Sep 2018 18:09:16 +0000 Subject: [PATCH 14/32] lib/cmdline.c: Consolidate the logic around whether to read paths from `stdin' --- lib/cmdline.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 8791b83f1..321e855c6 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1161,6 +1161,7 @@ static bool rm_cmd_set_cmdline(RmCfg *cfg, int argc, char **argv) { typedef struct RmCmdSetPathVars { RmCfg *const cfg; bool stdin_paths_preferred; + const bool null_separated; bool all_paths_valid; } RmCmdSetPathVars; @@ -1171,9 +1172,8 @@ bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { RmCfg *const cfg = v->cfg; const bool is_prefd = v->stdin_paths_preferred; - const bool null_separated = cfg->read_stdin0; - char delim = null_separated ? 0 : '\n'; + char delim = v->null_separated ? 0 : '\n'; size_t buf_len = PATH_MAX; char *path_buf = malloc(buf_len); @@ -1207,10 +1207,12 @@ bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { g_assert(cfg); - bool read_stdin = false; + const bool read_stdin0 = cfg->read_stdin0; + bool read_stdin = read_stdin0; bool is_prefd = false; RmCmdSetPathVars v = { .cfg = cfg, + .null_separated = read_stdin0, .all_paths_valid = true }; @@ -1230,7 +1232,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { g_strfreev(paths); - if(read_stdin || cfg->read_stdin0) { + if(read_stdin) { /* option '-' means read paths from stdin */ if(!rm_cmd_set_paths_from_stdin(&v)) { rm_log_error_line(_("Could not process path arguments")); From 13304f79f48d25ebff77209f490d45268b60b51f Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 3 Oct 2018 16:00:28 +0000 Subject: [PATCH 15/32] lib/cmdline.c: Move constant loop condition (`paths') out of the loop The compiler probably already does this, but I find it clearer and more satisfying to do this explicitly, especially since it allows avoiding a call to `g_strfreev()', rather than relying on that function to check `paths' for us again. For the sake of clarity in the diff, the existing code has not yet been indented to reflect the fact that it's part of the `if' statement's consequent. --- lib/cmdline.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 321e855c6..2f8c8eeab 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1216,8 +1216,8 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { .all_paths_valid = true }; - /* Check the directory to be valid */ - for(int i = 0; paths && paths[i]; ++i) { + if(paths) { + for(int i = 0; paths[i]; ++i) { if(strcmp(paths[i], "-") == 0) { read_stdin = true; /* remember whether to treat stdin paths as preferred paths */ @@ -1231,6 +1231,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { } g_strfreev(paths); + } if(read_stdin) { /* option '-' means read paths from stdin */ From a9f4de00187b74bf3dea9f2d36cfa58f0c891ece Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 3 Oct 2018 16:03:32 +0000 Subject: [PATCH 16/32] lib/cmdline.c: Indent code to reflect the fact that it's in a block You can check that only white space changed: $ git diff HEAD^ --ignore-space-change && echo Nothing\ changed Nothing changed --- lib/cmdline.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 2f8c8eeab..2db20a4cd 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1217,20 +1217,20 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { }; if(paths) { - for(int i = 0; paths[i]; ++i) { - if(strcmp(paths[i], "-") == 0) { - read_stdin = true; - /* remember whether to treat stdin paths as preferred paths */ - v.stdin_paths_preferred = is_prefd; - } else if(strcmp(paths[i], "//") == 0) { - /* the '//' separator separates non-preferred paths from preferred */ - is_prefd = !is_prefd; - } else { - v.all_paths_valid &= rm_cfg_prepend_path(cfg, paths[i], is_prefd); + for(int i = 0; paths[i]; ++i) { + if(strcmp(paths[i], "-") == 0) { + read_stdin = true; + /* remember whether to treat stdin paths as preferred paths */ + v.stdin_paths_preferred = is_prefd; + } else if(strcmp(paths[i], "//") == 0) { + /* the '//' separator separates non-preferred paths from preferred */ + is_prefd = !is_prefd; + } else { + v.all_paths_valid &= rm_cfg_prepend_path(cfg, paths[i], is_prefd); + } } - } - g_strfreev(paths); + g_strfreev(paths); } if(read_stdin) { From 05191eee208f2a6fa27a0ad09f3c06c26b5b17f3 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 3 Oct 2018 17:31:35 +0000 Subject: [PATCH 17/32] lib/cmdline.c: Use a pointer rather than an index integer for looping --- lib/cmdline.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 2db20a4cd..67b770416 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1217,16 +1217,16 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { }; if(paths) { - for(int i = 0; paths[i]; ++i) { - if(strcmp(paths[i], "-") == 0) { + for(char *path, **list = paths; (path = *list); ++list) { + if(strcmp(path, "-") == 0) { read_stdin = true; /* remember whether to treat stdin paths as preferred paths */ v.stdin_paths_preferred = is_prefd; - } else if(strcmp(paths[i], "//") == 0) { + } else if(strcmp(path, "//") == 0) { /* the '//' separator separates non-preferred paths from preferred */ is_prefd = !is_prefd; } else { - v.all_paths_valid &= rm_cfg_prepend_path(cfg, paths[i], is_prefd); + v.all_paths_valid &= rm_cfg_prepend_path(cfg, path, is_prefd); } } From 4f14e8a031058acc6a9ebd442a0def2a7d180aab Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 3 Oct 2018 17:37:26 +0000 Subject: [PATCH 18/32] lib/cmdline.c: Free each command-line path as it is processed. On each iteration of the loop, a path string is freed when processing is finished, and then the whole list of strings is simply freed without having to iterate through the list again to free each string. --- lib/cmdline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 67b770416..4a85d944e 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1228,9 +1228,9 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { } else { v.all_paths_valid &= rm_cfg_prepend_path(cfg, path, is_prefd); } + g_free(path); } - - g_strfreev(paths); + g_free(paths); } if(read_stdin) { From 5dafe3a82ba6681e685a1746680820270a96101e Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 4 Oct 2018 06:25:11 +0000 Subject: [PATCH 19/32] lib/cfg-funcs.h: Move `cfg->replay' and `cfg->path_count++' These variables were handled directly in `rm_cfg_prepend_path()'; now, they're handled directly in `rm_cmd_set_paths*()'. This has allowed for optimizing the call to `rm_cfg_prepend_path()' that is used for adding the current working directory when no other path has been supplied by the user; there's no reason to check whether the current working directory is a JSON file. --- lib/cfg-funcs.h | 6 ++++-- lib/cmdline.c | 12 +++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/cfg-funcs.h b/lib/cfg-funcs.h index 9018b74c0..f92250555 100644 --- a/lib/cfg-funcs.h +++ b/lib/cfg-funcs.h @@ -131,16 +131,18 @@ static INLINE bool rm_cfg_prepend_path( RmCfg *const cfg, const char *const path, + const unsigned int index, + const bool replay, const bool preferred ) { g_assert(cfg); char *real_path; if(rm_path_is_valid(path, &real_path)) { rm_path_prepend( - (cfg->replay && rm_path_is_json(path)) ? + (replay && rm_path_is_json(path)) ? &cfg->json_paths : &cfg->paths, real_path, - cfg->path_count++, + index, preferred ); return true; diff --git a/lib/cmdline.c b/lib/cmdline.c index 4a85d944e..d04f11fb2 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1191,7 +1191,9 @@ bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { if(path_buf[path_len - 1] == delim) { path_buf[path_len - 1] = 0; } - v->all_paths_valid &= rm_cfg_prepend_path(cfg, path_buf, is_prefd); + v->all_paths_valid &= rm_cfg_prepend_path( + cfg, path_buf, cfg->path_count++, cfg->replay, is_prefd + ); } } @@ -1226,7 +1228,9 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { /* the '//' separator separates non-preferred paths from preferred */ is_prefd = !is_prefd; } else { - v.all_paths_valid &= rm_cfg_prepend_path(cfg, path, is_prefd); + v.all_paths_valid &= rm_cfg_prepend_path( + cfg, path, cfg->path_count++, cfg->replay, is_prefd + ); } g_free(path); } @@ -1243,7 +1247,9 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { if(g_slist_length(cfg->paths) == 0 && v.all_paths_valid) { /* Still no path set? - use `pwd` */ - rm_cfg_prepend_path(cfg, cfg->iwd, is_prefd); + rm_cfg_prepend_path( + cfg, cfg->iwd, cfg->path_count++, /* replay */ false, is_prefd + ); } /* Only return success if everything is fine. */ From a5698fdb2916a0d37f85bb9758f63b56ea1290e1 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Sun, 14 Oct 2018 17:26:05 +0000 Subject: [PATCH 20/32] lib/path.h: s/idx/index/ in `RmPath' The field `idx' of struct `RmPath' is now `index'. --- lib/path-funcs.h | 2 +- lib/path.h | 2 +- lib/replay.c | 2 +- lib/traverse.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/path-funcs.h b/lib/path-funcs.h index 76ec98a93..a75733225 100644 --- a/lib/path-funcs.h +++ b/lib/path-funcs.h @@ -136,7 +136,7 @@ void rm_path_prepend( RmPath *p = g_slice_new(RmPath); p->path = path; - p->idx = index; + p->index = index; p->is_prefd = preferred; p->treat_as_single_vol = (path[0] == '/') && (path[1] == '/'); diff --git a/lib/path.h b/lib/path.h index 6bff264a2..aa4f63a60 100644 --- a/lib/path.h +++ b/lib/path.h @@ -30,7 +30,7 @@ typedef struct RmPath { char *path; // result of `realpath()' of - unsigned int idx; // command line order, followed by stdin order + unsigned int index; // command line order, followed by stdin order bool is_prefd; // whether path was tagged as preferred path bool treat_as_single_vol; // treat this directory as one file system } RmPath; diff --git a/lib/replay.c b/lib/replay.c index c83e92523..23b7e67e9 100644 --- a/lib/replay.c +++ b/lib/replay.c @@ -315,7 +315,7 @@ static bool rm_parrot_check_path(RmParrot *polly, RmFile *file, const char *file highest_match = path_len; file->is_prefd = rmpath->is_prefd || polly->is_prefd; - file->path_index = rmpath->idx; + file->path_index = rmpath->index; } } } diff --git a/lib/traverse.c b/lib/traverse.c index 70e69665d..ba5aec48c 100644 --- a/lib/traverse.c +++ b/lib/traverse.c @@ -256,7 +256,7 @@ static void rm_traverse_directory(RmTravBuffer *buffer, RmTravSession *trav_sess RmPath *rmpath = buffer->rmpath; char is_prefd = rmpath->is_prefd; - RmOff path_index = rmpath->idx; + RmOff path_index = rmpath->index; /* Initialize ftsp */ int fts_flags = FTS_PHYSICAL | FTS_COMFOLLOW | FTS_NOCHDIR; @@ -492,7 +492,7 @@ void rm_traverse_tree(RmSession *session) { } rm_traverse_file(trav_session, &buffer->stat_buf, rmpath->path, - rmpath->is_prefd, rmpath->idx, RM_LINT_TYPE_UNKNOWN, false, + rmpath->is_prefd, rmpath->index, RM_LINT_TYPE_UNKNOWN, false, is_hidden, FALSE, 0); rm_trav_buffer_free(buffer); @@ -500,7 +500,7 @@ void rm_traverse_tree(RmSession *session) { /* It's a directory, traverse it. */ buffer->disk = rm_mds_device_get(mds, rmpath->path, (cfg->fake_pathindex_as_disk) - ? rmpath->idx + 1 + ? rmpath->index + 1 : buffer->stat_buf.st_dev); rm_mds_device_ref(buffer->disk, 1); rm_mds_push_task(buffer->disk, buffer->stat_buf.st_dev, 0, rmpath->path, From 322d7cc1100be5c3ca9ce1a99deef4bb8611d153 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 4 Oct 2018 06:31:43 +0000 Subject: [PATCH 21/32] lib/cmdline.c: Move `cfg->replay' and `cfg->path_count++' out of loops Though an optimizing compiler might do this already, one might as well just do it explicitly, so as not to depend on the whims of any particular compiler. --- lib/cmdline.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index d04f11fb2..a6df67f4a 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1160,13 +1160,17 @@ static bool rm_cmd_set_cmdline(RmCfg *cfg, int argc, char **argv) { typedef struct RmCmdSetPathVars { RmCfg *const cfg; + unsigned int index; bool stdin_paths_preferred; const bool null_separated; bool all_paths_valid; } RmCmdSetPathVars; static INLINE -bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { +bool rm_cmd_set_paths_from_stdin( + RmCmdSetPathVars *const v, + const bool replay +) { g_assert(v); g_assert(v->cfg); @@ -1192,7 +1196,7 @@ bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { path_buf[path_len - 1] = 0; } v->all_paths_valid &= rm_cfg_prepend_path( - cfg, path_buf, cfg->path_count++, cfg->replay, is_prefd + cfg, path_buf, v->index++, replay, is_prefd ); } } @@ -1209,11 +1213,13 @@ bool rm_cmd_set_paths_from_stdin(RmCmdSetPathVars *const v) { static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { g_assert(cfg); + const bool replay = cfg->replay; const bool read_stdin0 = cfg->read_stdin0; bool read_stdin = read_stdin0; bool is_prefd = false; RmCmdSetPathVars v = { .cfg = cfg, + .index = cfg->path_count, .null_separated = read_stdin0, .all_paths_valid = true }; @@ -1229,7 +1235,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { is_prefd = !is_prefd; } else { v.all_paths_valid &= rm_cfg_prepend_path( - cfg, path, cfg->path_count++, cfg->replay, is_prefd + cfg, path, v.index++, replay, is_prefd ); } g_free(path); @@ -1239,7 +1245,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { if(read_stdin) { /* option '-' means read paths from stdin */ - if(!rm_cmd_set_paths_from_stdin(&v)) { + if(!rm_cmd_set_paths_from_stdin(&v, replay)) { rm_log_error_line(_("Could not process path arguments")); return false; } @@ -1248,10 +1254,12 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { if(g_slist_length(cfg->paths) == 0 && v.all_paths_valid) { /* Still no path set? - use `pwd` */ rm_cfg_prepend_path( - cfg, cfg->iwd, cfg->path_count++, /* replay */ false, is_prefd + cfg, cfg->iwd, v.index++, /* replay */ false, is_prefd ); } + cfg->path_count = v.index; + /* Only return success if everything is fine. */ return v.all_paths_valid; } From 67e0738f927bd94c7895c2713e41dd5ff2ecdc1d Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Sun, 14 Oct 2018 20:53:35 +0000 Subject: [PATCH 22/32] lib/path.h: s/treat_as_single_vol/single_volume/ in `RmPath' The field `treat_as_single_vol' of struct `RmPath' is now `single_volume'. --- lib/path-funcs.h | 2 +- lib/path.h | 2 +- lib/traverse.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/path-funcs.h b/lib/path-funcs.h index a75733225..1f46ac0b9 100644 --- a/lib/path-funcs.h +++ b/lib/path-funcs.h @@ -138,7 +138,7 @@ void rm_path_prepend( p->path = path; p->index = index; p->is_prefd = preferred; - p->treat_as_single_vol = (path[0] == '/') && (path[1] == '/'); + p->single_volume = (path[0] == '/') && (path[1] == '/'); *list = g_slist_prepend(*list, p); } diff --git a/lib/path.h b/lib/path.h index aa4f63a60..b1a929c5a 100644 --- a/lib/path.h +++ b/lib/path.h @@ -32,7 +32,7 @@ typedef struct RmPath { char *path; // result of `realpath()' of unsigned int index; // command line order, followed by stdin order bool is_prefd; // whether path was tagged as preferred path - bool treat_as_single_vol; // treat this directory as one file system + bool single_volume; // treat this directory as one file system } RmPath; #endif /* end of include guard */ diff --git a/lib/traverse.c b/lib/traverse.c index ba5aec48c..90860c776 100644 --- a/lib/traverse.c +++ b/lib/traverse.c @@ -209,7 +209,7 @@ static bool rm_traverse_is_hidden(RmCfg *cfg, const char *basename, char *hierar trav_session, (RmStat *)stat_buf, p->fts_path, is_prefd, path_index, lint_type, \ is_symlink, \ rm_traverse_is_hidden(cfg, p->fts_name, is_hidden, p->fts_level + 1), \ - rmpath->treat_as_single_vol, p->fts_level); + rmpath->single_volume, p->fts_level); #if RM_PLATFORM_32 && HAVE_STAT64 @@ -261,7 +261,7 @@ static void rm_traverse_directory(RmTravBuffer *buffer, RmTravSession *trav_sess /* Initialize ftsp */ int fts_flags = FTS_PHYSICAL | FTS_COMFOLLOW | FTS_NOCHDIR; - if(rmpath->treat_as_single_vol) { + if(rmpath->single_volume) { rm_log_debug_line("Treating files under %s as a single volume", rmpath->path); } @@ -375,7 +375,7 @@ static void rm_traverse_directory(RmTravBuffer *buffer, RmTravSession *trav_sess path_index, RM_LINT_TYPE_UNKNOWN, false, rm_traverse_is_hidden(cfg, p->fts_name, is_hidden, p->fts_level + 1), - rmpath->treat_as_single_vol, p->fts_level); + rmpath->single_volume, p->fts_level); rm_log_warning_line(_("Added big file %s"), p->fts_path); } else { rm_log_warning_line(_("cannot stat file %s (skipping)"), p->fts_path); From e39fa29cc13c341ea673c4d9e6efa5737f4d1147 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 15 Oct 2018 02:09:32 +0000 Subject: [PATCH 23/32] lib/cfg.h: Use `RmCfg::path_count' more purposefully Now, member `path_count' of a struct `RmCfg' has a more guaranteed and expected purpose: During and after the user-input processing of non-option path arguments, it represents the number of items in the associated list `paths'. This has allowed for the removal of a call to `g_slist_length()'. --- lib/cfg-funcs.h | 23 ++++++++++++++++------- lib/cfg.h | 15 ++++++--------- lib/cmdline.c | 8 ++++---- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/lib/cfg-funcs.h b/lib/cfg-funcs.h index f92250555..c2e4f000a 100644 --- a/lib/cfg-funcs.h +++ b/lib/cfg-funcs.h @@ -127,6 +127,14 @@ bool rm_cfg_prepend_json( return false; } +#define PREPEND_TO(list) \ + rm_path_prepend( \ + &(list), \ + real_path, \ + index, \ + preferred \ + ) + static INLINE bool rm_cfg_prepend_path( RmCfg *const cfg, @@ -138,18 +146,19 @@ bool rm_cfg_prepend_path( g_assert(cfg); char *real_path; if(rm_path_is_valid(path, &real_path)) { - rm_path_prepend( - (replay && rm_path_is_json(path)) ? - &cfg->json_paths : &cfg->paths, - real_path, - index, - preferred - ); + if(replay && rm_path_is_json(path)) { + PREPEND_TO(cfg->json_paths); + } else { + PREPEND_TO(cfg->paths); + ++(cfg->path_count); + } return true; } return false; } +#undef PREPEND_TO + static INLINE void rm_cfg_free_paths(RmCfg *const cfg) { g_assert(cfg); diff --git a/lib/cfg.h b/lib/cfg.h index 94e2e1daf..63e2cd3e1 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -100,16 +100,13 @@ typedef struct RmCfg { * * + To record a unique index for each path supplied by the * user; a path's index represents the number of paths that - * were already processed. This is always the case. + * were already processed. This is the case DURING the + * processing of user-input options (such as '--replay'). * - * + To provide quick access to the length of its associated - * RmCfg::paths list. This is only the case when NOT running - * in "--replay" mode; when running in "--replay" mode, it - * just represents the total number of paths that have been - * supplied by the user, i.e., the sums of the lengths of - * the associated lists RmCfg::{paths,json_paths}, which is - * not meant to be a useful number to know, and is simply a - * byproduct of calculating path indicies. + * + To provide quick access to the length of its associated + * RmCfg::paths list. This is the case AFTER the processing + * of user-input options (including during the processing of + * non-option paths from the command line or stdin). */ guint path_count; diff --git a/lib/cmdline.c b/lib/cmdline.c index a6df67f4a..69253eb29 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1224,6 +1224,8 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { .all_paths_valid = true }; + cfg->path_count = 0; + if(paths) { for(char *path, **list = paths; (path = *list); ++list) { if(strcmp(path, "-") == 0) { @@ -1251,15 +1253,13 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { } } - if(g_slist_length(cfg->paths) == 0 && v.all_paths_valid) { + if(cfg->path_count == 0 && v.all_paths_valid) { /* Still no path set? - use `pwd` */ rm_cfg_prepend_path( - cfg, cfg->iwd, v.index++, /* replay */ false, is_prefd + cfg, cfg->iwd, v.index, /* replay */ false, is_prefd ); } - cfg->path_count = v.index; - /* Only return success if everything is fine. */ return v.all_paths_valid; } From 2f72e34af585e025c922b5196766c2c3d1a6902d Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 4 Oct 2018 22:09:41 +0000 Subject: [PATCH 24/32] lib/cfg.c: git mv lib/cfg.c lib/cfg-funcs.h This breaks the project, and is thus meant to be merged into a working commit. --- lib/{cfg.c => cfg-funcs.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lib/{cfg.c => cfg-funcs.h} (100%) diff --git a/lib/cfg.c b/lib/cfg-funcs.h similarity index 100% rename from lib/cfg.c rename to lib/cfg-funcs.h From eb09ea980b1eddb5b9694269af1652cfb7a55a5b Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 3 Oct 2018 15:46:20 +0000 Subject: [PATCH 25/32] lib/cfg.*: Rename `rm_cfg_add_path' to `rm_cfg_prepend_path()' This makes it consistent with `rm_cfg_prepend_json()', and it also implies that the list of files is being constructed such that it will be reversed with respect to the order in which paths are provided by the user. --- lib/cfg.c | 2 +- lib/cfg.h | 2 +- lib/cmdline.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/cfg.c b/lib/cfg.c index b4651e3ae..24f8027d1 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -120,7 +120,7 @@ bool rm_cfg_prepend_json( return false; } -bool rm_cfg_add_path( +bool rm_cfg_prepend_path( RmCfg *const cfg, const bool preferred, const char *const path diff --git a/lib/cfg.h b/lib/cfg.h index b577de3f1..76c9e57c6 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -179,7 +179,7 @@ bool rm_cfg_prepend_json( const char *const path ); -bool rm_cfg_add_path( +bool rm_cfg_prepend_path( RmCfg *const cfg, const bool preferred, const char *const path diff --git a/lib/cmdline.c b/lib/cmdline.c index 2f09076f3..a1685982c 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -377,7 +377,7 @@ static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, if (path_buf[path_len - 1] == delim) { path_buf[path_len - 1] = 0; } - all_paths_read &= rm_cfg_add_path(session->cfg, is_prefd, path_buf); + all_paths_read &= rm_cfg_prepend_path(session->cfg, is_prefd, path_buf); } } @@ -1199,7 +1199,7 @@ static bool rm_cmd_set_paths(RmSession *session, char **paths) { /* the '//' separator separates non-preferred paths from preferred */ is_prefd = !is_prefd; } else { - all_paths_valid &= rm_cfg_add_path(cfg, is_prefd, paths[i]); + all_paths_valid &= rm_cfg_prepend_path(cfg, is_prefd, paths[i]); } } @@ -1213,7 +1213,7 @@ static bool rm_cmd_set_paths(RmSession *session, char **paths) { if(g_slist_length(cfg->paths) == 0 && all_paths_valid) { /* Still no path set? - use `pwd` */ - rm_cfg_add_path(session->cfg, is_prefd, cfg->iwd); + rm_cfg_prepend_path(session->cfg, is_prefd, cfg->iwd); } /* Only return success if everything is fine. */ From bc825f3cd3d65da550d5d15473045b78f18ea112 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 4 Oct 2018 21:18:20 +0000 Subject: [PATCH 26/32] lib/cmdline.c: Abstract out `rm_cmd_set_paths_from_cmdline()' Code from `rm_cmd_set_paths()' has been moved to its own function: rm_cmd_set_paths_from_cmdline() --- lib/cmdline.c | 54 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 69253eb29..7ec659160 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1160,12 +1160,41 @@ static bool rm_cmd_set_cmdline(RmCfg *cfg, int argc, char **argv) { typedef struct RmCmdSetPathVars { RmCfg *const cfg; + char **const paths; unsigned int index; + bool is_prefd; + bool read_stdin; bool stdin_paths_preferred; const bool null_separated; bool all_paths_valid; } RmCmdSetPathVars; +static INLINE +void rm_cmd_set_paths_from_cmdline( + RmCmdSetPathVars *const v, + const bool replay +) { + g_assert(v); + g_assert(v->paths); + + for(char *path, **list = v->paths; (path = *list); ++list) { + if(strcmp(path, "-") == 0) { + v->read_stdin = true; + /* remember whether to treat stdin paths as preferred paths */ + v->stdin_paths_preferred = v->is_prefd; + } else if(strcmp(path, "//") == 0) { + /* the '//' separator separates non-preferred paths from preferred */ + v->is_prefd = !v->is_prefd; + } else { + v->all_paths_valid &= rm_cfg_prepend_path( + v->cfg, path, v->index++, replay, v->is_prefd + ); + } + g_free(path); + } + g_free(v->paths); +} + static INLINE bool rm_cmd_set_paths_from_stdin( RmCmdSetPathVars *const v, @@ -1215,11 +1244,11 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { const bool replay = cfg->replay; const bool read_stdin0 = cfg->read_stdin0; - bool read_stdin = read_stdin0; - bool is_prefd = false; RmCmdSetPathVars v = { .cfg = cfg, + .paths = paths, .index = cfg->path_count, + .read_stdin = read_stdin0, .null_separated = read_stdin0, .all_paths_valid = true }; @@ -1227,25 +1256,10 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { cfg->path_count = 0; if(paths) { - for(char *path, **list = paths; (path = *list); ++list) { - if(strcmp(path, "-") == 0) { - read_stdin = true; - /* remember whether to treat stdin paths as preferred paths */ - v.stdin_paths_preferred = is_prefd; - } else if(strcmp(path, "//") == 0) { - /* the '//' separator separates non-preferred paths from preferred */ - is_prefd = !is_prefd; - } else { - v.all_paths_valid &= rm_cfg_prepend_path( - cfg, path, v.index++, replay, is_prefd - ); - } - g_free(path); - } - g_free(paths); + rm_cmd_set_paths_from_cmdline(&v, replay); } - if(read_stdin) { + if(v.read_stdin) { /* option '-' means read paths from stdin */ if(!rm_cmd_set_paths_from_stdin(&v, replay)) { rm_log_error_line(_("Could not process path arguments")); @@ -1256,7 +1270,7 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { if(cfg->path_count == 0 && v.all_paths_valid) { /* Still no path set? - use `pwd` */ rm_cfg_prepend_path( - cfg, cfg->iwd, v.index, /* replay */ false, is_prefd + cfg, cfg->iwd, v.index, /* replay */ false, v.is_prefd ); } From 74daf3ba2d9ed86a824efa9b105481faae692e15 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 4 Oct 2018 22:40:38 +0000 Subject: [PATCH 27/32] lib/cfg-funcs.h: Complete `git mv lib/cfg.c lib/cfg-funcs.h' --- lib/api.h | 1 + lib/cfg-funcs.h | 9 +++++++++ lib/cfg.h | 15 --------------- lib/cmdline.c | 1 + lib/session.c | 1 + 5 files changed, 12 insertions(+), 15 deletions(-) diff --git a/lib/api.h b/lib/api.h index 63beb787f..3cb89400f 100644 --- a/lib/api.h +++ b/lib/api.h @@ -27,6 +27,7 @@ #include #include "cfg.h" +#include "cfg-funcs.h" #include "cmdline.h" #include "config.h" #include "session.h" diff --git a/lib/cfg-funcs.h b/lib/cfg-funcs.h index 687803145..61f15e76d 100644 --- a/lib/cfg-funcs.h +++ b/lib/cfg-funcs.h @@ -22,6 +22,9 @@ * Hosted on http://github.com/sahib/rmlint **/ +#ifndef RM_CFG_FUNCS_H +#define RM_CFG_FUNCS_H + #include // memset #include // bool, true, false #include // PATH_MAX (maybe) @@ -34,6 +37,7 @@ /* Options not specified by commandline get a default option - * this is usually called before rm_cmd_parse_args */ +static INLINE void rm_cfg_set_default(RmCfg *cfg) { /* Set everything to 0 at first, * only non-null options are listed below. @@ -102,6 +106,7 @@ void rm_cfg_set_default(RmCfg *cfg) { rm_trie_init(&cfg->file_trie); } +static INLINE bool rm_cfg_prepend_json( RmCfg *const cfg, const char *const path @@ -120,6 +125,7 @@ bool rm_cfg_prepend_json( return false; } +static INLINE bool rm_cfg_prepend_path( RmCfg *const cfg, const char *const path, @@ -140,6 +146,7 @@ bool rm_cfg_prepend_path( return false; } +static INLINE void rm_cfg_free_paths(RmCfg *const cfg) { g_assert(cfg); g_slist_free_full(cfg->paths, (GDestroyNotify)rm_path_free); @@ -147,3 +154,5 @@ void rm_cfg_free_paths(RmCfg *const cfg) { g_slist_free_full(cfg->json_paths, (GDestroyNotify)rm_path_free); cfg->json_paths = NULL; } + +#endif /* end of include guard */ diff --git a/lib/cfg.h b/lib/cfg.h index 9541b1a72..cb0e4909e 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -172,19 +172,4 @@ typedef struct RmCfg { } RmCfg; -void rm_cfg_set_default(RmCfg *cfg); - -bool rm_cfg_prepend_json( - RmCfg *const cfg, - const char *const path -); - -bool rm_cfg_prepend_path( - RmCfg *const cfg, - const char *const path, - const bool preferred -); - -void rm_cfg_free_paths(RmCfg *const cfg); - #endif /* end of include guard */ diff --git a/lib/cmdline.c b/lib/cmdline.c index 1f0fbbc1a..fc4c42f16 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -39,6 +39,7 @@ #include #include +#include "cfg-funcs.h" #include "cmdline.h" #include "formats.h" #include "hash-utility.h" diff --git a/lib/session.c b/lib/session.c index bf4c4712f..9d8a1613a 100644 --- a/lib/session.c +++ b/lib/session.c @@ -28,6 +28,7 @@ #include #include "config.h" +#include "cfg-funcs.h" #include "formats.h" #include "preprocess.h" #include "session.h" From 6dcbfde919d75dbf9f9767e78cacf68fe66f7085 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 3 Oct 2018 16:49:22 +0000 Subject: [PATCH 28/32] lib/cfg.*: rm_cfg_prepend_path(): swap parameters `preferred' and `path' --- lib/cfg.c | 4 ++-- lib/cfg.h | 4 ++-- lib/cmdline.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/cfg.c b/lib/cfg.c index 24f8027d1..687803145 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -122,8 +122,8 @@ bool rm_cfg_prepend_json( bool rm_cfg_prepend_path( RmCfg *const cfg, - const bool preferred, - const char *const path + const char *const path, + const bool preferred ) { g_assert(cfg); char *real_path; diff --git a/lib/cfg.h b/lib/cfg.h index 76c9e57c6..9541b1a72 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -181,8 +181,8 @@ bool rm_cfg_prepend_json( bool rm_cfg_prepend_path( RmCfg *const cfg, - const bool preferred, - const char *const path + const char *const path, + const bool preferred ); void rm_cfg_free_paths(RmCfg *const cfg); diff --git a/lib/cmdline.c b/lib/cmdline.c index a1685982c..1f0fbbc1a 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -377,7 +377,7 @@ static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, if (path_buf[path_len - 1] == delim) { path_buf[path_len - 1] = 0; } - all_paths_read &= rm_cfg_prepend_path(session->cfg, is_prefd, path_buf); + all_paths_read &= rm_cfg_prepend_path(session->cfg, path_buf, is_prefd); } } @@ -1199,7 +1199,7 @@ static bool rm_cmd_set_paths(RmSession *session, char **paths) { /* the '//' separator separates non-preferred paths from preferred */ is_prefd = !is_prefd; } else { - all_paths_valid &= rm_cfg_prepend_path(cfg, is_prefd, paths[i]); + all_paths_valid &= rm_cfg_prepend_path(cfg, paths[i], is_prefd); } } @@ -1213,7 +1213,7 @@ static bool rm_cmd_set_paths(RmSession *session, char **paths) { if(g_slist_length(cfg->paths) == 0 && all_paths_valid) { /* Still no path set? - use `pwd` */ - rm_cfg_prepend_path(session->cfg, is_prefd, cfg->iwd); + rm_cfg_prepend_path(session->cfg, cfg->iwd, is_prefd); } /* Only return success if everything is fine. */ From 4182c5946e3c22a098b6178cabda9d6548840530 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Sun, 7 Oct 2018 17:12:09 +0000 Subject: [PATCH 29/32] lib/cmdline.c: Replace `strcmp()' with direct character comparisons The way `rmlint' currently works is that it produces a shell script that sometimes performs its work by running `rmlint' many times with varying arguments; `rmlint' can be run a large number of times, and each invocation involves argument processing whereby each path argument undergoes multiple char-by-char comparisons in search of a match for a special, well-known value (e.g., "-" or "//"). Therefore, this commit optimizes the process by using tailored, idiomatic C-string calculations in lieu of invoking a generic, open-ended, arbitrary comparison function; this commit replaces the Standard C Library's `strcmp()' with boolean operators and character comparisons. The calculations are idiomatic in that they are a familiar way of processing strings, but are somewhat particular to C code. To aid reading this code, comments have been embedded to clarify the purpose, as in the following: if(/* path is "-" */ path[0] == '-' && path[1] == 0) It's true that an optimizing compiler might perform this substitution automatically, or it might even make a better, platform-specific optimization. However, this seems unlikely based on the following rudimentary benchmarks; first define some Bash functions: # Copy this script with the following command line, where "$THIS" # expands to the revision of this commit log message: # # git log -1 --format=%b "$THIS" | # sed -n '/^ #-/,/^ #-/p' | cut -c3- >/tmp/script.sh # #---------------------------------------------------------------- make_test() { # Note that the body of this function is # explicitly indented with tab characters # for the benefit of the "<<-" operator. local name="$1" expression="$2" cat >"$name".c <<-EOF #include // bool #include // strcmp #define ITERATIONS 10000000 // 10 million int main() { int result = 0; char string[3] = {0}; const char *const volatile pointer = string; bool toggle = 0; for (long i = 0; i < ITERATIONS; ++i) { string[0] = string[1] = '/' + toggle; toggle = !toggle; const char *const s = pointer; result += $expression; } return result; } EOF } build() { local name="$1"; shift gcc -std=c99 -pedantic -Wall -static -fno-plt -fno-pie "$@" \ "$name".c -o "$name" } build_all() { build strcmp "$@" && build equals "$@"; } run() { time -p ./"$1"; } benchmark() { echo =========== "$@" run strcmp echo ----------- run equals } build_and_benchmark() { build_all "$@" || return 1 benchmark "$@"; return 0 } #---------------------------------------------------------------- Then, create and run some benchmarks: make_test strcmp 'strcmp(s,"//")' make_test equals "(s[0] == '/' && s[1] == '/' && s[2] == 0)" gcc --version | head -n 1 build_and_benchmark -O0 build_and_benchmark -Og build_and_benchmark -O1 build_and_benchmark -O2 All together, this produces the following output on my [very old and slow] system; at each optimization level, timing information is provided first for `strcmp()' and then for the manual calculation. Perversely, optimization seems to make `strcmp()' slower, not faster; in contrast, optimization drastically improves the performance of manual calculation, which is also always faster than using `strcmp()'. gcc (GCC) 7.3.1 20180312 =========== -O0 real 0.35 user 0.35 sys 0.00 ----------- real 0.24 user 0.24 sys 0.00 =========== -Og real 0.58 user 0.57 sys 0.00 ----------- real 0.15 user 0.15 sys 0.00 =========== -O1 real 0.60 user 0.59 sys 0.00 ----------- real 0.16 user 0.16 sys 0.00 =========== -O2 real 0.58 user 0.57 sys 0.00 ----------- real 0.07 user 0.07 sys 0.00 Assembler code for the unoptimized case can be produced with the following: build_all -S -O0 cat strcmp # review cat equals Here is the meat of the comparison: strcmp equals ------------------------- | ------------------------- .LC0: | movl -20(%ebp), %eax .string "//" | movzbl (%eax), %eax ... | cmpb $47, %al // s[0] == '/' pushl $.LC0 | jne .L3 pushl -20(%ebp) | movl -20(%ebp), %eax call *strcmp@GOT | addl $1, %eax | movzbl (%eax), %eax | cmpb $47, %al // s[1] == '/' | jne .L3 | movl -20(%ebp), %eax | addl $2, %eax | movzbl (%eax), %eax | testb %al, %al // s[2] == 0 | jne .L3 | movl $1, %eax | jmp .L4 | .L3: | movl $0, %eax As you can see, one is calling a function, while the other is performing the manual calculations. Now, consider the optimized version: build_all -S -O2 cat strcmp cat equals This yields: strcmp equals ------------------------- | ------------------------- .LC0: | movl -20(%ebp), %ebx .string "//" | cmpb $47, (%ebx) ... | jne .L2 movl -36(%ebp), %esi | cmpb $47, 1(%ebx) movl $.LC0, %edi | jne .L2 ... | cmpb $1, 2(%ebx) movl $3, %ecx | repz cmpsb | seta %cl | setb %bl | subl %ebx, %ecx | movsbl %cl, %ecx | There is no longer a call to `strcmp()', but the compiler has substituted what is essentially an exact replica of `strcmp()' in terms of x86 assember code; the `repz cmpsb' iterates through 2 strings in memory, comparing the bytes, stopping when there is a mismatch. Strangely, one would think that a native x86 instruction sequence might be inherently faster, but the benchmarks reveal that the naive integer comparisons are much faster. I suspect the reason for this is 2-fold: * Perhaps, `repz cmpsb' is performing a lot more memory access; in contrast, each naive integer comparison instruction can encode a comparand directly (e.g., the number 47, which is ASCII for '/'). * Maybe, `repz cmpsb' cannot benefit from instruction pipelining as readily as separate integer comparisons. Perhaps the GNU GCC team should re-consider how `strcmp()' is optimized. Anyway, the results are clear: It's better to do these small calculations by hand, rather than rely on the compiler to do something magical. --- lib/cmdline.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 7ec659160..9869a7975 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1178,12 +1178,10 @@ void rm_cmd_set_paths_from_cmdline( g_assert(v->paths); for(char *path, **list = v->paths; (path = *list); ++list) { - if(strcmp(path, "-") == 0) { + if(/* path is "-" */ path[0] == '-' && path[1] == 0) { v->read_stdin = true; - /* remember whether to treat stdin paths as preferred paths */ v->stdin_paths_preferred = v->is_prefd; - } else if(strcmp(path, "//") == 0) { - /* the '//' separator separates non-preferred paths from preferred */ + } else if(/* path is "//" */ path[0] == '/' && path[1] == '/' && path[2] == 0) { v->is_prefd = !v->is_prefd; } else { v->all_paths_valid &= rm_cfg_prepend_path( From a4e4b88d043f5f89ea55aefe3f8f1e6520148fe9 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 4 Oct 2018 21:38:23 +0000 Subject: [PATCH 30/32] lib/cmdline.c: Tell compiler to optimize for the value of `replay' When `cfg->replay' is false, there's no reason to check whether a path is a `json' file. --- lib/cmdline.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 9869a7975..366e3cc75 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1237,6 +1237,17 @@ bool rm_cmd_set_paths_from_stdin( return true; } +#define PROCESS_PATHS(replay) \ + if(paths) { \ + rm_cmd_set_paths_from_cmdline(&v, (replay)); \ + } \ + if(v.read_stdin) { \ + if(!rm_cmd_set_paths_from_stdin(&v, (replay))) { \ + rm_log_error_line(_("Could not process path arguments")); \ + return false; \ + } \ + } + static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { g_assert(cfg); @@ -1253,16 +1264,10 @@ static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { cfg->path_count = 0; - if(paths) { - rm_cmd_set_paths_from_cmdline(&v, replay); - } - - if(v.read_stdin) { - /* option '-' means read paths from stdin */ - if(!rm_cmd_set_paths_from_stdin(&v, replay)) { - rm_log_error_line(_("Could not process path arguments")); - return false; - } + if(replay) { + PROCESS_PATHS(true); + } else { + PROCESS_PATHS(false); } if(cfg->path_count == 0 && v.all_paths_valid) { From 30e715605e4fb94c8bfc61fa257d359170cfb8c3 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 15 Oct 2018 17:58:45 +0000 Subject: [PATCH 31/32] lib/cfg-funcs.h: Insert `g_assert(cfg);' into `rm_cfg_set_default()' --- lib/cfg-funcs.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/cfg-funcs.h b/lib/cfg-funcs.h index 61f15e76d..9018b74c0 100644 --- a/lib/cfg-funcs.h +++ b/lib/cfg-funcs.h @@ -39,6 +39,8 @@ * this is usually called before rm_cmd_parse_args */ static INLINE void rm_cfg_set_default(RmCfg *cfg) { + g_assert(cfg); + /* Set everything to 0 at first, * only non-null options are listed below. */ From 2c24f775ccc7de9e0097f6544b538cfa7dcc6730 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Tue, 16 Oct 2018 18:42:40 +0000 Subject: [PATCH 32/32] lib/cmdline.c: INLINE `rm_cmd_set_paths()' It's a static function that is used in only place. --- lib/cmdline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 366e3cc75..a3ab75f97 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1248,7 +1248,8 @@ bool rm_cmd_set_paths_from_stdin( } \ } -static bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { +static INLINE +bool rm_cmd_set_paths(RmCfg *const cfg, char **const paths) { g_assert(cfg); const bool replay = cfg->replay;