aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Tristan Riehs <tristan.riehs@inria.fr> 2025-12-25 19:17:44 +0100
committer Tristan Riehs <tristan.riehs@inria.fr> 2025-12-25 19:17:44 +0100
commit 438c2aa071b069b86ee588f6f507cbce2682f055 (patch)
tree 01c52e2348f7912031e66dcf2cb7a12b006eac50 /src
parent 955e35044372d2fb2a8a5bbed8ead3373e2ecaf6 (diff)
Add duplicate search when adding files
Duplicates are searched for in the database as well as in the given files (on the command line).
Diffstat (limited to 'src')
-rw-r--r-- src/main.c 99
1 files changed, 89 insertions, 10 deletions
diff --git a/src/main.c b/src/main.c
index b83072f..09540b6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -386,17 +386,34 @@ static void ftag_file_add_help(void)
printf(" $ find DIR -type f -print | xargs ftag file add -i\n");
}
+/* Structure only used by ftag_file_add and get_sums_callback, for building a
+ * list of file sums. */
+struct known_sums_s {
+ uint32_t *array;
+ int len;
+ int capacity;
+};
+
+static int get_sums_callback(void *_known_sums, int, char **cols, char **)
+{
+ struct known_sums_s *known_sums = _known_sums;
+ assert(cols[0]);
+ known_sums->array[known_sums->len] = (uint32_t)atol(cols[0]);
+ known_sums->len++;
+ assert(known_sums->len <= known_sums->capacity);
+ return 0;
+}
+
/* Add new files to the database. If directories are given, every file in the
* directory will be added, does _not_ recurse into subdirectories. */
static void ftag_file_add(int argc, char **argv)
{
+ /* step 0: parse options */
if (argc == 0) {
ftag_file_add_usage();
exit(EXIT_FAILURE);
}
- sqlite3 *db = NULL;
int interactive = 0;
- int rc;
while ((argc > 0) && (argv[0][0] == '-')) {
switch (argv[0][1]) {
case 'i':
@@ -412,24 +429,86 @@ static void ftag_file_add(int argc, char **argv)
argv++;
argc--;
}
+
+ /* step 1: compute file sums */
+ uint32_t *sums = malloc(argc*sizeof(*sums));
+ struct stat st;
+ int rc;
+ for (int i = 0; i < argc; i++) {
+ char *file = argv[i];
+ rc = stat(file, &st);
+ if (rc == -1) {
+ fprintf(stderr, "stat: \"%s\": ", file);
+ perror("");
+ exit(EXIT_FAILURE);
+ }
+ if (st.st_mode & S_IFREG) {
+ sums[i] = sum(file);
+ }
+ else {
+ /* skip non-file arguments */
+ argv[i] = NULL;
+ }
+ }
+
+ /* step 2: retrieve sums of files already in the database */
+ sqlite3 *db;
rc = sqlite3_open(DATABASE_PATH, &db);
sqlite3_check(rc, db);
+
int next_id = table_next_id(db, "files");
+ int file_count = next_id; /* number of files already in the database */
+ uint32_t known_sums_array[file_count];
+ struct known_sums_s known_sums = {
+ .array = known_sums_array,
+ .len = 0,
+ .capacity = file_count
+ };
+
+ rc = sqlite3_exec(db, "SELECT sum FROM files;", get_sums_callback, &known_sums, NULL);
+ sqlite3_check(rc, db);
+
+ /* step 3: eliminate duplicates with respect to file sum */
+ for (int i = 0; i < argc; i++) {
+ const uint32_t original = sums[i];
+ /* step 3.1: eliminate duplicates found in database */
+ for (int j = 0; j < file_count; j++) {
+ const uint32_t duplicate = known_sums_array[j];
+ if (duplicate == original) {
+ /* sum already known in database */
+ printf("%s debug: file \"%s\" found in database\n",
+ __func__, argv[i]);
+ argv[i] = NULL;
+ break;
+ }
+ }
+ /* step 3.2: eliminate duplicates within given files */
+ if (argv[i] == NULL)
+ continue;
+ for (int j = i+1; j < argc; j++) {
+ const uint32_t duplicate = sums[j];
+ if (duplicate == original) {
+ printf("%s debug: file \"%s\" found in given files\n",
+ __func__, argv[j]);
+ argv[j] = NULL;
+ }
+ }
+ }
+
+ /* step 4: perform addition to database */
for (int i = 0; i < argc; i++) {
char *file = argv[i];
+ if (file == NULL)
+ continue;
if (interactive) {
printf("Add \"%s\" to database ?", file);
if (!prompt_yes_no())
- continue; /* do not add this file */
- }
- struct stat st;
- rc = stat(file, &st);
- assert(rc == 0);
- if (st.st_mode & S_IFREG) {
- ftag_add_one_file(db, &next_id, file);
- next_id++;
+ continue;
}
+ ftag_add_one_file(db, &next_id, file, sums[i]);
+ next_id++;
}
+ free(sums);
sqlite3_close(db);
}