/*
arfnet2-search: Fast file indexer and search
Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
index.c: Efficient fast file index
*/
#define _GNU_SOURCE
#include "index.h"
#include
#include
#include
#include
#include
#include
#include
#include "config.h"
/* closed addressing map */
typedef struct map_s {
struct node_s *map;
size_t count, size;
} map_t;
struct node_s {
node_data_t *data;
struct node_s *next;
map_t *child;
};
static magic_t magic_cookie = NULL;
size_t
hash(const char *s, int mod)
{
size_t hash = 0;
if (!s)
return 0;
while (*s)
hash = hash * 31 + *s++;
return hash % mod;
}
map_t *
map_new(size_t size)
{
map_t *map = malloc(sizeof(map_t));
map->map = malloc(sizeof(struct node_s) * size);
memset(map->map, 0, sizeof(struct node_s) * size);
map->size = size;
map->count = 0;
return map;
}
void
map_insert(map_t *map, const char *key, node_data_t *data, map_t *child)
{
struct node_s *node = &map->map[hash(key, map->size)];
if (node->data) {
for (; node->next; node = node->next);
node->next = malloc(sizeof(struct node_s));
node->next->data = data;
node->next->child = child;
node->next->next = NULL;
} else {
node->data = data;
node->child = child;
}
map->count++;
}
results_t *
results_new()
{
results_t *r = malloc(sizeof(results_t));
r->capacity = INIT_VEC_CAPACITY;
r->results = malloc(sizeof(node_data_t*) * r->capacity);
memset(r->results, 0, sizeof(node_data_t*) * r->capacity);
r->size = 0;
return r;
}
void
results_insert(results_t *results, const node_data_t *result)
{
if (results->size + 1 >= results->capacity) {
results->capacity *= 2;
results->results = realloc(results->results, sizeof(node_data_t*) *
results->capacity);
}
results->results[results->size++] = result;
}
static int
cmp_results(const void *_r1, const void *_r2, void *arg)
{
const node_data_t *r1 = *(node_data_t**)_r1, *r2 = *(node_data_t**)_r2;
sort_type_t sort_type = ((int*)arg)[0];
int desc = ((int*)arg)[1];
int cmp = 0;
switch (sort_type) {
case SORT_NAME:
cmp = strcmp(r1->name, r2->name);
break;
case SORT_PATH:
cmp = strcmp(r1->path, r2->path);
break;
case SORT_MIME:
if (!r1->mime)
return 0;
cmp = strcmp(r1->mime, r2->mime);
break;
case SORT_SIZE:
cmp = r1->stat.st_size - r2->stat.st_size;
break;
case SORT_TIME:
cmp = r1->stat.st_mtime - r2->stat.st_mtime;
break;
}
return !desc ? cmp : -cmp;
}
void
results_sort(results_t *results, sort_type_t sort_type, int desc)
{
int arg[2] = { sort_type, desc };
qsort_r(results->results, results->size, sizeof(node_data_t*), cmp_results,
&arg);
}
results_t *
results_filter(results_t *results, const filter_t *filter)
{
results_t *filtered = results_new();
for (size_t i = 0; i < results->size; i++) {
const node_data_t *n = results->results[i];
if (filter->time_low && (n->stat.st_mtime < filter->time_low))
continue;
if (filter->time_high && (n->stat.st_mtime > filter->time_high))
continue;
if (filter->size_low && (n->stat.st_size < filter->size_low))
continue;
if (filter->size_high && (n->stat.st_size > filter->size_high))
continue;
results_insert(filtered, n);
}
results_destroy(results);
return filtered;
}
void
results_destroy(results_t *results)
{
free(results->results);
free(results);
}
int
index_init()
{
magic_cookie = magic_open(MAGIC_MIME);
if (!magic_cookie) {
fprintf(stderr, "[index] error magic_open()\n");
return -1;
}
if (magic_load(magic_cookie, NULL) < 0) {
fprintf(stderr, "[index] error magic_load(): %s\n",
magic_error(magic_cookie));
return -1;
}
return 0;
}
void
index_deinit()
{
magic_close(magic_cookie);
}
map_t *
index_recurse(size_t size, const char *dir, int examine, size_t rootlen)
{
DIR *dirp = opendir(dir);
if (!dirp) {
fprintf(stderr, "[index] error opening directory %s: %s\n", dir,
strerror(errno));
return NULL;
}
map_t *map = map_new(size);
char path[4096];
struct dirent *de = NULL;
while ((de = readdir(dirp))) {
if (de->d_name[0] == '.') {
if (de->d_name[1] == '\0')
continue;
else if (de->d_name[1] == '.')
if (de->d_name[2] == '\0')
continue;
}
snprintf(path, 4096, "%s/%s", dir, de->d_name);
/* stat it */
node_data_t *data = malloc(sizeof(node_data_t));
memset(data, 0, sizeof(node_data_t));
data->name = strdup(de->d_name);
data->path = strdup(&path[rootlen]);
if (stat(path, &data->stat) < 0) {
fprintf(stderr, "[index] error stat() %s: %s\n", path,
strerror(errno));
free(data);
data = NULL;
}
/* examine */
if (examine) {
const char *mime = magic_file(magic_cookie, path);
if (!mime) {
fprintf(stderr, "[index] error magic_file() %s: %s\n", path,
magic_error(magic_cookie));
} else
data->mime = strdup(mime);
}
/* recurse */
map_t *child = NULL;
if (de->d_type == DT_DIR) {
child = index_recurse(size, path, examine, rootlen);
}
map_insert(map, de->d_name, data, child);
}
closedir(dirp);
return map;
}
map_t *
index_new(size_t size, const char *dir, int examine) {
return index_recurse(size, dir, examine, strlen(dir) + 1);
}
void
index_lookup_substr(map_t *index, const char *query,
results_t *results)
{
for (size_t i = 0; i < index->size; i++) {
if (!index->map[i].data)
continue;
for (struct node_s *node = &index->map[i]; node; node = node->next) {
if (strstr(node->data->name, query))
results_insert(results, node->data);
if (node->child)
index_lookup_substr(node->child, query, results);
}
}
}
void
index_lookup_substr_caseinsensitive(map_t *index, const char *query,
results_t *results)
{
for (size_t i = 0; i < index->size; i++) {
if (!index->map[i].data)
continue;
for (struct node_s *node = &index->map[i]; node; node = node->next) {
if (strcasestr(node->data->name, query))
results_insert(results, node->data);
if (node->child)
index_lookup_substr_caseinsensitive(node->child, query, results);
}
}
}
void
index_lookup_exact(map_t *index, const char *query, results_t *results)
{
for (size_t i = 0; i < index->size; i++) {
if (!index->map[i].data)
continue;
for (struct node_s *node = &index->map[i]; node; node = node->next) {
if (strcmp(node->data->name, query) == 0)
results_insert(results, node->data);
if (node->child)
index_lookup_exact(node->child, query, results);
}
}
}
void
index_lookup_regex(map_t *index, const char *query,
results_t *results)
{
}
results_t *
index_lookup(map_t *index, lookup_type_t type, const char *query)
{
results_t *results = results_new();
switch (type) {
case LOOKUP_SUBSTR:
index_lookup_substr(index, query, results);
break;
case LOOKUP_SUBSTR_CASEINSENSITIVE:
index_lookup_substr_caseinsensitive(index, query, results);
break;
case LOOKUP_EXACT:
index_lookup_exact(index, query, results);
break;
case LOOKUP_REGEX:
index_lookup_regex(index, query, results);
break;
}
return results;
}
void
index_destroy(index_t index)
{
}