From b1430f0f2d76859eafda9fc6098c3b4876501471 Mon Sep 17 00:00:00 2001 From: arf20 Date: Sun, 30 Nov 2025 05:11:02 +0100 Subject: file indexing, stat, hash map tree insert --- Makefile | 4 +- README.md | 2 +- config.c | 11 +++- config.h | 1 + index.c | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ index.h | 51 +++++++++++++++++ main.c | 9 +++ search | Bin 29760 -> 35488 bytes search.cfg | 2 + 9 files changed, 257 insertions(+), 4 deletions(-) create mode 100644 index.c create mode 100644 index.h diff --git a/Makefile b/Makefile index aba1197..2f28169 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ CC = gcc CFLAGS = -g -Wall -pedantic -LDFLAGS = -lmicrohttpd +LDFLAGS = -lmicrohttpd -lmagic BIN = search -SRC = main.c config.c +SRC = main.c config.c index.c $(BIN): $(SRC) $(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) diff --git a/README.md b/README.md index 9a7cc16..bc7838c 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ ARFNET Fast file index and search ## Building -Depends on libmicrohttpd +Depends on libmicrohttpd, libmagic ``` make diff --git a/config.c b/config.c index 1235a6b..bc9eed6 100644 --- a/config.c +++ b/config.c @@ -28,7 +28,7 @@ #include unsigned short port = 0; -char *tmpl_path = NULL; +char *tmpl_path = NULL, *root = NULL; int config_load(const char *conf_path) @@ -71,6 +71,10 @@ config_load(const char *conf_path) value[strlen(value) - 1] = '\0'; tmpl_path = strdup(value); printf("\ttemplate: %s\n", tmpl_path); + } else if (strcmp(line, "root") == 0) { + value[strlen(value) - 1] = '\0'; + root = strdup(value); + printf("\troot: %s\n", root); } else { fprintf(stderr, "[config] unknown key: %s\n", line); continue; @@ -89,6 +93,11 @@ config_load(const char *conf_path) tmpl_path = DEFAULT_TMPL_PATH; } + if (!root) { + fprintf(stderr, "[config] E: no root given\n"); + return -1; + } + return 0; } diff --git a/config.h b/config.h index 07f2df5..16e1ca5 100644 --- a/config.h +++ b/config.h @@ -23,6 +23,7 @@ #define BUFF_SIZE 65535 #define INIT_VEC_CAPACITY 256 +#define INIT_MAP_CAPACITY 1024 /* index directory initial size */ #define CONFIG_PATH "search.cfg" #define DEFAULT_PORT 8888 diff --git a/index.c b/index.c new file mode 100644 index 0000000..c4ea178 --- /dev/null +++ b/index.c @@ -0,0 +1,181 @@ +/* + + arfnet2-search: Fast file indexer and search + Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + index.c: Efficient fast file index + +*/ + +#include "index.h" + +#include +#include +#include +#include +#include +#include + +#include + + +typedef struct map_s { + struct node_s *map; + size_t size, capacity; +} map_t; + +struct node_s { + node_data_t *data; + struct node_s *next; + map_t *child; +}; + + +static magic_t magic_cookie = NULL; + + +size_t +hash(const char *s, int mod) +{ + size_t hash = 0; + if (!s) + return 0; + while (*s) + hash = hash * 31 + *s++; + return hash % mod; +} + +map_t * +map_new(size_t icapacity) +{ + map_t *map = malloc(sizeof(map_t)); + + map->map = malloc(sizeof(struct node_s) * icapacity); + memset(map->map, 0, sizeof(struct node_s) * icapacity); + map->capacity = icapacity; + map->size = 0; + return map; +} + +void +map_insert(map_t *map, const char *key, node_data_t *data, map_t *child) +{ + struct node_s *node = &map->map[hash(key, map->capacity)]; + + if (node->data) { + for (; node->next; node = node->next); + + node->next = malloc(sizeof(struct node_s)); + node->next->data = data; + node->next->child = child; + node->next->next = NULL; + } else { + node->data = data; + node->child = child; + } +} + +int +index_init() +{ + magic_cookie = magic_open(MAGIC_MIME); + if (!magic_cookie) { + fprintf(stderr, "[index] error magic_open()\n"); + return -1; + } + if (magic_load(magic_cookie, NULL) < 0) { + fprintf(stderr, "[index] error magic_load(): %s\n", + magic_error(magic_cookie)); + return -1; + } + return 0; +} + +void +index_deinit() +{ + magic_close(magic_cookie); +} + +map_t * +index_new(size_t icapacity, const char *dir, int examine) +{ + DIR *dirp = opendir(dir); + if (!dirp) { + fprintf(stderr, "[index] error opening directory %s: %s\n", dir, + strerror(errno)); + return NULL; + } + + map_t *map = map_new(icapacity); + + char path[4096]; + struct dirent *de = NULL; + while ((de = readdir(dirp))) { + if (de->d_name[0] == '.') { + if (de->d_name[1] == '\0') + continue; + else if (de->d_name[1] == '.') + if (de->d_name[2] == '\0') + continue; + } + + snprintf(path, 4096, "%s/%s", dir, de->d_name); + + /* stat it */ + node_data_t *data = malloc(sizeof(node_data_t)); + data->name = strdup(de->d_name); + if (stat(path, &data->stat) < 0) { + fprintf(stderr, "[index] error stat() %s: %s\n", path, + strerror(errno)); + free(data); + data = NULL; + } + + /* examine */ + if (examine) { + data->mime = magic_file(magic_cookie, path); + if (!data->mime) + fprintf(stderr, "[index] error magic_file() %s: %s\n", path, + magic_error(magic_cookie)); + } + + /* recurse */ + map_t *child = NULL; + if (de->d_type == DT_DIR) { + index_new(icapacity, path); + } + + map_insert(map, de->d_name, data, child); + } + + return map; +} + +int +index_lookup(index_t index, lookup_type_t type, const char *query, + const node_data_t **results) +{ + +} + +void +index_destroy(index_t index) +{ + +} + + diff --git a/index.h b/index.h new file mode 100644 index 0000000..7ae54c9 --- /dev/null +++ b/index.h @@ -0,0 +1,51 @@ +/* + + arfnet2-search: Fast file indexer and search + Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + index.c: Efficient fast file index + +*/ + +#ifndef _INDEX_H +#define _INDEX_H + +#include +#include + +typedef enum { + LOOKUP_SUBSTR, + LOOKUP_SUBSTR_NOCASE, + LOOKUP_REGEX +} lookup_type_t; + +typedef struct { + char *name; + struct stat stat; + const char *mime; +} node_data_t; + +typedef struct map_s *index_t; + +int index_init(); +void index_deinit(); +index_t index_new(size_t icapacity, const char *root); +int index_lookup(index_t index, lookup_type_t type, const char *query, + const node_data_t **results); +void index_destroy(index_t index); + +#endif /* _INDEX_H */ + diff --git a/main.c b/main.c index 2de3c9b..e4924d8 100644 --- a/main.c +++ b/main.c @@ -35,6 +35,7 @@ #include #include "config.h" +#include "index.h" static char *index_format_template = NULL; @@ -119,6 +120,14 @@ int main() { return 1; } + /* begin indexing */ + if (index_init() < 0) + return 1; + + index_t index = index_new(INIT_MAP_CAPACITY, root); + + printf("[index] indexed\n"); + while (1) { sleep(1000); } diff --git a/search b/search index 4fd2bdf..0b5001e 100755 Binary files a/search and b/search differ diff --git a/search.cfg b/search.cfg index bb7e8ef..b8110dd 100644 --- a/search.cfg +++ b/search.cfg @@ -6,4 +6,6 @@ port=8888 # html template path template=index.htm.tmpl +# root +root=/home/arf20/projects/arfminesweeper -- cgit v1.2.3