diff options
| author | arf20 <aruizfernandez05@gmail.com> | 2025-11-30 05:11:02 +0100 |
|---|---|---|
| committer | arf20 <aruizfernandez05@gmail.com> | 2025-11-30 05:11:02 +0100 |
| commit | b1430f0f2d76859eafda9fc6098c3b4876501471 (patch) | |
| tree | 467c0fc17fa40bf0eda18b830de54fc596add268 | |
| parent | ae9b7722e93111a8ee67e5b9b3a791df7b94c765 (diff) | |
| download | arfnet2-search-b1430f0f2d76859eafda9fc6098c3b4876501471.tar.gz arfnet2-search-b1430f0f2d76859eafda9fc6098c3b4876501471.zip | |
file indexing, stat, hash map tree insert
| -rw-r--r-- | Makefile | 4 | ||||
| -rw-r--r-- | README.md | 2 | ||||
| -rw-r--r-- | config.c | 11 | ||||
| -rw-r--r-- | config.h | 1 | ||||
| -rw-r--r-- | index.c | 181 | ||||
| -rw-r--r-- | index.h | 51 | ||||
| -rw-r--r-- | main.c | 9 | ||||
| -rwxr-xr-x | search | bin | 29760 -> 35488 bytes | |||
| -rw-r--r-- | search.cfg | 2 |
9 files changed, 257 insertions, 4 deletions
@@ -1,9 +1,9 @@ CC = gcc CFLAGS = -g -Wall -pedantic -LDFLAGS = -lmicrohttpd +LDFLAGS = -lmicrohttpd -lmagic BIN = search -SRC = main.c config.c +SRC = main.c config.c index.c $(BIN): $(SRC) $(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) @@ -22,7 +22,7 @@ ARFNET Fast file index and search ## Building -Depends on libmicrohttpd +Depends on libmicrohttpd, libmagic ``` make @@ -28,7 +28,7 @@ #include <errno.h> unsigned short port = 0; -char *tmpl_path = NULL; +char *tmpl_path = NULL, *root = NULL; int config_load(const char *conf_path) @@ -71,6 +71,10 @@ config_load(const char *conf_path) value[strlen(value) - 1] = '\0'; tmpl_path = strdup(value); printf("\ttemplate: %s\n", tmpl_path); + } else if (strcmp(line, "root") == 0) { + value[strlen(value) - 1] = '\0'; + root = strdup(value); + printf("\troot: %s\n", root); } else { fprintf(stderr, "[config] unknown key: %s\n", line); continue; @@ -89,6 +93,11 @@ config_load(const char *conf_path) tmpl_path = DEFAULT_TMPL_PATH; } + if (!root) { + fprintf(stderr, "[config] E: no root given\n"); + return -1; + } + return 0; } @@ -23,6 +23,7 @@ #define BUFF_SIZE 65535 #define INIT_VEC_CAPACITY 256 +#define INIT_MAP_CAPACITY 1024 /* index directory initial size */ #define CONFIG_PATH "search.cfg" #define DEFAULT_PORT 8888 @@ -0,0 +1,181 @@ +/* + + arfnet2-search: Fast file indexer and search + Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <https://www.gnu.org/licenses/>.
+
+    index.c: Efficient fast file index
+
+*/
+
+#include "index.h"
+
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <magic.h>
+
+
+/*
+ * Hash table holding the entries of one directory.
+ * `map` is an array of `capacity` bucket heads; colliding entries are
+ * chained through node_s.next. `size` counts the stored entries.
+ */
+typedef struct map_s {
+    struct node_s *map;
+    size_t size, capacity;
+} map_t;
+
+/*
+ * One entry of a directory map. A bucket head whose `data` is NULL marks an
+ * empty bucket, so every stored entry must carry non-NULL data.
+ */
+struct node_s {
+    node_data_t *data;
+    struct node_s *next;
+    map_t *child;   /* map of the sub-directory, NULL for anything else */
+};
+
+/*
+ * Whether index_new() asks libmagic for the MIME type of every entry.
+ * NOTE(review): index.h exposes no switch for this, so it is fixed here;
+ * enabled is an assumption (index_init() loads the magic database) -
+ * TODO confirm the intended default.
+ */
+#define INDEX_EXAMINE 1
+
+static magic_t magic_cookie = NULL;
+
+
+/*
+ * Polynomial (base 31) hash of the string `s`, reduced modulo `mod`.
+ * Returns 0 when `s` is NULL or `mod` is not positive, which also avoids a
+ * modulo-by-zero.
+ */
+size_t
+hash(const char *s, int mod)
+{
+    size_t hash = 0;
+    if (!s || mod <= 0)
+        return 0;
+    /* read bytes as unsigned so non-ASCII names are not sign-extended */
+    while (*s)
+        hash = hash * 31 + (unsigned char)*s++;
+    return hash % (size_t)mod;
+}
+
+/*
+ * Allocate an empty map with `icapacity` zeroed buckets.
+ * Returns NULL when `icapacity` is 0 or an allocation fails.
+ */
+map_t *
+map_new(size_t icapacity)
+{
+    map_t *map;
+
+    if (icapacity == 0)
+        return NULL;
+
+    map = malloc(sizeof *map);
+    if (!map)
+        return NULL;
+
+    /* calloc zeroes the buckets and checks the size multiplication */
+    map->map = calloc(icapacity, sizeof *map->map);
+    if (!map->map) {
+        free(map);
+        return NULL;
+    }
+    map->capacity = icapacity;
+    map->size = 0;
+    return map;
+}
+
+/*
+ * Store `data` (and the optional sub-directory map `child`) under `key`.
+ * The first entry of a bucket lives in the bucket head itself; later ones are
+ * appended to its chain. Duplicate keys are not detected.
+ * Nothing is stored when `map` or `data` is NULL or when the chain node cannot
+ * be allocated; the latter is reported on stderr.
+ */
+void
+map_insert(map_t *map, const char *key, node_data_t *data, map_t *child)
+{
+    struct node_s *node;
+
+    if (!map || !data)
+        return;
+
+    node = &map->map[hash(key, (int)map->capacity)];
+
+    if (node->data) {
+        struct node_s *tail = malloc(sizeof *tail);
+        if (!tail) {
+            fprintf(stderr, "[index] error allocating node for %s\n",
+                key ? key : "(null)");
+            return;
+        }
+        tail->data = data;
+        tail->child = child;
+        tail->next = NULL;
+
+        for (; node->next; node = node->next);
+        node->next = tail;
+    } else {
+        node->data = data;
+        node->child = child;
+    }
+    map->size++;
+}
+
+/*
+ * Open the libmagic cookie in MIME mode and load the default database.
+ * Returns 0 on success, -1 on failure (the cookie is released again).
+ */
+int
+index_init(void)
+{
+    magic_cookie = magic_open(MAGIC_MIME);
+    if (!magic_cookie) {
+        fprintf(stderr, "[index] error magic_open()\n");
+        return -1;
+    }
+    if (magic_load(magic_cookie, NULL) < 0) {
+        fprintf(stderr, "[index] error magic_load(): %s\n",
+            magic_error(magic_cookie));
+        magic_close(magic_cookie);
+        magic_cookie = NULL;
+        return -1;
+    }
+    return 0;
+}
+
+/* Close the libmagic cookie opened by index_init(); safe to call twice. */
+void
+index_deinit(void)
+{
+    if (magic_cookie) {
+        magic_close(magic_cookie);
+        magic_cookie = NULL;
+    }
+}
+
+/* Free one entry record together with the strings it owns. */
+static void
+node_data_free(node_data_t *data)
+{
+    if (!data)
+        return;
+    free(data->name);
+    free((char *)data->mime);   /* heap copy made in index_build() */
+    free(data);
+}
+
+/*
+ * Index directory `dir` and, recursively, every sub-directory below it.
+ * Each entry is stat()ed; when `examine` is non-zero its MIME type is also
+ * queried from libmagic. Entries that cannot be stat()ed, whose path does not
+ * fit the path buffer, or that cannot be allocated are reported and skipped.
+ * Returns the new map, or NULL when `dir` cannot be opened or the map cannot
+ * be allocated.
+ */
+static map_t *
+index_build(size_t icapacity, const char *dir, int examine)
+{
+    DIR *dirp = opendir(dir);
+    if (!dirp) {
+        fprintf(stderr, "[index] error opening directory %s: %s\n", dir,
+            strerror(errno));
+        return NULL;
+    }
+
+    map_t *map = map_new(icapacity);
+    if (!map) {
+        fprintf(stderr, "[index] error allocating map for %s\n", dir);
+        closedir(dirp);
+        return NULL;
+    }
+
+    char path[4096];
+    struct dirent *de = NULL;
+    while ((de = readdir(dirp))) {
+        /* skip the self and parent entries */
+        if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+            continue;
+
+        int len = snprintf(path, sizeof path, "%s/%s", dir, de->d_name);
+        if (len < 0 || (size_t)len >= sizeof path) {
+            fprintf(stderr, "[index] path too long, skipping %s/%s\n", dir,
+                de->d_name);
+            continue;
+        }
+
+        /* zeroed so that mime stays NULL unless it is examined below */
+        node_data_t *data = calloc(1, sizeof *data);
+        if (data)
+            data->name = strdup(de->d_name);
+        if (!data || !data->name) {
+            fprintf(stderr, "[index] error allocating entry for %s\n", path);
+            node_data_free(data);
+            continue;
+        }
+
+        /* stat it */
+        if (stat(path, &data->stat) < 0) {
+            fprintf(stderr, "[index] error stat() %s: %s\n", path,
+                strerror(errno));
+            node_data_free(data);
+            continue;
+        }
+
+        /* examine */
+        if (examine) {
+            /* the returned string belongs to the cookie, so keep a copy */
+            const char *mime = magic_file(magic_cookie, path);
+            if (mime)
+                data->mime = strdup(mime);
+            else
+                fprintf(stderr, "[index] error magic_file() %s: %s\n", path,
+                    magic_error(magic_cookie));
+        }
+
+        /* recurse; an unreadable sub-directory is kept with child == NULL */
+        map_t *child = NULL;
+        if (de->d_type == DT_DIR)
+            child = index_build(icapacity, path, examine);
+
+        map_insert(map, de->d_name, data, child);
+    }
+
+    closedir(dirp);
+    return map;
+}
+
+/*
+ * Build the index of the tree rooted at `root`, as declared in index.h.
+ * `icapacity` is the bucket count of every directory map.
+ * Returns NULL when `root` cannot be indexed; release the result with
+ * index_destroy().
+ */
+map_t *
+index_new(size_t icapacity, const char *root)
+{
+    return index_build(icapacity, root, INDEX_EXAMINE);
+}
+
+/*
+ * Search the index. Not implemented yet: reports zero matches and sets
+ * *results to NULL instead of falling off the end of a non-void function.
+ * NOTE(review): the meaning of the return value is an assumption (number of
+ * matches) - TODO confirm when the lookup is written.
+ */
+int
+index_lookup(index_t index, lookup_type_t type, const char *query,
+    const node_data_t **results)
+{
+    (void)index;
+    (void)type;
+    (void)query;
+
+    if (results)
+        *results = NULL;
+    return 0;
+}
+
+/*
+ * Free an index built by index_new(): every entry record, every chain node
+ * and, recursively, every sub-directory map. NULL is accepted.
+ */
+void
+index_destroy(index_t index)
+{
+    if (!index)
+        return;
+
+    for (size_t i = 0; i < index->capacity; i++) {
+        /* the bucket head is part of the bucket array, only its payload
+         * is freed here */
+        struct node_s *head = &index->map[i];
+        node_data_free(head->data);
+        index_destroy(head->child);
+
+        struct node_s *node = head->next;
+        while (node) {
+            struct node_s *next = node->next;
+            node_data_free(node->data);
+            index_destroy(node->child);
+            free(node);
+            node = next;
+        }
+    }
+
+    free(index->map);
+    free(index);
+}
+
+
@@ -0,0 +1,51 @@
+/*
+
+    arfnet2-search: Fast file indexer and search
+    Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+    index.h: Efficient fast file index
+
+*/
+
+#ifndef _INDEX_H
+#define _INDEX_H
+
+#include <sys/stat.h>
+#include <stddef.h>
+
+/*
+ * Matching strategy passed to index_lookup().
+ * NOTE(review): meanings are inferred from the names (substring,
+ * case-insensitive substring, regular expression); the lookup itself is not
+ * implemented in index.c yet - confirm once it is.
+ */
+typedef enum {
+    LOOKUP_SUBSTR,
+    LOOKUP_SUBSTR_NOCASE,
+    LOOKUP_REGEX
+} lookup_type_t;
+
+/* Metadata recorded for one directory entry while indexing. */
+typedef struct {
+    char *name;         /* entry name, copied from the dirent d_name */
+    struct stat stat;   /* filled by stat() on the entry's full path */
+    const char *mime;   /* from libmagic magic_file(); only set when the
+                           indexer examines file contents */
+} node_data_t;
+
+/* Index handle; struct map_s is defined privately in index.c. */
+typedef struct map_s *index_t;
+
+/* Open libmagic in MIME mode and load its default database.
+ * Returns 0 on success, -1 on failure. */
+int index_init();
+/* Close the libmagic handle opened by index_init(). */
+void index_deinit();
+/* Index the directory `root`; `icapacity` is the number of hash buckets
+ * allocated per directory map. Returns NULL if `root` cannot be opened. */
+index_t index_new(size_t icapacity, const char *root);
+/* Search the index.
+ * NOTE(review): not implemented in index.c yet; document the return value and
+ * the ownership of `results` once it is. */
+int index_lookup(index_t index, lookup_type_t type, const char *query,
+    const node_data_t **results);
+/* Intended to release an index returned by index_new() - verify against
+ * index.c. */
+void index_destroy(index_t index);
+
+#endif /* _INDEX_H */
+
@@ -35,6 +35,7 @@
 #include <microhttpd.h>
 #include "config.h"
+#include "index.h"
 static char *index_format_template = NULL;
@@ -119,6 +120,14 @@ int main() {
         return 1;
     }
+    /* begin indexing */
+    if (index_init() < 0)
+        return 1;
+
+    index_t index = index_new(INIT_MAP_CAPACITY, root);
+
+    printf("[index] indexed\n");
+
     while (1) {
         sleep(1000);
     }
Binary files differ
@@ -6,4 +6,6 @@
 port=8888
 # html template path
 template=index.htm.tmpl
+# root
+root=/home/arf20/projects/arfminesweeper |
