aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorarf20 <aruizfernandez05@gmail.com>2025-11-30 05:11:02 +0100
committerarf20 <aruizfernandez05@gmail.com>2025-11-30 05:11:02 +0100
commitb1430f0f2d76859eafda9fc6098c3b4876501471 (patch)
tree467c0fc17fa40bf0eda18b830de54fc596add268
parentae9b7722e93111a8ee67e5b9b3a791df7b94c765 (diff)
downloadarfnet2-search-b1430f0f2d76859eafda9fc6098c3b4876501471.tar.gz
arfnet2-search-b1430f0f2d76859eafda9fc6098c3b4876501471.zip
file indexing, stat, hash map tree insert
-rw-r--r--Makefile4
-rw-r--r--README.md2
-rw-r--r--config.c11
-rw-r--r--config.h1
-rw-r--r--index.c181
-rw-r--r--index.h51
-rw-r--r--main.c9
-rwxr-xr-xsearchbin29760 -> 35488 bytes
-rw-r--r--search.cfg2
9 files changed, 257 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index aba1197..2f28169 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
CC = gcc
CFLAGS = -g -Wall -pedantic
-LDFLAGS = -lmicrohttpd
+LDFLAGS = -lmicrohttpd -lmagic
BIN = search
-SRC = main.c config.c
+SRC = main.c config.c index.c
$(BIN): $(SRC)
$(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS)
diff --git a/README.md b/README.md
index 9a7cc16..bc7838c 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ ARFNET Fast file index and search
## Building
-Depends on libmicrohttpd
+Depends on libmicrohttpd, libmagic
```
make
diff --git a/config.c b/config.c
index 1235a6b..bc9eed6 100644
--- a/config.c
+++ b/config.c
@@ -28,7 +28,7 @@
#include <errno.h>
unsigned short port = 0;
-char *tmpl_path = NULL;
+char *tmpl_path = NULL, *root = NULL;
int
config_load(const char *conf_path)
@@ -71,6 +71,10 @@ config_load(const char *conf_path)
value[strlen(value) - 1] = '\0';
tmpl_path = strdup(value);
printf("\ttemplate: %s\n", tmpl_path);
+ } else if (strcmp(line, "root") == 0) {
+ value[strlen(value) - 1] = '\0';
+ root = strdup(value);
+ printf("\troot: %s\n", root);
} else {
fprintf(stderr, "[config] unknown key: %s\n", line);
continue;
@@ -89,6 +93,11 @@ config_load(const char *conf_path)
tmpl_path = DEFAULT_TMPL_PATH;
}
+ if (!root) {
+ fprintf(stderr, "[config] E: no root given\n");
+ return -1;
+ }
+
return 0;
}
diff --git a/config.h b/config.h
index 07f2df5..16e1ca5 100644
--- a/config.h
+++ b/config.h
@@ -23,6 +23,7 @@
#define BUFF_SIZE 65535
#define INIT_VEC_CAPACITY 256
+#define INIT_MAP_CAPACITY 1024 /* index directory initial size */
#define CONFIG_PATH "search.cfg"
#define DEFAULT_PORT 8888
diff --git a/index.c b/index.c
new file mode 100644
index 0000000..c4ea178
--- /dev/null
+++ b/index.c
@@ -0,0 +1,181 @@
+/*
+
+ arfnet2-search: Fast file indexer and search
+ Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ index.c: Efficient fast file index
+
+*/
+
+#include "index.h"
+
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <magic.h>
+
+
+/* One level of the index: an array of hash buckets for a single directory. */
+typedef struct map_s {
+    struct node_s *map;     /* bucket array, `capacity` head nodes long */
+    size_t size;            /* entry counter, set to 0 by map_new() */
+    size_t capacity;        /* number of buckets in `map` */
+} map_t;
+
+/*
+ * One bucket slot. The head of each bucket lives inside map_t.map;
+ * colliding entries are chained through `next`.
+ */
+struct node_s {
+    node_data_t *data;      /* entry metadata */
+    struct node_s *next;    /* next entry in the same bucket, or NULL */
+    map_t *child;           /* sub-map attached to this entry, or NULL */
+};
+
+
+static magic_t magic_cookie = NULL; /* libmagic handle shared by this file:
+                                     * opened in index_init(), queried in
+                                     * index_new(), closed in index_deinit() */
+
+
+/*
+ * hash: multiplicative (x31) string hash reduced modulo `mod`.
+ *
+ *  s    NUL-terminated key; NULL hashes to 0
+ *  mod  number of buckets; must be positive
+ *
+ * Returns a value in [0, mod), or 0 when `s` is NULL or `mod` is not
+ * positive (a zero modulus would otherwise divide by zero, and a negative
+ * one would be converted to a huge unsigned value).
+ */
+size_t
+hash(const char *s, int mod)
+{
+    size_t h = 0;
+
+    if (!s || mod <= 0)
+        return 0;
+
+    /* go through unsigned char so bytes >= 0x80 are not sign-extended */
+    for (; *s; s++)
+        h = h * 31 + (unsigned char)*s;
+
+    return h % (size_t)mod;
+}
+
+/*
+ * map_new: allocate an empty map with `icapacity` zeroed buckets.
+ *
+ * Returns the new map, or NULL when `icapacity` is 0 or an allocation
+ * fails. The caller owns the result.
+ */
+map_t *
+map_new(size_t icapacity)
+{
+    /* a map without buckets cannot be hashed into */
+    if (icapacity == 0)
+        return NULL;
+
+    map_t *map = malloc(sizeof *map);
+    if (!map)
+        return NULL;
+
+    /* calloc zeroes the buckets and checks the size multiplication */
+    map->map = calloc(icapacity, sizeof *map->map);
+    if (!map->map) {
+        free(map);
+        return NULL;
+    }
+
+    map->capacity = icapacity;
+    map->size = 0;
+    return map;
+}
+
+/*
+ * map_insert: add an entry to `map` in the bucket selected by hash(key).
+ *
+ *  map    destination map
+ *  key    string used only to pick the bucket
+ *  data   entry metadata; must be non-NULL, because a bucket head with a
+ *         NULL `data` is how an empty slot is recognised
+ *  child  sub-map to attach to the entry, may be NULL
+ *
+ * The map stores the `data` and `child` pointers as given (no copies).
+ * The entry is dropped if `map` is unusable, `data` is NULL or memory for
+ * a chain node cannot be allocated.
+ */
+void
+map_insert(map_t *map, const char *key, node_data_t *data, map_t *child)
+{
+    if (!map || !map->map || map->capacity == 0 || !data)
+        return;
+
+    struct node_s *node = &map->map[hash(key, (int)map->capacity)];
+
+    if (node->data) {
+        /* bucket head is taken: append a new node at the end of the chain */
+        struct node_s *tail = malloc(sizeof *tail);
+        if (!tail) {
+            fprintf(stderr, "[index] error allocating map node\n");
+            return;
+        }
+        tail->data = data;
+        tail->child = child;
+        tail->next = NULL;
+
+        while (node->next)
+            node = node->next;
+        node->next = tail;
+    } else {
+        /* empty bucket: the head node inside the array holds the entry */
+        node->data = data;
+        node->child = child;
+    }
+
+    map->size++;
+}
+
+/*
+ * index_init: open the libmagic handle (MIME mode) and load the default
+ * magic database.
+ *
+ * Returns 0 on success (or if the handle is already open), -1 on error.
+ * On error no handle is left open.
+ */
+int
+index_init(void)
+{
+    /* already initialised: do not leak the existing handle */
+    if (magic_cookie)
+        return 0;
+
+    magic_cookie = magic_open(MAGIC_MIME);
+    if (!magic_cookie) {
+        fprintf(stderr, "[index] error magic_open()\n");
+        return -1;
+    }
+
+    if (magic_load(magic_cookie, NULL) < 0) {
+        fprintf(stderr, "[index] error magic_load(): %s\n",
+            magic_error(magic_cookie));
+        /* release the half-initialised handle instead of leaking it */
+        magic_close(magic_cookie);
+        magic_cookie = NULL;
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * index_deinit: close the libmagic handle opened by index_init().
+ * Does nothing if no handle is open, so it is safe to call repeatedly.
+ */
+void
+index_deinit(void)
+{
+    if (!magic_cookie)
+        return;
+
+    magic_close(magic_cookie);
+    magic_cookie = NULL;    /* guard against a second close */
+}
+
+/* node_data_free: release one entry's metadata and the strings it owns. */
+static void
+node_data_free(node_data_t *data)
+{
+    if (!data)
+        return;
+
+    free(data->name);
+    free((char *)data->mime);   /* heap copy made by index_new() */
+    free(data);
+}
+
+/*
+ * index_new: recursively index the directory `dir`.
+ *
+ *  icapacity  bucket count used for the map of every directory level
+ *  dir        path of the directory to scan
+ *  examine    non-zero to also ask libmagic for each entry's MIME string
+ *
+ * Every entry except "." and ".." gets a node_data_t holding its name, its
+ * stat() result and, when `examine` is set, a private copy of the
+ * magic_file() result. Entries reported as DT_DIR are indexed recursively
+ * and attached as the entry's child map. Entries whose path does not fit
+ * the path buffer or that cannot be stat()ed are logged and skipped.
+ *
+ * Returns the map for `dir`, or NULL if the directory cannot be opened or
+ * the map cannot be allocated. Release the result with index_destroy().
+ */
+map_t *
+index_new(size_t icapacity, const char *dir, int examine)
+{
+    DIR *dirp = opendir(dir);
+    if (!dirp) {
+        fprintf(stderr, "[index] error opening directory %s: %s\n", dir,
+            strerror(errno));
+        return NULL;
+    }
+
+    map_t *map = map_new(icapacity);
+    if (!map) {
+        fprintf(stderr, "[index] error allocating map for %s\n", dir);
+        closedir(dirp);
+        return NULL;
+    }
+
+    char path[4096];
+    struct dirent *de = NULL;
+    while ((de = readdir(dirp))) {
+        if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+            continue;
+
+        int len = snprintf(path, sizeof path, "%s/%s", dir, de->d_name);
+        if (len < 0 || (size_t)len >= sizeof path) {
+            fprintf(stderr, "[index] path too long, skipping %s/%s\n", dir,
+                de->d_name);
+            continue;
+        }
+
+        /* calloc leaves mime NULL for entries that are not examined */
+        node_data_t *data = calloc(1, sizeof *data);
+        if (data)
+            data->name = strdup(de->d_name);
+        if (!data || !data->name) {
+            fprintf(stderr, "[index] error allocating entry for %s\n", path);
+            node_data_free(data);
+            break;
+        }
+
+        /* stat it */
+        if (stat(path, &data->stat) < 0) {
+            fprintf(stderr, "[index] error stat() %s: %s\n", path,
+                strerror(errno));
+            node_data_free(data);
+            continue;
+        }
+
+        /* examine */
+        if (examine) {
+            /*
+             * magic_file() returns a buffer owned by the cookie that later
+             * calls reuse, so keep a private copy.
+             */
+            const char *mime = magic_file(magic_cookie, path);
+            if (mime)
+                data->mime = strdup(mime);
+            else
+                fprintf(stderr, "[index] error magic_file() %s: %s\n", path,
+                    magic_error(magic_cookie));
+        }
+
+        /* recurse */
+        map_t *child = NULL;
+        if (de->d_type == DT_DIR)
+            child = index_new(icapacity, path, examine);
+
+        map_insert(map, de->d_name, data, child);
+    }
+
+    closedir(dirp);
+    return map;
+}
+
+/*
+ * index_lookup: search the index for `query`.
+ *
+ * Not implemented yet: the arguments are ignored, `results` is left
+ * untouched and -1 is returned.
+ */
+int
+index_lookup(index_t index, lookup_type_t type, const char *query,
+    const node_data_t **results)
+{
+    (void)index;
+    (void)type;
+    (void)query;
+    (void)results;
+
+    return -1;
+}
+
+/*
+ * index_destroy: free a map returned by index_new(), including every
+ * entry, chain node and child map below it. NULL is accepted.
+ */
+void
+index_destroy(index_t index)
+{
+    if (!index)
+        return;
+
+    if (index->map) {
+        for (size_t i = 0; i < index->capacity; i++) {
+            /* the head node is part of the bucket array: free only what
+             * it points to */
+            struct node_s *head = &index->map[i];
+            node_data_free(head->data);
+            index_destroy(head->child);
+
+            /* chained nodes were allocated one by one in map_insert() */
+            struct node_s *node = head->next;
+            while (node) {
+                struct node_s *next = node->next;
+                node_data_free(node->data);
+                index_destroy(node->child);
+                free(node);
+                node = next;
+            }
+        }
+        free(index->map);
+    }
+
+    free(index);
+}
+
+
diff --git a/index.h b/index.h
new file mode 100644
index 0000000..7ae54c9
--- /dev/null
+++ b/index.h
@@ -0,0 +1,51 @@
+/*
+
+ arfnet2-search: Fast file indexer and search
+ Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+    index.h: Efficient fast file index
+
+*/
+
+#ifndef _INDEX_H
+#define _INDEX_H
+
+#include <sys/stat.h>
+#include <stddef.h>
+
+typedef enum { /* matching mode passed to index_lookup() as `type` */
+ LOOKUP_SUBSTR, /* presumably: substring match (TODO confirm) */
+ LOOKUP_SUBSTR_NOCASE, /* presumably: case-insensitive substring match (TODO confirm) */
+ LOOKUP_REGEX /* presumably: regular-expression match (TODO confirm) */
+} lookup_type_t;
+
+typedef struct { /* metadata kept for one indexed directory entry */
+ char *name; /* entry name; index.c stores a strdup() of the dirent name */
+ struct stat stat; /* filled by stat() on the entry's path in index.c */
+ const char *mime; /* libmagic magic_file() result; assigned in index.c only when `examine` is set */
+} node_data_t;
+
+typedef struct map_s *index_t; /* opaque handle; struct map_s is defined in index.c */
+
+int index_init();
+void index_deinit();
+index_t index_new(size_t icapacity, const char *root);
+int index_lookup(index_t index, lookup_type_t type, const char *query,
+ const node_data_t **results);
+void index_destroy(index_t index);
+
+#endif /* _INDEX_H */
+
diff --git a/main.c b/main.c
index 2de3c9b..e4924d8 100644
--- a/main.c
+++ b/main.c
@@ -35,6 +35,7 @@
#include <microhttpd.h>
#include "config.h"
+#include "index.h"
static char *index_format_template = NULL;
@@ -119,6 +120,14 @@ int main() {
return 1;
}
+ /* begin indexing */
+ if (index_init() < 0)
+ return 1;
+
+ index_t index = index_new(INIT_MAP_CAPACITY, root);
+
+ printf("[index] indexed\n");
+
while (1) {
sleep(1000);
}
diff --git a/search b/search
index 4fd2bdf..0b5001e 100755
--- a/search
+++ b/search
Binary files differ
diff --git a/search.cfg b/search.cfg
index bb7e8ef..b8110dd 100644
--- a/search.cfg
+++ b/search.cfg
@@ -6,4 +6,6 @@ port=8888
# html template path
template=index.htm.tmpl
+# root directory to index
+root=/home/arf20/projects/arfminesweeper