aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorarf20 <aruizfernandez05@gmail.com>2025-12-07 20:56:40 +0100
committerarf20 <aruizfernandez05@gmail.com>2025-12-07 20:56:40 +0100
commit00909fc4333b1cefc502dc40afa2c7c06ec7f713 (patch)
tree55cc73743966780e0cd51867b5451db14734e784
parent9fc16dba71c6deb26bf09ec5198a8a6d7c4dbb74 (diff)
downloadarfnet2-search-00909fc4333b1cefc502dc40afa2c7c06ec7f713.tar.gz
arfnet2-search-00909fc4333b1cefc502dc40afa2c7c06ec7f713.zip
sorting and filtering working
-rw-r--r--config.c2
-rw-r--r--config.h2
-rw-r--r--index.c79
-rw-r--r--index.h21
-rw-r--r--index.htm.tmpl69
-rw-r--r--main.c171
6 files changed, 290 insertions, 54 deletions
diff --git a/config.c b/config.c
index d99cc2c..8d05eab 100644
--- a/config.c
+++ b/config.c
@@ -1,7 +1,7 @@
/*
arfnet2-search: Fast file indexer and search
- Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+ Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/config.h b/config.h
index b12f226..aeb74d5 100644
--- a/config.h
+++ b/config.h
@@ -1,7 +1,7 @@
/*
arfnet2-search: Fast file indexer and search
- Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+ Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/index.c b/index.c
index 3430178..096fe6e 100644
--- a/index.c
+++ b/index.c
@@ -1,7 +1,7 @@
/*
arfnet2-search: Fast file indexer and search
- Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+ Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,6 +20,7 @@
*/
+#define _GNU_SOURCE
#include "index.h"
#include <sys/types.h>
@@ -115,6 +116,67 @@ results_insert(results_t *results, const node_data_t *result)
results->results[results->size++] = result;
}
+/*
+ * qsort_r() comparator for an array of `const node_data_t *`.
+ *
+ * _r1, _r2: pointers to two array elements (each element is itself a pointer).
+ * arg:      points to two ints: [0] the sort_type_t key, [1] non-zero to
+ *           reverse the order (descending).
+ *
+ * Returns <0, 0 or >0 in the usual comparator sense.
+ */
+static int
+cmp_results(const void *_r1, const void *_r2, void *arg)
+{
+    const node_data_t *r1 = *(const node_data_t *const *)_r1;
+    const node_data_t *r2 = *(const node_data_t *const *)_r2;
+    const int *opts = arg;
+    sort_type_t sort_type = opts[0];
+    int desc = opts[1];
+
+    int cmp = 0;
+
+    switch (sort_type) {
+    case SORT_NAME:
+        cmp = strcmp(r1->name, r2->name);
+        break;
+    case SORT_PATH:
+        cmp = strcmp(r1->path, r2->path);
+        break;
+    case SORT_MIME:
+        /* Either side may lack a mime string: order missing ones first
+         * instead of handing NULL to strcmp(). */
+        if (!r1->mime || !r2->mime)
+            cmp = (r1->mime != NULL) - (r2->mime != NULL);
+        else
+            cmp = strcmp(r1->mime, r2->mime);
+        break;
+    case SORT_SIZE:
+        /* Three-way compare: a subtraction squeezed into int can flip sign
+         * for large differences. */
+        cmp = (r1->stat.st_size > r2->stat.st_size) -
+              (r1->stat.st_size < r2->stat.st_size);
+        break;
+    case SORT_TIME:
+        cmp = (r1->stat.st_mtime > r2->stat.st_mtime) -
+              (r1->stat.st_mtime < r2->stat.st_mtime);
+        break;
+    default:
+        /* Unknown key: treat every pair as equal. */
+        break;
+    }
+
+    /* Reverse by sign rather than negation so any int value is safe. */
+    return desc ? (cmp < 0) - (cmp > 0) : cmp;
+}
+
+/*
+ * Sort results->results in place with qsort_r() and cmp_results().
+ *
+ * sort_type: key to order by.
+ * desc:      non-zero reverses the order.
+ */
+void
+results_sort(results_t *results, sort_type_t sort_type, int desc)
+{
+    /* cmp_results() reads the key from slot 0 and the direction from slot 1 */
+    int sort_opts[2];
+
+    sort_opts[0] = sort_type;
+    sort_opts[1] = desc;
+
+    qsort_r(results->results, results->size, sizeof(node_data_t*),
+        cmp_results, sort_opts);
+}
+
+/*
+ * Build a new result set holding only the entries of `results` that fall
+ * inside the bounds in `filter`; a bound equal to 0 is not applied.
+ *
+ * The input set is passed to results_destroy() before returning, so the
+ * caller must continue with the returned pointer only.
+ */
+results_t *
+results_filter(results_t *results, const filter_t *filter)
+{
+    results_t *kept = results_new();
+    size_t idx = 0;
+
+    while (idx < results->size) {
+        const node_data_t *entry = results->results[idx++];
+
+        int too_old = filter->time_low &&
+            (entry->stat.st_mtime < filter->time_low);
+        int too_new = filter->time_high &&
+            (entry->stat.st_mtime > filter->time_high);
+        int too_small = filter->size_low &&
+            (entry->stat.st_size < filter->size_low);
+        int too_big = filter->size_high &&
+            (entry->stat.st_size > filter->size_high);
+
+        if (!(too_old || too_new || too_small || too_big))
+            results_insert(kept, entry);
+    }
+
+    results_destroy(results);
+    return kept;
+}
+
void
results_destroy(results_t *results)
{
@@ -226,13 +288,19 @@ index_lookup_substr(map_t *index, const char *query,
}
void
-index_lookup_substr_nocase(map_t *index, const char *query,
+index_lookup_substr_caseinsensitive(map_t *index, const char *query,
results_t *results)
{
}
void
+index_lookup_exact(map_t *index, const char *query, results_t *results)
+{
+ /* Not implemented yet: the body is empty, so `results` is left unchanged. */
+}
+
+void
index_lookup_regex(map_t *index, const char *query,
results_t *results)
{
@@ -248,8 +316,11 @@ index_lookup(map_t *index, lookup_type_t type, const char *query)
case LOOKUP_SUBSTR:
index_lookup_substr(index, query, results);
break;
- case LOOKUP_SUBSTR_NOCASE:
- index_lookup_substr_nocase(index, query, results);
+ case LOOKUP_SUBSTR_CASEINSENSITIVE:
+ index_lookup_substr_caseinsensitive(index, query, results);
+ break;
+ case LOOKUP_EXACT:
+ index_lookup_exact(index, query, results);
break;
case LOOKUP_REGEX:
index_lookup_regex(index, query, results);
diff --git a/index.h b/index.h
index 187de94..26ac586 100644
--- a/index.h
+++ b/index.h
@@ -1,7 +1,7 @@
/*
arfnet2-search: Fast file indexer and search
- Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+ Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -28,7 +28,8 @@
typedef enum {
LOOKUP_SUBSTR,
- LOOKUP_SUBSTR_NOCASE,
+ LOOKUP_SUBSTR_CASEINSENSITIVE,
+ LOOKUP_EXACT,
LOOKUP_REGEX
} lookup_type_t;
@@ -40,6 +41,19 @@ typedef struct {
typedef struct map_s *index_t;
+typedef enum { /* sort key passed to results_sort() */
+ SORT_NAME, /* order by the entry's name string */
+ SORT_MIME, /* order by the entry's mime string */
+ SORT_PATH, /* order by the entry's path string */
+ SORT_SIZE, /* order by stat.st_size */
+ SORT_TIME /* order by stat.st_mtime */
+} sort_type_t;
+
+typedef struct { /* bounds applied by results_filter(); a bound of 0 is not applied */
+ time_t time_low, time_high; /* inclusive range checked against stat.st_mtime */
+ size_t size_low, size_high; /* inclusive range checked against stat.st_size */
+} filter_t;
+
typedef struct {
const node_data_t **results;
size_t size, capacity;
@@ -50,6 +64,9 @@ void index_deinit();
index_t index_new(size_t icapacity, const char *root, int examine);
results_t *index_lookup(index_t index, lookup_type_t type, const char *query);
void index_destroy(index_t index);
+
+void results_sort(results_t *results, sort_type_t sort_type, int desc);
+results_t *results_filter(results_t *results, const filter_t *filter);
void results_destroy(results_t *results);
#endif /* _INDEX_H */
diff --git a/index.htm.tmpl b/index.htm.tmpl
index fdb1b58..759625b 100644
--- a/index.htm.tmpl
+++ b/index.htm.tmpl
@@ -10,6 +10,7 @@
}
.box {
+ margin-bottom: 1em;
width: 50%;
min-width: 400px;
display: block;
@@ -41,6 +42,10 @@
width: 200px;
}
+.advanced {
+ margin-left: 1em;
+}
+
.collapse-title {
font-weight: bold;
}
@@ -53,6 +58,10 @@
display: inline-block;
}
+.result-header {
+ margin-bottom: 1em;
+}
+
.result {
margin-left: 1em;
margin-bottom: 1em;
@@ -97,53 +106,45 @@
<p>Search all of the ARFNET content fast</p>
<form class="searchform" action="/query" method="get">
<div class="box form-inline">
- <input class="input" type="text" name="query" value="%s">
+ <input class="input" type="text" name="q" value="%s">
<button type="submit">Search</button><br>
</div>
<div>
- <details>
+ <details class="advanced">
<summary class="collapse-title">Advanced</summary>
- <input type="radio" id="substr" name="type" value="substr">
- <label for="substr">substring</label>
- <input type="radio" id="substr_nocase" name="type" value="substr_nocase">
- <label for="substr_nocase">case insensitive substring</label>
- <input type="radio" id="exact" name="type" value="exact">
- <label for="exact">exact</label>
- <input type="radio" id="regex" name="type" value="regex">
- <label for="regex">regex</label>
- </details>
- <details>
- <summary class="collapse-title">Filtering</summary>
+ <p>
+ <label class="label">Search type</label>
+ <input type="radio" id="substr" name="t" value="s" checked="checked">
+ <label for="substr">substring</label>
+ <input type="radio" id="substr_nocase" name="t" value="i">
+ <label for="substr_nocase">case insensitive substring</label>
+ <input type="radio" id="exact" name="t" value="e">
+ <label for="exact">exact</label>
+ <input type="radio" id="regex" name="t" value="r">
+ <label for="regex">regex</label>
+ </p>
+ <p>
<label class="label" for="mtime_start">Timeframe start</label>
- <input type="date" id="mtime_start" name="filter_mtime_start"><br>
+ <input type="date" id="mtime_start" name="ftl" value="%s"><br>
+ </p>
+ <p>
<label class="label" for="mtime_end">Timeframe end</label>
- <input type="date" id="mtime_end" name="filter_mtime_end"><br>
+ <input type="date" id="mtime_end" name="fth" value="%s"><br>
+ </p>
+ <p>
<label class="label" for="size_start">Size lower bound</label>
- <input type="text" id="size_start" name="filter_size_start"><br>
+ <input type="text" id="size_start" name="fsl" value="%s"><br>
+ </p>
+ <p>
<label class="label" for="size_end">Size upper bound</label>
- <input type="text" id="size_end" name="filter_size_end"><br>
- </details>
- <details>
- <summary class="collapse-title">Sorting</summary>
- <div class="sort-left">
- <input type="radio" name="sort" id="name" value="name">
- <label for="name">name</label><br>
- <input type="radio" name="sort" id="time" value="time">
- <label for="time">time</label><br>
- <input type="radio" name="sort" id="size" value="size">
- <label for="size">size</label><br>
- </div>
- <div class="sort-right">
- <input type="radio" name="sort_dir" id="asc" value="asc">
- <label for="asc">ascending</label><br>
- <input type="radio" name="sort_dir" id="desc" value="desc">
- <label for="desc">descending</label><br>
- </div>
+ <input type="text" id="size_end" name="fsh" value="%s"><br>
+ </p>
</details>
</div>
</form>
<hr>
%s
+ %s
</main>
</body>
</html>
diff --git a/main.c b/main.c
index f3492aa..d7603fc 100644
--- a/main.c
+++ b/main.c
@@ -1,7 +1,7 @@
/*
arfnet2-search: Fast file indexer and search
- Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+ Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,6 +20,8 @@
*/
+#define _XOPEN_SOURCE 700 /* strptime() without destroying clock_gettime() */
+
#include <sys/types.h>
#include <sys/select.h>
#include <sys/socket.h>
@@ -41,13 +43,58 @@ static char *index_format_template = NULL;
static index_t g_index = NULL;
+/*
+ * printf-style template for the block shown above the result list.
+ * Arguments, in order: result count (size_t), lookup time in seconds
+ * (double), then a (link URL, arrow text) string pair for each of the
+ * name, mime, path, size and time column headers.
+ */
+static const char *result_html_header =
+    "<p>%zu results in %f seconds</p>\n"
+    "<div class=\"result-header\">\n"
+    "<a class=\"name\" href=\"%s\">Name %s</a><a class=\"mime\" href=\"%s\">mime-type %s</a><br>\n"
+    "<a class=\"path\" href=\"%s\">path %s</a><div class=\"attrib\">"
+    "<a class=\"size\" href=\"%s\">Size %s</a>"
+    "<a class=\"time\" href=\"%s\">Time %s</a></div><br>\n"
+    "</div>\n";
+
static const char *result_html_template =
"<div class=\"result\">\n"
- "<span class=\"name\">%s</span>""<super class=\"mime\">%s</super><br>\n"
- "<a class=\"path\" href=\"%s\">%s</a><div class=\"attrib\"><span class=\"size\">%s</span><span class=\"time\">%s</span></div><br>\n"
+ "<span class=\"name\">%s</span><super class=\"mime\">%s</super><br>\n"
+ "<a class=\"path\" href=\"%s\">%s</a><div class=\"attrib\">"
+ "<span class=\"size\">%s</span>"
+ "<span class=\"time\">%s</span></div><br>\n"
"</div>\n";
+/*
+ * Render the header placed above the result list: the result count, the
+ * lookup time, and one sort link per column (name, mime, path, size, time).
+ *
+ * connection:  unused here; kept so the signature stays unchanged.
+ * baseurl:     query URL without sort parameters; "&s=<key>&o=<order>" is
+ *              appended to it for every column link.
+ * sort_type:   column the results are currently sorted by.
+ * sort_order:  non-zero when the current sort is descending.
+ * nresults:    number of results.
+ * lookup_time: lookup duration in seconds.
+ *
+ * Each link requests descending order, except the link of the column that
+ * is currently sorted descending, which requests ascending. The arrow next
+ * to a link shows the order that link requests (up = ascending).
+ *
+ * Returns a pointer to a static buffer that is overwritten by the next call.
+ */
static const char *
+generate_results_header_html(struct MHD_Connection *connection, const char *baseurl,
+    sort_type_t sort_type, int sort_order, size_t nresults, float lookup_time)
+{
+    /* room for a base URL of up to 1024 bytes plus the "&s=?&o=?" suffix */
+    enum { NCOLS = 5, URL_MAX = 1024 + 16 };
+
+    static char buff[65535];
+    static char urls[NCOLS][URL_MAX];
+
+    /* columns in the order the header template consumes them */
+    static const struct {
+        sort_type_t type;
+        char key;
+    } cols[NCOLS] = {
+        { SORT_NAME, 'n' },
+        { SORT_MIME, 'm' },
+        { SORT_PATH, 'p' },
+        { SORT_SIZE, 's' },
+        { SORT_TIME, 't' }
+    };
+
+    static const char *const arrows[] = { "&#8593;", "&#8595;" };
+    const char *arrow[NCOLS];
+
+    (void)connection;
+
+    *buff = '\0';
+
+    for (size_t i = 0; i < NCOLS; i++) {
+        char order = (sort_type == cols[i].type && sort_order) ? 'a' : 'd';
+
+        snprintf(urls[i], sizeof urls[i], "%s&s=%c&o=%c", baseurl,
+            cols[i].key, order);
+
+        /* index by the requested order; `!order` would always be 0 because
+         * both 'a' and 'd' are non-zero characters */
+        arrow[i] = arrows[order == 'd'];
+    }
+
+    snprintf(buff, sizeof buff, result_html_header, nresults, lookup_time,
+        urls[0], arrow[0],
+        urls[1], arrow[1],
+        urls[2], arrow[2],
+        urls[3], arrow[3],
+        urls[4], arrow[4]
+    );
+
+    return buff;
+}
+
+static const char *
sizestr(size_t size)
{
static char buf[32];
@@ -68,13 +115,12 @@ static const char *
generate_results_html(results_t *results)
{
static char buff[65535], timebuf[256], urlbuf[4096];
-
char *pos = buff;
for (int i = 0; i < results->size; i++) {
const node_data_t *data = results->results[i];
- struct tm *tm_mtim = gmtime(&data->stat.st_mtim.tv_sec);
- strftime(timebuf, 256, "%Y-%m-%d %H:%M:%S", tm_mtim);
+ struct tm *tm_mtim = gmtime(&data->stat.st_mtime);
+ strftime(timebuf, 256, "%b %d %Y", tm_mtim);
snprintf(urlbuf, 4096, "%s%s", subdir, data->path);
@@ -117,7 +163,7 @@ enum MHD_Result answer_to_connection(
int ret;
if (strcmp(method, "GET") == 0 && strcmp(url, "/") == 0) {
- snprintf(buff, BUFF_SIZE, index_format_template, "", "");
+ snprintf(buff, BUFF_SIZE, index_format_template, "", "", "");
response = MHD_create_response_from_buffer(strlen(buff), (void*)buff,
MHD_RESPMEM_PERSISTENT);
@@ -127,23 +173,124 @@ enum MHD_Result answer_to_connection(
MHD_destroy_response(response);
}
else if (strcmp(method, "GET") == 0 && strcmp(url, "/query") == 0) {
+ /* get query */
const char *query = MHD_lookup_connection_value(connection,
- MHD_GET_ARGUMENT_KIND, "query");
+ MHD_GET_ARGUMENT_KIND, "q");
+
+ /* get and parse query type */
+ lookup_type_t query_type = -1;
+ const char *query_type_str = MHD_lookup_connection_value(connection,
+ MHD_GET_ARGUMENT_KIND, "t");
+ if (!query_type_str)
+ query_type_str = "s";
+
+ if (query_type_str) {
+ switch (query_type_str[0]) {
+ case 's': query_type = LOOKUP_SUBSTR; break;
+ case 'i': query_type = LOOKUP_SUBSTR_CASEINSENSITIVE; break;
+ case 'e': query_type = LOOKUP_EXACT; break;
+ case 'r': query_type = LOOKUP_REGEX; break;
+ }
+ } else query_type = LOOKUP_SUBSTR;
+
+ /* get and parse sorting */
+ sort_type_t sort_type = SORT_NAME;
+ int sort_order = 0;
+ const char *sort_type_str = MHD_lookup_connection_value(connection,
+ MHD_GET_ARGUMENT_KIND, "s");
+ const char *sort_order_str = MHD_lookup_connection_value(connection,
+ MHD_GET_ARGUMENT_KIND, "o");
+ if (sort_type_str) {
+ switch (sort_type_str[0]) {
+ case 'n': sort_type = SORT_NAME; break;
+ case 'm': sort_type = SORT_MIME; break;
+ case 'p': sort_type = SORT_PATH; break;
+ case 's': sort_type = SORT_SIZE; break;
+ case 't': sort_type = SORT_TIME; break;
+ }
+ }
+ if (sort_order_str)
+ sort_order = sort_order_str[0] == 'd';
+
+ /* get and parse filters */
+ const char *filter_time_low = MHD_lookup_connection_value(connection,
+ MHD_GET_ARGUMENT_KIND, "ftl");
+ const char *filter_time_high = MHD_lookup_connection_value(connection,
+ MHD_GET_ARGUMENT_KIND, "fth");
+ const char *filter_size_low = MHD_lookup_connection_value(connection,
+ MHD_GET_ARGUMENT_KIND, "fsl");
+ const char *filter_size_high = MHD_lookup_connection_value(connection,
+ MHD_GET_ARGUMENT_KIND, "fsh");
+
+ filter_t filter = { 0 };
+
+ struct tm filter_tm;
+ if (strptime(filter_time_low, "%Y-%m-%d", &filter_tm))
+ filter.time_low = mktime(&filter_tm);
+ else
+ filter.time_low = 0;
+
+ if (strptime(filter_time_high, "%Y-%m-%d", &filter_tm))
+ filter.time_high = mktime(&filter_tm);
+ else
+ filter.time_high = 0;
+
+ filter.size_low = atoi(filter_size_low);
+ filter.size_high = atoi(filter_size_high);
+
+
+ /* build baseurl with query and filters (no sort) for sort links */
+ char baseurl[1024];
+ snprintf(baseurl, 1024, "/query?q=%s&t=%s&ftl=%s&fth=%s&fsl=%s&fsh=%s",
+ query,
+ query_type_str,
+ filter_time_low ? filter_time_low : "",
+ filter_time_high ? filter_time_high : "",
+ filter_size_low ? filter_size_low : "",
+ filter_size_high ? filter_size_high : ""
+ );
+
+
+ /* lookup query in index with type, measuring time */
+ struct timespec start, finish;
+ clock_gettime(CLOCK_REALTIME, &start);
results_t *results = NULL;
- if (g_index)
- results = index_lookup(g_index, LOOKUP_SUBSTR, query);
+ if (query && g_index)
+ results = index_lookup(g_index, query_type, query);
+
+ clock_gettime(CLOCK_REALTIME, &finish);
+
+ /* sort results */
+ if (results)
+ results_sort(results, sort_type, sort_order);
+ /* filter results */
if (results)
+ results = results_filter(results, &filter);
+
+ /* generate response with header, results, and time */
+ float lookup_time = (finish.tv_sec + (0.000000001 * finish.tv_nsec)) -
+ (start.tv_sec + (0.000000001 * start.tv_nsec));
+
+ if (query && results)
snprintf(buff, BUFF_SIZE, index_format_template, query,
+ filter_time_low ? filter_time_low : "",
+ filter_time_high ? filter_time_high : "",
+ filter_size_low ? filter_size_low : "",
+ filter_size_high ? filter_size_high : "",
+ generate_results_header_html(connection, baseurl, sort_type,
+ sort_order, results->size, lookup_time),
generate_results_html(results));
else
- snprintf(buff, BUFF_SIZE, index_format_template, query,
- "indexing in progress... try again later");
+ snprintf(buff, BUFF_SIZE, index_format_template, query ? query : "",
+ "", "indexing in progress... try again later");
+ /* send it */
response = MHD_create_response_from_buffer(strlen(buff), (void*)buff,
MHD_RESPMEM_PERSISTENT);
+ /* cleanup */
if (results)
results_destroy(results);