diff options
| author | arf20 <aruizfernandez05@gmail.com> | 2025-12-07 20:56:40 +0100 |
|---|---|---|
| committer | arf20 <aruizfernandez05@gmail.com> | 2025-12-07 20:56:40 +0100 |
| commit | 00909fc4333b1cefc502dc40afa2c7c06ec7f713 (patch) | |
| tree | 55cc73743966780e0cd51867b5451db14734e784 | |
| parent | 9fc16dba71c6deb26bf09ec5198a8a6d7c4dbb74 (diff) | |
| download | arfnet2-search-00909fc4333b1cefc502dc40afa2c7c06ec7f713.tar.gz arfnet2-search-00909fc4333b1cefc502dc40afa2c7c06ec7f713.zip | |
sorting and filtering working
| -rw-r--r-- | config.c | 2 | ||||
| -rw-r--r-- | config.h | 2 | ||||
| -rw-r--r-- | index.c | 79 | ||||
| -rw-r--r-- | index.h | 21 | ||||
| -rw-r--r-- | index.htm.tmpl | 69 | ||||
| -rw-r--r-- | main.c | 171 |
6 files changed, 290 insertions, 54 deletions
@@ -1,7 +1,7 @@ /* arfnet2-search: Fast file indexer and search - Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez) + Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,7 +1,7 @@ /* arfnet2-search: Fast file indexer and search - Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez) + Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,7 +1,7 @@ /* arfnet2-search: Fast file indexer and search - Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez) + Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,6 +20,7 @@ */ +#define _GNU_SOURCE #include "index.h" #include <sys/types.h> @@ -115,6 +116,67 @@ results_insert(results_t *results, const node_data_t *result) results->results[results->size++] = result; } +static int +cmp_results(const void *_r1, const void *_r2, void *arg) +{ + const node_data_t *r1 = *(node_data_t**)_r1, *r2 = *(node_data_t**)_r2; + sort_type_t sort_type = ((int*)arg)[0]; + int desc = ((int*)arg)[1]; + + int cmp = 0; + + switch (sort_type) { + case SORT_NAME: + cmp = strcmp(r1->name, r2->name); + break; + case SORT_PATH: + cmp = strcmp(r1->path, r2->path); + break; + case SORT_MIME: + if (!r1->mime) + return 0; + cmp = strcmp(r1->mime, r2->mime); + break; + case SORT_SIZE: + cmp = r1->stat.st_size - r2->stat.st_size; + break; + case SORT_TIME: + cmp = r1->stat.st_mtime - r2->stat.st_mtime; + break; + } + + return !desc ? cmp : -cmp; +} + +void +results_sort(results_t *results, sort_type_t sort_type, int desc) +{ + int arg[2] = { sort_type, desc }; + qsort_r(results->results, results->size, sizeof(node_data_t*), cmp_results, + &arg); +} + +results_t * +results_filter(results_t *results, const filter_t *filter) +{ + results_t *filtered = results_new(); + for (size_t i = 0; i < results->size; i++) { + const node_data_t *n = results->results[i]; + if (filter->time_low && (n->stat.st_mtime < filter->time_low)) + continue; + if (filter->time_high && (n->stat.st_mtime > filter->time_high)) + continue; + if (filter->size_low && (n->stat.st_size < filter->size_low)) + continue; + if (filter->size_high && (n->stat.st_size > filter->size_high)) + continue; + + results_insert(filtered, n); + } + results_destroy(results); + return filtered; +} + void results_destroy(results_t *results) { @@ -226,13 +288,19 @@ index_lookup_substr(map_t *index, const char *query, } void -index_lookup_substr_nocase(map_t *index, const char *query, +index_lookup_substr_caseinsensitive(map_t *index, const char *query, results_t *results) { } void +index_lookup_exact(map_t *index, const char *query, results_t *results) +{ + +} + +void index_lookup_regex(map_t *index, const char *query, results_t *results) { @@ -248,8 +316,11 @@ index_lookup(map_t *index, lookup_type_t type, const char *query) case LOOKUP_SUBSTR: index_lookup_substr(index, query, results); break; - case LOOKUP_SUBSTR_NOCASE: - index_lookup_substr_nocase(index, query, results); + case LOOKUP_SUBSTR_CASEINSENSITIVE: + index_lookup_substr_caseinsensitive(index, query, results); + break; + case LOOKUP_EXACT: + index_lookup_exact(index, query, results); break; case LOOKUP_REGEX: index_lookup_regex(index, query, results); @@ -1,7 +1,7 @@ /* arfnet2-search: Fast file indexer and search - Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez) + Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,7 +28,8 @@ typedef enum { LOOKUP_SUBSTR, - LOOKUP_SUBSTR_NOCASE, + LOOKUP_SUBSTR_CASEINSENSITIVE, + LOOKUP_EXACT, LOOKUP_REGEX } lookup_type_t; @@ -40,6 +41,19 @@ typedef struct { typedef struct map_s *index_t; +typedef enum { + SORT_NAME, + SORT_MIME, + SORT_PATH, + SORT_SIZE, + SORT_TIME +} sort_type_t; + +typedef struct { + time_t time_low, time_high; + size_t size_low, size_high; +} filter_t; + typedef struct { const node_data_t **results; size_t size, capacity; @@ -50,6 +64,9 @@ void index_deinit(); index_t index_new(size_t icapacity, const char *root, int examine); results_t *index_lookup(index_t index, lookup_type_t type, const char *query); void index_destroy(index_t index); + +void results_sort(results_t *results, sort_type_t sort_type, int desc); +results_t *results_filter(results_t *results, const filter_t *filter); void results_destroy(results_t *results); #endif /* _INDEX_H */ diff --git a/index.htm.tmpl b/index.htm.tmpl index fdb1b58..759625b 100644 --- a/index.htm.tmpl +++ b/index.htm.tmpl @@ -10,6 +10,7 @@ } .box { + margin-bottom: 1em; width: 50%; min-width: 400px; display: block; @@ -41,6 +42,10 @@ width: 200px; } +.advanced { + margin-left: 1em; +} + .collapse-title { font-weight: bold; } @@ -53,6 +58,10 @@ display: inline-block; } +.result-header { + margin-bottom: 1em; +} + .result { margin-left: 1em; margin-bottom: 1em; @@ -97,53 +106,45 @@ <p>Search all of the ARFNET content fast</p> <form class="searchform" action="/query" method="get"> <div class="box form-inline"> - <input class="input" type="text" name="query" value="%s"> + <input class="input" type="text" name="q" value="%s"> <button type="submit">Search</button><br> </div> <div> - <details> + <details class="advanced"> <summary class="collapse-title">Advanced</summary> - <input type="radio" id="substr" name="type" value="substr"> - <label for="substr">substring</label> - <input type="radio" id="substr_nocase" name="type" value="substr_nocase"> - <label for="substr_nocase">case insensitive substring</label> - <input type="radio" id="exact" name="type" value="exact"> - <label for="exact">exact</label> - <input type="radio" id="regex" name="type" value="regex"> - <label for="regex">regex</label> - </details> - <details> - <summary class="collapse-title">Filtering</summary> + <p> + <label class="label">Search type</label> + <input type="radio" id="substr" name="t" value="s" checked="checked"> + <label for="substr">substring</label> + <input type="radio" id="substr_nocase" name="t" value="i"> + <label for="substr_nocase">case insensitive substring</label> + <input type="radio" id="exact" name="t" value="e"> + <label for="exact">exact</label> + <input type="radio" id="regex" name="t" value="r"> + <label for="regex">regex</label> + </p> + <p> <label class="label" for="mtime_start">Timeframe start</label> - <input type="date" id="mtime_start" name="filter_mtime_start"><br> + <input type="date" id="mtime_start" name="ftl" value="%s"><br> + </p> + <p> <label class="label" for="mtime_end">Timeframe end</label> - <input type="date" id="mtime_end" name="filter_mtime_end"><br> + <input type="date" id="mtime_end" name="fth" value="%s"><br> + </p> + <p> <label class="label" for="size_start">Size lower bound</label> - <input type="text" id="size_start" name="filter_size_start"><br> + <input type="text" id="size_start" name="fsl" value="%s"><br> + </p> + <p> <label class="label" for="size_end">Size upper bound</label> - <input type="text" id="size_end" name="filter_size_end"><br> - </details> - <details> - <summary class="collapse-title">Sorting</summary> - <div class="sort-left"> - <input type="radio" name="sort" id="name" value="name"> - <label for="name">name</label><br> - <input type="radio" name="sort" id="time" value="time"> - <label for="time">time</label><br> - <input type="radio" name="sort" id="size" value="size"> - <label for="size">size</label><br> - </div> - <div class="sort-right"> - <input type="radio" name="sort_dir" id="asc" value="asc"> - <label for="asc">ascending</label><br> - <input type="radio" name="sort_dir" id="desc" value="desc"> - <label for="desc">descending</label><br> - </div> + <input type="text" id="size_end" name="fsh" value="%s"><br> + </p> </details> </div> </form> <hr> %s + %s </main> </body> </html> @@ -1,7 +1,7 @@ /* arfnet2-search: Fast file indexer and search - Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez) + Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,6 +20,8 @@ */ +#define _XOPEN_SOURCE 700 /* strptime() without destroying clock_gettime() */ + #include <sys/types.h> #include <sys/select.h> #include <sys/socket.h> @@ -41,13 +43,58 @@ static char *index_format_template = NULL; static index_t g_index = NULL; +static const char *result_html_header = + "<p>%ld results in %f seconds</p>\n" + "<div class=\"result-header\">\n" + "<a class=\"name\" href=\"%s\">Name %s</a><a class=\"mime\" href=\"%s\">mime-type %s</a><br>\n" + "<a class=\"path\" href=\"%s\">path %s</a><div class=\"attrib\">" + "<a class=\"size\" href=\"%s\">Size %s</a>" + "<a class=\"time\" href=\"%s\">Time %s</a></div><br>\n" + "</div>\n"; + static const char *result_html_template = "<div class=\"result\">\n" - "<span class=\"name\">%s</span>""<super class=\"mime\">%s</super><br>\n" - "<a class=\"path\" href=\"%s\">%s</a><div class=\"attrib\"><span class=\"size\">%s</span><span class=\"time\">%s</span></div><br>\n" + "<span class=\"name\">%s</span><super class=\"mime\">%s</super><br>\n" + "<a class=\"path\" href=\"%s\">%s</a><div class=\"attrib\">" + "<span class=\"size\">%s</span>" + "<span class=\"time\">%s</span></div><br>\n" "</div>\n"; static const char * +generate_results_header_html(struct MHD_Connection *connection, const char *baseurl, + sort_type_t sort_type, int sort_order, size_t nresults, float lookup_time) +{ + static char buff[65535], name_url[256], mime_url[256], path_url[256], + size_url[256], time_url[256]; + + *buff = '\0'; + + const char *arrows[] = { "↑", "↓" }; + + char name_order = (sort_type == SORT_NAME) && sort_order ? 'a' : 'd'; + char mime_order = (sort_type == SORT_MIME) && sort_order ? 'a' : 'd'; + char path_order = (sort_type == SORT_PATH) && sort_order ? 'a' : 'd'; + char size_order = (sort_type == SORT_SIZE) && sort_order ? 'a' : 'd'; + char time_order = (sort_type == SORT_TIME) && sort_order ? 'a' : 'd'; + + snprintf(name_url, 256, "%s&s=n&o=%c", baseurl, name_order); + snprintf(mime_url, 256, "%s&s=m&o=%c", baseurl, mime_order); + snprintf(path_url, 256, "%s&s=p&o=%c", baseurl, path_order); + snprintf(size_url, 256, "%s&s=s&o=%c", baseurl, size_order); + snprintf(time_url, 256, "%s&s=t&o=%c", baseurl, time_order); + + snprintf(buff, 65535, result_html_header, nresults, lookup_time, + name_url, arrows[!name_order], + mime_url, arrows[!mime_order], + path_url, arrows[!path_order], + size_url, arrows[!size_order], + time_url, arrows[!time_order] + ); + + return buff; +} + +static const char * sizestr(size_t size) { static char buf[32]; @@ -68,13 +115,12 @@ static const char * generate_results_html(results_t *results) { static char buff[65535], timebuf[256], urlbuf[4096]; - char *pos = buff; for (int i = 0; i < results->size; i++) { const node_data_t *data = results->results[i]; - struct tm *tm_mtim = gmtime(&data->stat.st_mtim.tv_sec); - strftime(timebuf, 256, "%Y-%m-%d %H:%M:%S", tm_mtim); + struct tm *tm_mtim = gmtime(&data->stat.st_mtime); + strftime(timebuf, 256, "%b %d %Y", tm_mtim); snprintf(urlbuf, 4096, "%s%s", subdir, data->path); @@ -117,7 +163,7 @@ enum MHD_Result answer_to_connection( int ret; if (strcmp(method, "GET") == 0 && strcmp(url, "/") == 0) { - snprintf(buff, BUFF_SIZE, index_format_template, "", ""); + snprintf(buff, BUFF_SIZE, index_format_template, "", "", ""); response = MHD_create_response_from_buffer(strlen(buff), (void*)buff, MHD_RESPMEM_PERSISTENT); @@ -127,23 +173,124 @@ enum MHD_Result answer_to_connection( MHD_destroy_response(response); } else if (strcmp(method, "GET") == 0 && strcmp(url, "/query") == 0) { + /* get query */ const char *query = MHD_lookup_connection_value(connection, - MHD_GET_ARGUMENT_KIND, "query"); + MHD_GET_ARGUMENT_KIND, "q"); + + /* get and parse query type */ + lookup_type_t query_type = -1; + const char *query_type_str = MHD_lookup_connection_value(connection, + MHD_GET_ARGUMENT_KIND, "t"); + if (!query_type_str) + query_type_str = "s"; + + if (query_type_str) { + switch (query_type_str[0]) { + case 's': query_type = LOOKUP_SUBSTR; break; + case 'i': query_type = LOOKUP_SUBSTR_CASEINSENSITIVE; break; + case 'e': query_type = LOOKUP_EXACT; break; + case 'r': query_type = LOOKUP_REGEX; break; + } + } else query_type = LOOKUP_SUBSTR; + + /* get and parse sorting */ + sort_type_t sort_type = SORT_NAME; + int sort_order = 0; + const char *sort_type_str = MHD_lookup_connection_value(connection, + MHD_GET_ARGUMENT_KIND, "s"); + const char *sort_order_str = MHD_lookup_connection_value(connection, + MHD_GET_ARGUMENT_KIND, "o"); + if (sort_type_str) { + switch (sort_type_str[0]) { + case 'n': sort_type = SORT_NAME; break; + case 'm': sort_type = SORT_MIME; break; + case 'p': sort_type = SORT_PATH; break; + case 's': sort_type = SORT_SIZE; break; + case 't': sort_type = SORT_TIME; break; + } + } + if (sort_order_str) + sort_order = sort_order_str[0] == 'd'; + + /* get and parse filters */ + const char *filter_time_low = MHD_lookup_connection_value(connection, + MHD_GET_ARGUMENT_KIND, "ftl"); + const char *filter_time_high = MHD_lookup_connection_value(connection, + MHD_GET_ARGUMENT_KIND, "fth"); + const char *filter_size_low = MHD_lookup_connection_value(connection, + MHD_GET_ARGUMENT_KIND, "fsl"); + const char *filter_size_high = MHD_lookup_connection_value(connection, + MHD_GET_ARGUMENT_KIND, "fsh"); + + filter_t filter = { 0 }; + + struct tm filter_tm; + if (strptime(filter_time_low, "%Y-%m-%d", &filter_tm)) + filter.time_low = mktime(&filter_tm); + else + filter.time_low = 0; + + if (strptime(filter_time_high, "%Y-%m-%d", &filter_tm)) + filter.time_high = mktime(&filter_tm); + else + filter.time_high = 0; + + filter.size_low = atoi(filter_size_low); + filter.size_high = atoi(filter_size_high); + + + /* build baseurl with query and filters (no sort) for sort links */ + char baseurl[1024]; + snprintf(baseurl, 1024, "/query?q=%s&t=%s&ftl=%s&fth=%s&fsl=%s&fsh=%s", + query, + query_type_str, + filter_time_low ? filter_time_low : "", + filter_time_high ? filter_time_high : "", + filter_size_low ? filter_size_low : "", + filter_size_high ? filter_size_high : "" + ); + + + /* lookup query in index with type, mesuring time */ + struct timespec start, finish; + clock_gettime(CLOCK_REALTIME, &start); results_t *results = NULL; - if (g_index) - results = index_lookup(g_index, LOOKUP_SUBSTR, query); + if (query && g_index) + results = index_lookup(g_index, query_type, query); + + clock_gettime(CLOCK_REALTIME, &finish); + + /* sort results */ + if (results) + results_sort(results, sort_type, sort_order); + /* filter results */ if (results) + results = results_filter(results, &filter); + + /* generate response with header, results, and time */ + float lookup_time = (finish.tv_sec + (0.000000001 * finish.tv_nsec)) - + (start.tv_sec + (0.000000001 * start.tv_nsec)); + + if (query && results) snprintf(buff, BUFF_SIZE, index_format_template, query, + filter_time_low ? filter_time_low : "", + filter_time_high ? filter_time_high : "", + filter_size_low ? filter_size_low : "", + filter_size_high ? filter_size_high : "", + generate_results_header_html(connection, baseurl, sort_type, + sort_order, results->size, lookup_time), generate_results_html(results)); else - snprintf(buff, BUFF_SIZE, index_format_template, query, - "indexing in progress... try again later"); + snprintf(buff, BUFF_SIZE, index_format_template, query ? query : "", + "", "indexing in progress... try again later"); + /* send it */ response = MHD_create_response_from_buffer(strlen(buff), (void*)buff, MHD_RESPMEM_PERSISTENT); + /* cleanup */ if (results) results_destroy(results); |
