Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- This code is hereby released to the public domain.
- ~aaaaaa123456789, 2014-11-07
- */
- // there's two ways of handling strings
- // Linux uses UTF-8, so strings are still char *, and internationalization works directly
- // Windows uses UTF-16LE, locale-specific ANSI/OEM-to-Unicode conversions, and lots of
- // annoying things that make internationalization a massive clusterfuck from beginning to end
- // if you're wondering why these constants exist, ask Bill Gates
- #define UNICODE
- #define _UNICODE
- #include <windows.h>
- #include <wchar.h>
- #include <stdlib.h>
- #include <string.h>
- #include <stdarg.h>
// Command-line option flags. Each constant is a distinct bit, so several
// options can be OR-ed together into a single int.
enum options {
    OPT_RECURSE        = 1 << 0, // -R: search directories recursively
    OPT_DECIMAL        = 1 << 1, // -d: show offsets in decimal instead of hexadecimal
    OPT_SHOW_NO_MATCH  = 1 << 2, // -N: also list files in which nothing was found
    OPT_FILENAMES_ONLY = 1 << 3  // -f: show bare filenames instead of paths
};
// One node of the singly linked list of files to search.
// Nodes are allocated in one piece: the header followed by the
// NUL-terminated path stored in the flexible array member.
struct file_info {
    unsigned long long size;   // file size in bytes
    struct file_info *next;    // following node, or NULL at the end of the list
    wchar_t name[];            // path of the file
};
// Builds one unsigned long long from two halves: dw1 ends up in bits 32 and
// above, dw2 in the low bits. Each argument is narrowed to unsigned first.
#define dword_to_qword(dw1, dw2) \
    (((unsigned long long) (unsigned) (dw1) << 32) | (unsigned long long) (unsigned) (dw2))
// Value (0-15) of one hexadecimal digit character, upper or lower case,
// or -1 when the character is not a hexadecimal digit.
// The argument is evaluated several times, so it must have no side effects.
// Relies on an ASCII-compatible character set ('0' == 0x30, 'A' == 0x41, 'a' == 0x61).
#define hex_char_to_number(hex) ( \
    ((hex) >= '0' && (hex) <= '9') ? (hex) - '0' : \
    ((hex) >= 'A' && (hex) <= 'F') ? (hex) - 'A' + 10 : \
    ((hex) >= 'a' && (hex) <= 'f') ? (hex) - 'a' + 10 : \
    -1 \
)
- int wmain(int, wchar_t **);
- void console_write(int, const wchar_t *, ...);
- void console_write_line(int, const wchar_t *, ...);
- wchar_t console_get_char(void);
- int parse_options(wchar_t ***, const wchar_t *);
- struct file_info * find_files(const wchar_t *, int);
- struct file_info * get_node_for_file(WIN32_FIND_DATA *, const wchar_t *, int);
- struct file_info * get_dir_contents(const wchar_t *, int);
- struct file_info * find_last_node(struct file_info *);
- void insert_node(struct file_info **, struct file_info **, struct file_info *);
- void destroy_file_info(struct file_info *);
- void print_usage(const wchar_t *);
- unsigned get_match_buffer(wchar_t **, void **);
- unsigned get_match_from_cmdline(wchar_t **, void **);
- unsigned get_match_from_input(void **);
- wchar_t * get_line(void);
- unsigned parse_match_item(wchar_t *, void **);
- void find_in_file(const struct file_info *, const char *, unsigned, int, unsigned char);
- int find_in_buffer(const char *, unsigned, const char *, unsigned, unsigned long long, const wchar_t *, int, char, unsigned char);
- wchar_t * get_error_message(int);
- unsigned long long max_file_size(struct file_info *);
- unsigned char size_length(unsigned long long, int);
- wchar_t * repeat_character(wchar_t, unsigned);
- void print_headers(unsigned char);
- const wchar_t * get_filename(const wchar_t *);
- wchar_t * print_number(wchar_t *, unsigned long long, int, unsigned char);
- const wchar_t * get_dir_separator(const wchar_t *);
- void show_help(const wchar_t *);
// This is a hack to make things work on GCC, which does not call wmain():
// a plain main() fetches the wide-character argument vector from the C
// runtime and forwards it to wmain().
#ifdef __GNUC__
// Declared by hand (the original author believed it comes from <internal.h>).
extern int __wgetmainargs(int *, wchar_t ***, wchar_t ***, int, int *);

int main (void) {
    int argc;
    wchar_t **argv;
    wchar_t **envp;
    // The type should be _startupinfo, but that is typedef'd to int.
    // Zero-initialised so the runtime is never handed an indeterminate value.
    int startinfo = 0;
    // NOTE(review): the fourth argument (0) presumably disables wildcard
    // expansion by the runtime -- confirm against the CRT sources.
    if (__wgetmainargs(&argc, &argv, &envp, 0, &startinfo) < 0) abort();
    return wmain(argc, argv);
}
#endif
- int wmain (int argc, wchar_t ** argv) {
- wchar_t * invocation_name = *(argv ++);
- int options = parse_options(&argv, invocation_name);
- const wchar_t * location = *(argv ++);
- if (!location) print_usage(invocation_name);
- void * match_buffer;
- unsigned match_length = get_match_buffer(argv, &match_buffer);
- if (!match_length) return 1;
- struct file_info * files = find_files(location, options & OPT_RECURSE);
- if (!files) {
- free(match_buffer);
- if (GetLastError()) {
- wchar_t * error_text = get_error_message(GetLastError());
- console_write_line(1, invocation_name, L": error: ", error_text, NULL);
- free(error_text);
- return 2;
- }
- return 0;
- }
- struct file_info * current_file;
- unsigned char size_chars = size_length(max_file_size(files), !(options & OPT_DECIMAL));
- print_headers(size_chars);
- for (current_file = files; current_file; current_file = current_file -> next)
- find_in_file(current_file, match_buffer, match_length, options, size_chars);
- destroy_file_info(files);
- free(match_buffer);
- return 0;
- }
- void console_write (int error, const wchar_t * string, ...) {
- // you'd expect C99 to have a wide char equivalent of stdin and stdout, huh?
- va_list ap;
- HANDLE console = GetStdHandle(error ? STD_ERROR_HANDLE: STD_OUTPUT_HANDLE);
- DWORD dummy; // like variable name, like API call
- va_start(ap, string);
- while (string) {
- WriteConsole(console, string, wcslen(string), &dummy, NULL);
- string = va_arg(ap, const wchar_t *);
- }
- va_end(ap);
- }
// Same as console_write(), but appends a "\r\n" line break after the last string.
// The argument list must be terminated with NULL.
void console_write_line (int error, const wchar_t * string, ...) {
    va_list args;
    va_start(args, string);
    for (const wchar_t *piece = string; piece != NULL; piece = va_arg(args, const wchar_t *))
        console_write(error, piece, NULL);
    va_end(args);
    console_write(error, L"\r\n", NULL);
}
- wchar_t console_get_char (void) {
- // but there isn't a wide char equivalent of this, so leave it to the poor programmer to rewrite getchar() with syscalls
- wchar_t result;
- DWORD nr;
- nr = ReadConsole(GetStdHandle(STD_INPUT_HANDLE), &result, 1, &nr, NULL) || nr;
- if (!nr) return 0;
- return result;
- }
- int parse_options (wchar_t *** pcmd, const wchar_t * program_name) {
- int options = 0;
- int keep_parsing = 1;
- const wchar_t * option_text;
- while (keep_parsing) {
- if (!**pcmd) return options;
- if (***pcmd != L'-') return options;
- for (option_text = *((*pcmd) ++) + 1; *option_text; option_text ++)
- switch (*option_text) {
- case L'R': options |= OPT_RECURSE; break;
- case L'd': options |= OPT_DECIMAL; break;
- case L'f': options |= OPT_FILENAMES_ONLY; break;
- case L'N': options |= OPT_SHOW_NO_MATCH; break;
- case L'-': keep_parsing = 0; break;
- case L'?': show_help(program_name);
- default: print_usage(program_name);
- }
- }
- return options;
- }
- struct file_info * find_files (const wchar_t * location, int recursive) {
- if (!location) exit(0);
- int len = wcslen(location);
- if (!len) exit(0);
- if (location[len - 1] == L'\\') return get_dir_contents(location, recursive);
- WIN32_FIND_DATA fd;
- HANDLE handle = FindFirstFile(location, &fd);
- if (handle == INVALID_HANDLE_VALUE) {
- if (GetLastError() == ERROR_FILE_NOT_FOUND) SetLastError(0);
- return NULL;
- }
- struct file_info * result = NULL;
- struct file_info * last = NULL;
- struct file_info * current_file = get_node_for_file(&fd, location, recursive);
- if (current_file) insert_node(&result, &last, current_file);
- while (FindNextFile(handle, &fd)) {
- current_file = get_node_for_file(&fd, location, recursive);
- if (current_file) insert_node(&result, &last, current_file);
- }
- if (GetLastError() == ERROR_NO_MORE_FILES) SetLastError(0);
- FindClose(handle);
- return result;
- }
- struct file_info * get_node_for_file (WIN32_FIND_DATA * fd, const wchar_t * search_string, int recursive) {
- if (!(wcscmp(fd -> cFileName, L".") && wcscmp(fd -> cFileName, L".."))) return NULL;
- if (fd -> dwFileAttributes & FILE_ATTRIBUTE_DEVICE) return NULL;
- struct file_info * newNode;
- wchar_t * full_name = malloc(sizeof(wchar_t) * (wcslen(search_string) + wcslen(fd -> cFileName) + 1));
- const wchar_t * dir_separator = get_dir_separator(search_string);
- if (dir_separator) {
- unsigned length = (dir_separator - search_string) + 1;
- memcpy(full_name, search_string, length * sizeof(wchar_t));
- wcscpy(full_name + length, fd -> cFileName);
- } else
- wcscpy(full_name, fd -> cFileName);
- if (fd -> dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
- if (recursive)
- newNode = get_dir_contents(full_name, 1);
- else
- newNode = NULL;
- } else {
- newNode = malloc(sizeof(struct file_info) + sizeof(wchar_t) * (wcslen(full_name) + 1));
- newNode -> next = NULL;
- newNode -> size = dword_to_qword(fd -> nFileSizeHigh, fd -> nFileSizeLow); // someone tell Microsoft that we have 64-bit variables now
- wcscpy(newNode -> name, full_name);
- }
- free(full_name);
- return newNode;
- }
- struct file_info * get_dir_contents (const wchar_t * directory, int recursive) {
- int len = wcslen(directory);
- wchar_t * target = malloc((3 + len) * sizeof(wchar_t));
- memcpy(target, directory, sizeof(wchar_t) * len);
- if (target[len - 1] != L'\\') target[len ++] = L'\\';
- target[len ++] = L'*';
- target[len] = 0;
- struct file_info * files = find_files(target, recursive);
- free(target);
- return files;
- }
- struct file_info * find_last_node (struct file_info * list) {
- if (!list) return NULL;
- while (list -> next) list = list -> next;
- return list;
- }
- void insert_node (struct file_info ** pFirst, struct file_info ** pLast, struct file_info * node) {
- if (!node) return;
- if (!*pFirst)
- *pFirst = node;
- else
- (**pLast).next = node;
- *pLast = find_last_node(node);
- }
- void destroy_file_info (struct file_info * file_info) {
- struct file_info * prev;
- while (file_info) {
- prev = file_info;
- file_info = file_info -> next;
- free(prev);
- }
- }
// Prints the one-line usage summary to the error stream and terminates the
// process with exit status 1.
void print_usage (const wchar_t * program_name) {
    const wchar_t *synopsis = L" [-RdfN?] file-mask [hex-string]";
    console_write_line(1, L"usage: ", program_name, synopsis, NULL);
    exit(1);
}
// Obtains the byte string to search for.
//   cmdline: remaining command-line arguments (NULL-terminated); when at least
//            one is present the pattern is taken from them, otherwise the user
//            is prompted for it
//   pBuffer: receives the allocated pattern bytes
// Returns the pattern length in bytes (0 when no valid pattern was obtained).
unsigned get_match_buffer (wchar_t ** cmdline, void ** pBuffer) {
    return *cmdline ? get_match_from_cmdline(cmdline, pBuffer)
                    : get_match_from_input(pBuffer);
}
// Builds the search pattern from command-line arguments.
//   cmdline: NULL-terminated array of hex-string arguments; their bytes are
//            concatenated in order
//   pBuffer: receives the allocated pattern (caller frees), or NULL on failure
// Returns the pattern length in bytes, or 0 after printing a message when an
// argument is not a valid hex string or memory runs out.
unsigned get_match_from_cmdline (wchar_t ** cmdline, void ** pBuffer) {
    char *match_buffer = NULL;
    unsigned length = 0;
    *pBuffer = NULL;
    for (; *cmdline; cmdline++) {
        // parse_match_item writes through a void **, so hand it a real void *
        void *item = NULL;
        unsigned item_length = parse_match_item(*cmdline, &item);
        if (!item_length) {
            console_write_line(1, L"'", *cmdline, L"' is not a valid hexadecimal byte string", NULL);
            free(match_buffer);
            return 0;
        }
        // keep the old pointer until realloc has succeeded, so nothing leaks on failure
        char *grown = realloc(match_buffer, length + item_length);
        if (!grown) {
            console_write_line(1, L"*** out of memory ***", NULL);
            free(item);
            free(match_buffer);
            return 0;
        }
        match_buffer = grown;
        memcpy(match_buffer + length, item, item_length);
        length += item_length;
        free(item);
    }
    *pBuffer = match_buffer;
    return length;
}
// Prompts for the search pattern and reads it from the console.
// The input line is split at spaces; every token must be a valid hex string
// and the resulting bytes are concatenated in order.
//   pBuffer: receives the allocated pattern (caller frees), or NULL on failure
// Returns the pattern length in bytes, or 0 when the line is empty, a token is
// invalid (a message is printed) or memory runs out.
unsigned get_match_from_input (void ** pBuffer) {
    *pBuffer = NULL;
    console_write(1, L"Enter search string: ", NULL);
    wchar_t *input_line = get_line();
    if (!input_line) return 0;

    char *result = NULL;
    unsigned length = 0;
    wchar_t *current = input_line;
    while (*current) {
        while (*current == L' ') current++;
        if (!*current) break;

        // isolate the next space-delimited token in its own NUL-terminated copy
        wchar_t *limit = wcschr(current, L' ');
        if (!limit) limit = current + wcslen(current);
        size_t token_chars = limit - current;
        wchar_t *token = malloc(sizeof(wchar_t) * (token_chars + 1));
        if (!token) goto out_of_memory;
        memcpy(token, current, sizeof(wchar_t) * token_chars);
        token[token_chars] = 0;

        void *item = NULL;
        unsigned item_length = parse_match_item(token, &item);
        if (!item_length) {
            console_write_line(1, L"'", token, L"' is not a valid hexadecimal byte string", NULL);
            free(token);
            free(result);
            free(input_line);
            return 0;
        }
        free(token);

        char *grown = realloc(result, length + item_length);
        if (!grown) {
            free(item);
            goto out_of_memory;
        }
        result = grown;
        memcpy(result + length, item, item_length);
        length += item_length;
        free(item); // the bytes have been copied; the parsed item is no longer needed
        current = limit;
    }
    free(input_line);
    *pBuffer = result;
    return length;

out_of_memory:
    console_write_line(1, L"*** out of memory ***", NULL);
    free(result);
    free(input_line);
    return 0;
}
// Value (0-15) of one hexadecimal digit, or -1 if c is not a hexadecimal digit.
static int hex_digit_value (wchar_t c) {
    if (c >= L'0' && c <= L'9') return c - L'0';
    if (c >= L'A' && c <= L'F') return c - L'A' + 10;
    if (c >= L'a' && c <= L'f') return c - L'a' + 10;
    return -1;
}

// Converts a string of hexadecimal digits into the bytes it denotes.
//   item:   NUL-terminated hex string; must hold an even, non-zero number of digits
//   result: receives a newly allocated byte buffer (caller frees), or NULL on failure
// Returns the number of bytes produced, or 0 when item is NULL, empty, of odd
// length, contains a non-hex character, or memory cannot be allocated.
unsigned parse_match_item (wchar_t * item, void ** result) {
    *result = NULL;
    if (!(item && *item)) return 0;
    size_t digits = wcslen(item);
    if (digits % 2) return 0;
    unsigned length = (unsigned) (digits / 2);
    unsigned char *buffer = malloc(length);
    if (!buffer) return 0;
    for (unsigned pos = 0; pos < length; pos++) {
        // each digit is converted exactly once
        int high = hex_digit_value(item[pos * 2]);
        int low = hex_digit_value(item[pos * 2 + 1]);
        if (high < 0 || low < 0) {
            free(buffer);
            return 0;
        }
        buffer[pos] = (unsigned char) (high * 16 + low);
    }
    *result = buffer;
    return length;
}
// Reads one line from the console, one character at a time.
// Reading stops at '\r' or '\n' (neither is stored) or when console_get_char()
// returns 0 (that 0 is stored, so it also ends the string). After a '\r' the
// next character is read and discarded, as Windows lines end in "\r\n".
// Returns a newly allocated NUL-terminated string; the caller frees it.
wchar_t * get_line (void) {
    wchar_t *text = NULL;
    unsigned count = 0;
    wchar_t ch;
    for (;;) {
        ch = console_get_char();
        if (ch == L'\r' || ch == L'\n') break;
        text = realloc(text, sizeof(wchar_t) * (count + 1));
        text[count] = ch;
        count++;
        if (!ch) break;
    }
    if (ch == L'\r') console_get_char(); // swallow the '\n' of the "\r\n" pair
    text = realloc(text, sizeof(wchar_t) * (count + 1));
    text[count] = 0;
    return text;
}
- void find_in_file (const struct file_info * file, const char * search_item, unsigned length, int options, unsigned char size_chars) {
- const wchar_t * filename = (options & OPT_FILENAMES_ONLY) ? get_filename(file -> name) : file -> name;
- HANDLE fh = CreateFile(file -> name, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
- if (fh == INVALID_HANDLE_VALUE) {
- wchar_t * strbuf = repeat_character(L' ', size_chars - 5);
- console_write_line(0, strbuf, L"error ", filename, NULL);
- free(strbuf);
- return;
- }
- char * read_buffer = NULL;
- unsigned buffer_length = 4 << 20;
- while ((buffer_length >> 1) < length) buffer_length <<= 1;
- while (!read_buffer) {
- if ((buffer_length >> 1) < length) {
- console_write_line(1, L"*** out of memory ***", NULL);
- abort();
- }
- read_buffer = malloc(buffer_length);
- buffer_length >>= 1;
- }
- DWORD read_length; // uint32_t. Windows just doesn't like standard stuff. Reinventing the wheel FTW!
- ReadFile(fh, read_buffer, buffer_length << 1, &read_length, NULL);
- int found = 0;
- if (read_length < (buffer_length << 1)) {
- found = find_in_buffer(read_buffer, read_length, search_item, length, 0, filename, !(options & OPT_DECIMAL), 0, size_chars);
- goto end; // you don't like goto? I don't like bracing and indenting lots of lines on an else just to avoid one. Moving on...
- }
- unsigned long long offset = 0;
- do {
- found |= find_in_buffer(read_buffer, buffer_length, search_item, length, offset, filename, !(options & OPT_DECIMAL), 1, size_chars);
- offset += buffer_length;
- memcpy(read_buffer, read_buffer + buffer_length, buffer_length);
- ReadFile(fh, read_buffer + buffer_length, buffer_length, &read_length, NULL);
- } while (read_length == buffer_length);
- found |= find_in_buffer(read_buffer, buffer_length + read_length, search_item, length, offset, filename, !(options & OPT_DECIMAL), 0, size_chars);
- end:
- CloseHandle(fh);
- free(read_buffer);
- if ((options & OPT_SHOW_NO_MATCH) && !found) {
- wchar_t * strbuf = repeat_character(L' ', size_chars - 8);
- console_write_line(0, strbuf, L"no match ", filename, NULL);
- free(strbuf);
- }
- }
// Searches a memory buffer for a byte pattern and prints one row per match.
//   buffer, buffer_length: data to scan; buffer_length counts the start positions
//                          to test when more_data is non-zero
//   search, search_length: pattern bytes
//   base_offset:           file offset corresponding to buffer[0]
//   filename:              name printed next to each offset
//   use_hex:               non-zero prints offsets in hexadecimal, zero in decimal
//   more_data:             non-zero when valid data follows the buffer (at least
//                          search_length - 1 bytes are read past it), zero when
//                          the buffer end is the end of the data
//   size_chars:            width of the offset column
// Returns 1 if at least one match was printed, 0 otherwise.
int find_in_buffer (const char * buffer, unsigned buffer_length, const char * search, unsigned search_length, unsigned long long base_offset,
                    const wchar_t * filename, int use_hex, char more_data, unsigned char size_chars) {
    if (search_length == 0) return 0;

    unsigned start_positions = buffer_length;
    if (!more_data) {
        // without look-ahead a match must fit entirely inside the buffer
        if (start_positions < search_length) return 0;
        start_positions -= search_length - 1;
    }

    wchar_t *offset_text = malloc(sizeof(wchar_t) * (size_chars + 1));
    int any_match = 0;
    for (unsigned index = 0; index < start_positions; index++) {
        if (memcmp(buffer + index, search, search_length) == 0) {
            print_number(offset_text, base_offset + index, use_hex, size_chars);
            console_write_line(0, offset_text, L" ", filename, NULL);
            any_match = 1;
        }
    }
    free(offset_text);
    return any_match;
}
- wchar_t * get_error_message (int error_code) {
- wchar_t * error_message = malloc(1200 * sizeof(wchar_t));
- unsigned rv = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + FORMAT_MESSAGE_IGNORE_INSERTS + 79, NULL, error_code, 0, error_message, 1200, NULL);
- // the above line does indeed return the error text for a certain code -- it also shows the API's efforts in aiding you with code obfuscation
- return realloc(error_message, sizeof(wchar_t) * (1 + rv));
- }
- unsigned long long max_file_size (struct file_info * files) {
- unsigned long long max = 0;
- while (files) {
- if (files -> size > max) max = files -> size;
- files = files -> next;
- }
- return max;
- }
// Computes the width of the offset column.
//   max_size: largest value that has to fit
//   use_hex:  non-zero for hexadecimal digits, zero for decimal digits plus
//             one thousands separator per complete group of three
// Returns the number of characters needed, but never less than 8.
unsigned char size_length (unsigned long long max_size, int use_hex) {
    const unsigned base = use_hex ? 16 : 10;
    unsigned char width = 0;
    for (; max_size; max_size /= base) width++;
    if (!use_hex && width) width += (width - 1) / 3; // room for the commas
    if (width < 8) width = 8;
    return width;
}
// Returns a newly allocated NUL-terminated string made of `count` copies of
// `character`; the caller frees it.
wchar_t * repeat_character (wchar_t character, unsigned count) {
    wchar_t *text = malloc(sizeof(wchar_t) * (count + 1));
    for (unsigned index = 0; index < count; index++) text[index] = character;
    text[count] = 0;
    return text;
}
// Prints the two header rows of the result table: the column titles, then a
// row of dashes. The first column is size_chars wide; size_chars is expected
// to be at least 8, the length of "Location".
void print_headers (unsigned char size_chars) {
    wchar_t *title_padding = repeat_character(L' ', size_chars - 8);
    console_write_line(0, L"Location", title_padding, L" Filename", NULL);
    free(title_padding);

    wchar_t *rule = repeat_character(L'-', size_chars);
    console_write_line(0, rule, L" --------", NULL);
    free(rule);
}
// Returns the part of a path that follows its last directory separator, or
// the whole path when it contains no separator. The result points into `path`.
const wchar_t * get_filename (const wchar_t * path) {
    const wchar_t *separator = get_dir_separator(path);
    return separator ? separator + 1 : path;
}
// Formats a number right-aligned in a space-padded field.
//   buf:     output buffer with room for size + 1 wide characters
//   number:  value to format
//   use_hex: non-zero for upper-case hexadecimal; zero for decimal with a
//            comma between groups of three digits
//   size:    field width in characters
// Returns buf. Nothing is ever written before buf: if the number needs more
// than `size` characters, only the rightmost characters that fit are kept.
wchar_t * print_number (wchar_t * buf, unsigned long long number, int use_hex, unsigned char size) {
    static const wchar_t digit_chars[] = L"0123456789ABCDEF";
    const unsigned base = use_hex ? 16 : 10;

    // fill the field with spaces and terminate it; `current` ends up one past the last cell
    wchar_t *current = buf;
    for (unsigned char filled = 0; filled < size; filled++) *(current++) = L' ';
    *current = 0;

    if (!number) {
        if (current > buf) current[-1] = L'0';
        return buf;
    }

    // emit digits from least significant to most, walking backwards through the field
    unsigned digits_written = 0;
    while (number && current > buf) {
        unsigned digit = (unsigned) (number % base);
        number /= base;
        if (base == 10 && digits_written && !(digits_written % 3)) {
            *(--current) = L',';
            if (current == buf) break; // the field is full
        }
        digits_written++;
        *(--current) = digit_chars[digit];
    }
    return buf;
}
// Returns a pointer to the last directory separator (backslash or forward
// slash) in a path, or NULL when the path contains neither.
const wchar_t * get_dir_separator (const wchar_t * path) {
    // A single pass avoids comparing two pointers of which one may be NULL.
    const wchar_t *last = NULL;
    for (const wchar_t *cursor = path; *cursor; cursor++)
        if (*cursor == L'\\' || *cursor == L'/') last = cursor;
    return last;
}
// Prints the full help text (description, usage line, explanation of the
// arguments and the option list) to standard output, then terminates the
// process with exit status 0.
void show_help (const wchar_t * program_name) {
    static const wchar_t summary[] =
        L"This program searches one or more files for a certain byte string.\r\n\r\n";
    static const wchar_t synopsis[] =
        L" [-RdfN?] file-mask [hex-string]\r\n\r\n";
    static const wchar_t argument_notes[] =
        L"The file mask allows wildcards (? and *). The hex string must be a string of\r\n"
        L"valid hexadecimal characters. Spaces are allowed as long as each token has an\r\n"
        L"even number of characters (i.e., each token must contain an integral number of\r\n"
        L"bytes). If no hex string is entered, the program will prompt for one.\r\n\r\n";
    static const wchar_t option_notes[] =
        L"The options have the following meanings:\r\n"
        L" -R search recursively (if a directory is found, search its contents)\r\n"
        L" -d show offsets in decimal (default is hexadecimal)\r\n"
        L" -f show filenames only (default is to show the path, relative or absolute\r\n"
        L" depending on the file mask)\r\n"
        L" -N show also the files for which no match is found (\"no match\" will be\r\n"
        L" shown as offset)\r\n"
        L" -- stop parsing options (useful if the filename begins with -)\r\n"
        L" -? show this help text and quit\r\n";
    console_write(0, summary, L"usage: ", program_name, synopsis, argument_notes, option_notes, NULL);
    exit(0);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement