diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml new file mode 100644 index 0000000..8a9c809 --- /dev/null +++ b/.github/workflows/cmake-multi-platform.yml @@ -0,0 +1,153 @@ +name: CMake Multi-Platform + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + BUILD_TYPE: Release + +jobs: + build-ubuntu: + runs-on: ubuntu-latest + strategy: + matrix: + compiler: [gcc, clang] + + steps: + - uses: actions/checkout@v4 + + - name: Set up compiler + run: | + if [ "${{ matrix.compiler }}" == "gcc" ]; then + sudo apt-get update + sudo apt-get install -y g++ + elif [ "${{ matrix.compiler }}" == "clang" ]; then + sudo apt-get update + sudo apt-get install -y clang + fi + shell: bash + + - name: Configure CMake # the matrix compiler is passed explicitly; otherwise both matrix entries build with the runner's default C++ compiler + run: cmake -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} -DCMAKE_CXX_COMPILER=${{ matrix.compiler == 'gcc' && 'g++' || 'clang++' }} -B build + shell: bash + + - name: Build with CMake + run: cmake --build build + shell: bash + + - name: Install Library + run: cmake --install build --prefix ${{ github.workspace }}/mime_magic + shell: bash + + - name: Upload Build Artifacts + uses: actions/upload-artifact@v4 # v3 of the artifact actions has been retired by GitHub + with: + name: build-artifacts-ubuntu-${{ matrix.compiler }} + path: ${{ github.workspace }}/mime_magic + + test-ubuntu: + runs-on: ubuntu-latest + needs: build-ubuntu + strategy: + matrix: + compiler: [gcc, clang] + + steps: + - uses: actions/checkout@v4 + + - name: Download Build Artifacts + uses: actions/download-artifact@v4 # must match the major version used for the upload + with: + name: build-artifacts-ubuntu-${{ matrix.compiler }} + path: ${{ github.workspace }}/mime_magic + + - name: Configure Tests # build the tests with the same compiler that produced the library artifact + working-directory: ${{ github.workspace }}/tests + run: cmake -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} -DCMAKE_CXX_COMPILER=${{ matrix.compiler == 'gcc' && 'g++' || 'clang++' }} -B build + shell: bash + + - name: Build Tests + working-directory: ${{ github.workspace }}/tests + run: cmake --build build + shell: bash + + - name: Run Tests + working-directory: ${{ github.workspace }}/tests/build + run: ./tests + shell: bash + + build-windows: + runs-on: windows-latest + steps: + - 
uses: actions/checkout@v4 + + - name: Set up MSBuild + uses: microsoft/setup-msbuild@v1.1 + + - name: Install Visual Studio Build Tools + run: | + choco install visualstudio2019buildtools --package-parameters "--add Microsoft.VisualStudio.Workload.VCTools --includeRecommended --includeOptional --quiet --norestart" + shell: cmd + + - name: Configure CMake + run: cmake -G "Visual Studio 16 2019" -B ${{ github.workspace }}/build + shell: cmd + + - name: Build with CMake + run: cmake --build ${{ github.workspace }}/build --config ${{ env.BUILD_TYPE }} + shell: cmd + + - name: Install Library + run: cmake --install ${{ github.workspace }}/build --config ${{ env.BUILD_TYPE }} --prefix ${{ github.workspace }}/mime_magic + shell: cmd + + - name: Upload Build Artifacts + uses: actions/upload-artifact@v4 # v3 of the artifact actions has been retired by GitHub + with: + name: build-artifacts-windows + path: ${{ github.workspace }}/mime_magic + + test-windows: + runs-on: windows-latest + needs: build-windows + steps: + - uses: actions/checkout@v4 + + - name: Download Build Artifacts + uses: actions/download-artifact@v4 # must match the major version used for the upload + with: + name: build-artifacts-windows + path: ${{ github.workspace }}/mime_magic + + - name: Configure Tests + working-directory: ${{ github.workspace }}/tests + run: cmake -G "Visual Studio 16 2019" -B build + shell: cmd + + - name: Build Tests + working-directory: ${{ github.workspace }}/tests + run: cmake --build build --config ${{ env.BUILD_TYPE }} + shell: cmd + + - name: Run Tests # cmd.exe cannot start "./tests"; the Visual Studio generator puts the binary in a per-configuration folder (assumes the test target is named "tests" - confirm) + working-directory: ${{ github.workspace }}/tests/build + run: .\${{ env.BUILD_TYPE }}\tests.exe + shell: cmd + + send_message: + runs-on: ubuntu-latest + needs: [test-ubuntu, test-windows] + steps: + - name: Send telegram message + uses: appleboy/telegram-action@master + with: + to: ${{ secrets.TELEGRAM_TO }} + token: ${{ secrets.TELEGRAM_TOKEN }} + message: | + mime_magic ready to merge + Выполнил: ${{ github.actor }} + Сообщение коммита: ${{ github.event.commits[0].message }} + Ссылка на коммит: https://github.com/${{ github.repository }}/commit/${{ 
github.sha }} diff --git a/CMakeLists.txt b/CMakeLists.txt index c32be7b..23da650 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,26 +1,47 @@ cmake_minimum_required(VERSION 3.23) -project(mime_magic) +project(mime_magic VERSION 1.0.1 LANGUAGES CXX) -include_directories( - src - src/loader - srd/node +add_subdirectory(src) + +add_custom_target(copy_docs + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/docs ${CMAKE_BINARY_DIR}/docs + COMMENT "Copying documentation" +) + +add_custom_target(copy_examples + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/examples ${CMAKE_BINARY_DIR}/examples + COMMENT "Copying examples" +) + +add_dependencies(${PROJECT_NAME}_shared copy_docs) +add_dependencies(${PROJECT_NAME}_shared copy_examples) +add_dependencies(${PROJECT_NAME}_static copy_docs) +add_dependencies(${PROJECT_NAME}_static copy_examples) + +# Создание и установка конфигурационных файлов +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + VERSION ${PROJECT_VERSION} + COMPATIBILITY AnyNewerVersion ) -add_library( - ${PROJECT_NAME} - STATIC - src/node/mime_node.cpp - src/loader/mime_loader.cpp +configure_file(src/mime_magicConfig.cmake.in + "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + @ONLY ) -add_executable( - ${PROJECT_NAME}_test - test/main.cpp +install(DIRECTORY ${CMAKE_BINARY_DIR}/docs DESTINATION ./) +install(DIRECTORY ${CMAKE_BINARY_DIR}/examples DESTINATION ./) + +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION lib/cmake/${PROJECT_NAME} ) -target_link_libraries( - ${PROJECT_NAME}_test - PUBLIC - ${PROJECT_NAME} -) \ No newline at end of file +install(EXPORT ${PROJECT_NAME}Targets + FILE ${PROJECT_NAME}Targets.cmake + NAMESPACE ${PROJECT_NAME}:: + DESTINATION lib/cmake/${PROJECT_NAME} +) diff --git a/README.md b/README.md 
index a0a836d..9a2cf65 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,40 @@ -# cpp-mime-magic -A MIME magic parser for identifying file types. +# MIME Magic + +## Overview +MIME Magic is a cross-platform library +for parsing and executing magic files. +It implements the standard magic file format, +with a number of differences from that standard. + +## Deployment +To deploy the library you need to +follow these steps: + +### Configure project +In the command below you are free to change `CMAKE_BUILD_TYPE` +(for example, to configure the Debug version) +and to use a build directory of your own. +```shell +cmake -DCMAKE_BUILD_TYPE=Release -B cmake_build_release +``` +> [!IMPORTANT] +> If you are building the library with MSBuild, +> specifying `CMAKE_BUILD_TYPE` is not necessary. +### Building the project +Use the command below to build the library. +You can change the build directory. +```shell +cmake --build cmake_build_release +``` +> [!IMPORTANT] +> If you are building the library with MSBuild, +> add `--config {BUILD_TYPE}`. + +### Installing the library +Use the command below. +```shell +cmake --install cmake_build_release --prefix ./mime_magic +``` +> [!IMPORTANT] +> If you are building the library with MSBuild, +> add `--config {BUILD_TYPE}`. \ No newline at end of file diff --git a/bootstrap.bat b/bootstrap.bat new file mode 100644 index 0000000..b228553 --- /dev/null +++ b/bootstrap.bat @@ -0,0 +1,22 @@ +@echo off + +REM Check if the installation path argument is passed +if "%~1"=="" ( + set /p INSTALLATION_PATH="Enter the installation path (for example, C:\Program Files\MyApp): " +) else ( + set INSTALLATION_PATH=%~1 +) + +REM Create a build directory +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -S . -B build +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -S . 
-B build + +REM Putting the project together +cmake --build build + +REM Install the project (the prefix is quoted because the path may contain spaces) +cmake --install build --prefix "%INSTALLATION_PATH%" + +rmdir /s /q build + +pause diff --git a/examples/basic_project/CMakeLists.txt b/examples/basic_project/CMakeLists.txt new file mode 100644 index 0000000..4e5f5e7 --- /dev/null +++ b/examples/basic_project/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.23) +project(test LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) + +set(CMAKE_PREFIX_PATH ../mime_magic/lib/cmake/mime_magic) # forward slashes are accepted on every platform + +find_package(mime_magic REQUIRED) # the library installs mime_magicConfig.cmake, so the package name is mime_magic + +add_executable(${PROJECT_NAME} main.cpp) + +target_link_libraries(${PROJECT_NAME} mime_magic::mime_magic_static) \ No newline at end of file diff --git a/examples/basic_project/main.cpp b/examples/basic_project/main.cpp new file mode 100644 index 0000000..608ffb3 --- /dev/null +++ b/examples/basic_project/main.cpp @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include + +#include "loader/mime_loader.h" + +class Timer { + public: + Timer() { + start = std::chrono::high_resolution_clock::now(); + } + + ~Timer() { + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start); + std::cout << "Time taken: " << duration.count() << " ms" << std::endl; + } + + private: + std::chrono::time_point start; +}; + +int main() { + using namespace std; + boolalpha(cout); + system("chcp 1251"); + + magic::mime_list nodes; +// You can check how it fast +// { +// Timer t; +// nodes = std::move(magic::load("C:\\Sophus-NEW\\modules\\files.etl")); +// } + + nodes = magic::load("magic.etl"); + + cout << nodes.size() << " node workers SUCCESSFULLY LOADED!" 
<< endl; + + vector data; + data.resize(30); + + { + ifstream png("image.png", ios::in | ios::binary); + png.read(data.data(), data.size()); + + cout << "PNG" << endl; + cout << std::string(80, '=') << endl << endl; + int i = 1; + for (const auto& node: nodes) { + auto response = node->process_data(data.data(), data.size()); + if (response.has_value()) { + cout << dec << i << hex << ")\n" << response.value() << endl; + cout << endl << std::string(80, '=') << endl << endl; + break; + } + ++i; + } + } + data.clear(); + data.shrink_to_fit(); + data.resize(30); + + { + ifstream corrupt_png("corrupted-image.png", ios::in | ios::binary); + corrupt_png.read(data.data(), data.size()); + + cout << "CORRUPT PNG" << endl; + cout << std::string(80, '=') << endl << endl; + size_t i = 1; + for (const auto& node: nodes) { + auto response = node->process_data(data.data(), data.size()); + if (response.has_value()) { + cout << dec << i << hex << ")\n" << response.value() << endl; + cout << endl << std::string(80, '=') << endl << endl; + break; + } + ++i; + } + } + + return 0; +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..012f872 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,45 @@ +cmake_minimum_required(VERSION 3.23) +project(mime_magic LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) + +set(CURRENT_DIR ${CMAKE_CURRENT_LIST_DIR}) + +#set(CMAKE_CXX_FLAGS "-Wall -Wextra -pedantic") + +include_directories( + ${CURRENT_DIR} + ${CURRENT_DIR}/loader + ${CURRENT_DIR}/nodes +) + +add_library( + ${PROJECT_NAME}_static + STATIC + loader/mime_loader.cpp + nodes/basic_mime_node.cpp + nodes/date_node/date_node.cpp + nodes/string_node/string_node.cpp + nodes/numeric_node/numeric_node.cpp +) + +add_library( + ${PROJECT_NAME}_shared + SHARED + loader/mime_loader.cpp + nodes/basic_mime_node.cpp + nodes/date_node/date_node.cpp + nodes/string_node/string_node.cpp + nodes/numeric_node/numeric_node.cpp +) + +# Installing libraries and headers 
+install(TARGETS ${PROJECT_NAME}_static ${PROJECT_NAME}_shared + EXPORT ${PROJECT_NAME}Targets + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin +) + +install(DIRECTORY loader/ DESTINATION include/loader FILES_MATCHING PATTERN "*.h") +install(DIRECTORY nodes/ DESTINATION include/nodes FILES_MATCHING PATTERN "*.h") diff --git a/src/loader/loading_error.h b/src/loader/loading_error.h new file mode 100644 index 0000000..80270b5 --- /dev/null +++ b/src/loader/loading_error.h @@ -0,0 +1,50 @@ +#ifndef _MIME_MAGIC_LOADING_ERROR_H_ +#define _MIME_MAGIC_LOADING_ERROR_H_ + +#include +#include + +namespace magic { + + class loading_error : public std::exception { + public: + + loading_error() = default; + + explicit loading_error( + size_t line, + const std::string& message, + const std::string& recommendation = "" + ) : + line_ {line}, + message_ {message}, + recommendation_ {recommendation} + { + } + + loading_error(const loading_error& other) = default; + + loading_error(loading_error&& other) = default; + + [[nodiscard]] const char *what() const noexcept override { + std::string result {"Loading error: "}; + + result += message_; + result += "\nIn line: "; + result += std::to_string(line_); + if (!recommendation_.empty()) { + result += "\nRecommendation"; + result += recommendation_; + } + return result.c_str(); + } + + private: + + size_t line_ {}; + std::string message_ {}; + std::string recommendation_ {}; + }; +} + +#endif //_MIME_MAGIC_LOADING_ERROR_H_ diff --git a/src/loader/mime_loader.cpp b/src/loader/mime_loader.cpp index 01e2041..7145ea6 100644 --- a/src/loader/mime_loader.cpp +++ b/src/loader/mime_loader.cpp @@ -1,37 +1,31 @@ #include "mime_loader.h" +#include "nodes/date_node/date_node.h" +#include "nodes/numeric_node/numeric_node.h" +#include "nodes/string_node/string_node.h" +#include "loading_error.h" + #include #include #include #include +#include +#include +#include using std::string; using std::string_view; using namespace 
magic; -class line_counter { - public: - - operator string() { - return cnt_view; - } - - string operator++() { - return cnt_view = std::to_string(++line_cnt); - } - - string operator--() { - return cnt_view = std::to_string(--line_cnt); - } - - private: - size_t line_cnt {}; - string cnt_view {'0'}; -} current_line; +size_t current_line; void remove_operands(string_view& value, string_view operands) { for (char c: operands) { + if (c == 'x' && value == "x") { + value.remove_prefix(1); + return; + } if (value.front() == c) { value.remove_prefix(1); return; @@ -43,17 +37,17 @@ char parse_symcode(string_view line) { using namespace std::literals; if (line[1] == 'x') { - return std::stoi(string(line.substr(2)), nullptr, 16); + return static_cast(std::stoi(string(line.substr(2)), nullptr, 16)); } if (isdigit(line[1])) { - return std::stoi(string(line.substr(1))); + return static_cast(std::stoi(string(line.substr(1)))); } - throw std::runtime_error { - "Syntax error: Invalid symbol code\n" - "In line: "s + string(current_line) + "\n" - + "\tCode: "s + line[0] + throw loading_error { + current_line, + "Invalid symbol code"s, + std::string {line[0]} }; } @@ -68,7 +62,9 @@ const std::unordered_set escape { '\'', '\"', '\?', 'a', 'b', 'f', 'n', 'r', 't', - 'v', '\\', '0' + 'v', '\\', '0', + ' ', '=', '>', + '<', 'x' }; char parse_escape(char c) { @@ -99,17 +95,29 @@ char parse_escape(char c) { return '\\'; case '0': return '\0'; + case ' ': + return ' '; + case '=': + return '='; + case '<': + return '<'; + case '>': + return '>'; + case 'x': + return 'x'; default: - throw std::runtime_error { - "Syntax error: Invalid escape sequence\n" - "In line: "s + string(current_line) + "\n"s - + "\tSequence: \\"s + c + throw loading_error { + current_line, + "Invalid escape sequence"s, + "\\"s + c }; } } string parse_string(string_view line) { + remove_operands(line, "=<>x"); string result; + for (size_t i = 0; i < line.size(); ++i) { if (line[i] == '\\') { if (line[i + 1] == 'x' || 
(isdigit(line[i + 1]) && i + 2 < line.size() && line[i + 2] != '\\')) { @@ -128,158 +136,282 @@ string parse_string(string_view line) { return result; } -uint32_t parse_raw_value(string_view raw_value) { +int64_t parse_single_raw_value(string_view raw_value) { using namespace std::literals; if (raw_value == "x") { return 1; } + if ((raw_value[0] == '0' && raw_value.size() == 1) || isdigit(raw_value[0])) { + return std::stoll(string(raw_value), nullptr, 10); + } + if (raw_value.front() == '0') { if (raw_value.size() < 2) { return 0l; } if (isdigit(raw_value[1])) { - return std::stoul(string(raw_value)); + return std::stoll(string(raw_value), nullptr, 8); } if (raw_value[1] == 'x') { - return std::stoul(string(raw_value), nullptr, 16); + return std::stoll(string(raw_value), nullptr, 16); } - throw std::runtime_error { - "Syntax error: Invalid value\n" - "In line: "s + string(current_line) - + "\tValue: "s + string(raw_value) + throw loading_error { + current_line, + "Invalid value"s, + " value = {"s + string(raw_value) + '}' }; } - return std::stoul(string(raw_value)); + + if (raw_value.empty()) { + return 0; + } + + return std::stoll(string(raw_value)); } -mime_node::value parse_value(string_view raw_type, string_view raw_value) { +// Supports only minus and plus expressions +int64_t parse_raw_value(string_view raw_value) { using namespace std::literals; + try { + if(raw_value.empty()) { + return std::numeric_limits::max(); + } + if (raw_value.front() == '(') { + if (raw_value.back() != ')') { + throw loading_error { + current_line, + "Parentheses error"s + }; + } + raw_value.remove_prefix(1); + raw_value.remove_suffix(1); - mime_node::value result_value; + size_t operator_pos {raw_value.find('+')}; + if (operator_pos == string_view::npos) { + operator_pos = raw_value.find('-'); + } - if (raw_type == "string"s) { - remove_operands(raw_value, "=!<>"); - return parse_string(raw_value); + // If operator not found + if (operator_pos == string_view::npos) { + return 
parse_single_raw_value(raw_value); + } + const int64_t lhs {parse_single_raw_value(raw_value.substr(0, operator_pos))}; + const int64_t rhs {parse_single_raw_value(raw_value.substr(operator_pos + 1))}; + if (raw_value[operator_pos] == '+') { + return lhs + rhs; + } + return lhs - rhs; + } + return parse_single_raw_value(raw_value); + + } catch (std::invalid_argument&) { + throw loading_error { + current_line, + "Invalid date value"s, + "raw_value = " + std::string {raw_value} + }; } - remove_operands(raw_value, "=!<>&|^"); +} - size_t mask_pos {raw_type.find('&')}; +operands parse_operand(string_view line, string_view required_operands = "") { + if (!required_operands.empty() && required_operands.find(line.front()) == string_view::npos) { + return operands::equal; + } - string mask; - if (mask_pos != string_view::npos) { - mask = raw_type.substr(mask_pos + 1); - raw_type.remove_suffix(raw_type.size() - mask_pos); // TODO: See + if (line == "x") { + return operands::any; } + switch (line.front()) { + case '<': + return operands::less_than; + case '>': + return operands::greater_than; + case '=': + return operands::equal; + case '!': + return operands::not_equal; + case '&': + return operands::bit_and; + case '|': + return operands::bit_or; + case '^': + return operands::bit_xor; + default: + return operands::equal; + } +} - uint32_t value; +struct node_context { + size_t offset {}; + std::string message {}; + mime_list mimes {}; - try { - value = parse_raw_value(raw_value); - } catch (std::invalid_argument&) { - throw std::runtime_error { - "Syntax Error: Invalid raw value\n" - "In line: "s + string {current_line} + '\n' - + "\tRaw value: "s + string {raw_value} - }; + node_context() = default; + + node_context(size_t offset, std::string message, mime_list&& mimes) : offset(offset), message(std::move(message)), + mimes(std::move(mimes)) { } - if (raw_type == "byte") { - if (mask.empty()) { - return mime_data { - static_cast(value) - }; + node_context(const 
node_context&) = delete; + + node_context(node_context&&) = default; +}; + +std::unique_ptr create_string(node_context context, std::string_view raw_type, string_view raw_value) { + string_node::options option {string_node::options::none}; + raw_type.remove_prefix(6); // Removing "string" + if (!raw_type.empty() && raw_type.front() == '/') { + // Parse options + if (raw_type.find('c') != string_view::npos) { + option = string_node::options::not_case_sensitive; } - return mime_data { - static_cast(value), - static_cast(parse_raw_value(mask)) - }; } - if (raw_type.substr(0, 2) == "be"sv) { - raw_type.remove_prefix(2); - if (raw_type == "short"sv) { - if (mask.empty()) { - return mime_data { - static_cast(value), - mime_data::be - }; - } - return mime_data { - static_cast(value), - static_cast(parse_raw_value(mask)), - mime_data::be - }; + return std::make_unique( + context.offset, + string_node::data_template { + parse_string(raw_value), + option, + parse_operand(raw_value, "=<>x") + }, + context.message, + std::move(context.mimes) + ); +} + +std::unique_ptr create_date(node_context context, std::string_view raw_type, string_view raw_value) { + using namespace std::literals; + date_node::data_template data; + if (raw_type.substr(0, 2) == "be"sv // compare the two-character prefix: substr(2) is the tail ("date" for "bedate") and never equals "be" + ) { + data.normalize_byte_order = utils::change_order; + } else { + data.normalize_byte_order = [](auto val) { return val; }; + } + data.operand = parse_operand(raw_value, "=!<>x"); + remove_operands(raw_value, "=!<>x"); + data.value = parse_raw_value(raw_value); + return std::make_unique( + context.offset, + data, + context.message, + std::move(context.mimes) + ); +} + +std::unique_ptr +create_numeric(node_context context, std::string_view raw_type, string_view raw_value) { + using namespace std::literals; + operands operand = parse_operand(raw_value); + remove_operands(raw_value, "=!<>&|^x"); + + uint64_t mask {~0ull}; + { + size_t mask_pos = raw_type.find('&'); + string tmp_mask; + if (mask_pos != string_view::npos) { + 
tmp_mask = raw_type.substr(mask_pos + 1); + raw_type.remove_suffix(raw_type.size() - mask_pos); } - if (raw_type == "date"sv || raw_type == "long"sv) { - if (mask.empty()) { - return mime_data { - value, - mime_data::be - }; - } - return mime_data { - value, - parse_raw_value(mask), - mime_data::be - }; + if (!tmp_mask.empty()) { + mask = parse_raw_value(tmp_mask); } - throw std::runtime_error { - "Syntax error: Invalid type\n" - "In line: "s + string(current_line) + '\n' - + "Type: "s + string(raw_type) - }; } - if (raw_type == "short"sv || raw_type == "leshort"sv) { - if (mask.empty()) { - return mime_data { - static_cast(value) - }; - } - return mime_data( - static_cast(value), - static_cast(parse_raw_value(mask)) + numeric_node::type final_value; + numeric_node::type final_mask; + std::function byte_order_normalizer = [](char *, size_t) { + }; + + if (raw_type == "byte"sv + ) { + return std::make_unique( + context.offset, + numeric_node::data_template { + static_cast(parse_raw_value(raw_value)), + static_cast(mask), + operand, + byte_order_normalizer + }, + context.message, + std::move(context.mimes) ); } - if (raw_type == "long"sv || raw_type == "lelong"sv || raw_type == "date"sv || raw_type == "ledate"sv) { - if (mask.empty()) { - return mime_data {value}; + + bool sign = raw_type.front() != 'u'; + if (!sign) { + raw_type.remove_prefix(1); + } + + if (raw_type.substr(0, 2) == "be") { + raw_type.remove_prefix(2); + byte_order_normalizer = utils::change_raw_order; + } else if (raw_type.substr(0, 2) == "le") { + raw_type.remove_prefix(2); + } + + if (raw_type == "short") { + if (sign) { + final_value = static_cast(parse_raw_value(raw_value)); + final_mask = static_cast(mask); + } else { + final_value = static_cast(parse_raw_value(raw_value)); + final_mask = static_cast(mask); } - return mime_data( - value, - parse_raw_value(mask) - ); + } else if (raw_type == "long") { + if (sign) { + final_value = static_cast(parse_raw_value(raw_value)); + final_mask = 
static_cast(mask); + } else { + final_value = static_cast(parse_raw_value(raw_value)); + final_mask = static_cast(mask); + } + } else { + throw loading_error { + current_line, + "Unknown type"s, + "raw_type = " + std::string {raw_type} + }; } - throw std::runtime_error { - "Syntax error: Invalid type\n" - "In line: "s + string(current_line) + '\n' - + "Type: "s + string(raw_type) - }; + return std::make_unique( + context.offset, + numeric_node::data_template { + final_value, + final_mask, + operand, + byte_order_normalizer + }, + context.message, + std::move(context.mimes) + ); } -mime_node::operands parse_operand(string_view line) { - switch (line.front()) { - case '<': - return mime_node::operands::less_than; - case '>': - return mime_node::operands::greater_than; - case '=': - return mime_node::operands::equal; - case '!': - return mime_node::operands::not_equal; - case '&': - return mime_node::operands::bit_and; - case '|': - return mime_node::operands::bit_or; - case '^': - return mime_node::operands::bit_xor; - case 'x': - return mime_node::operands::bit_or; // x is any value - default: - return mime_node::operands::equal; +std::unique_ptr create(node_context context, std::string_view raw_type, string_view raw_value) { + std::unique_ptr result; + using namespace std::literals; + if (raw_type.find("string"s) == 0) { + return create_string(std::move(context), raw_type, raw_value); + } + + if (raw_type.find("date") != string_view::npos) { + return create_date(std::move(context), raw_type, raw_value); } + + if ( + raw_type.find("byte") != string_view::npos + || raw_type.find("short") != string_view::npos + || raw_type.find("long") != string_view::npos + ) { + return create_numeric(std::move(context), raw_type, raw_value); + } + + throw loading_error { + current_line, + "Unknown type"s, + " raw_type = {" + std::string(raw_type) + '}' + }; } std::vector split_by_columns(string_view line) { @@ -288,28 +420,45 @@ std::vector split_by_columns(string_view line) { 
std::vector columns; std::string_view::iterator left = line.begin(); std::string_view::iterator right = std::find_if(left, line.end(), isspace); + columns.emplace_back(&(*left), std::distance(left, right)); while (left != line.end() && columns.size() < 3) { - columns.emplace_back(left, std::distance(left, right)); - left = right; - char prev = ' '; - left = std::find_if(left, line.end(), [](char c) { return !isspace(c); }); + left = right + 1; + left = std::find_if( + left, + line.end(), + [](char c) { + return !isspace(static_cast(c)); + } + ); + if (left == line.end()) { + throw loading_error { + current_line, + "Invalid columns"s + }; + } + char prev {' '}; right = std::find_if(left, line.end(), [&prev](char c) { - if (isspace(c) && prev != '\\') { + if (isspace(static_cast(c)) && prev != '\\') { prev = c; return true; } prev = c; return false; }); + columns.emplace_back(&(*left), std::distance(left, right)); } + left = right; if (columns.size() != 3) { - throw std::runtime_error {"Syntax Error"}; // TODO(Pavel): Write reason + throw loading_error { + current_line, + "Missing columns"s + }; } if (left < line.end()) { - columns.emplace_back(left, std::distance(left, line.end())); + columns.emplace_back(&(*left), std::distance(left, line.end())); } else { columns.emplace_back(""sv); } @@ -317,6 +466,13 @@ std::vector split_by_columns(string_view line) { return columns; } +void replace_escapes(std::string& str) { + size_t pos {0}; + while ((pos = str.find('\\', pos)) != std::string::npos) { + str.replace(pos, 2, std::string {parse_escape(str[pos + 1])}); + } +} + size_t extract_level(string& line) { size_t level {0}; for (char c: line) { @@ -329,65 +485,80 @@ size_t extract_level(string& line) { return level; } -std::pair load_nodes(std::istream& in, size_t level) { +struct loading_result { + mime_list nodes {}; + bool status {false}; +}; + +loading_result load_nodes(std::istream& in, size_t level) { using namespace std::literals; + string line; std::getline(in, 
line); - mime_list current_level_nodes; - bool end_of_node = false; + loading_result result; do { ++current_line; - if (line.front() == '#' - || line.size() <= 1 + if (line.empty() + || line.front() == '#' || line.front() == '\n' - || line.front() == '\r') { + || line.front() == '\r' + ) { continue; } size_t current_level = extract_level(line); if (current_level > level) { - throw std::runtime_error { - "Syntax error: Invalid level\n" - "In line: "s + string(current_line) + throw loading_error { + current_line, + "Invalid level"s }; } if (current_level < level) { in.seekg(-static_cast(current_level + line.size() + 1), std::istream::cur); --current_line; if (current_level == 0) { - return {{}, true}; + return {std::move(mime_list {}), true}; } break; } auto columns {split_by_columns(line)}; if (columns.size() < 4) { - throw std::runtime_error { - "Syntax error: Invalid number of columns\n" - "In line: "s + string(current_line) + throw loading_error { + current_line, + "Invalid number of columns"s }; } - if (columns[3].back() == '\r') { - columns[3].remove_suffix(1); + std::string message {columns[3]}; + if (!message.empty()) { + replace_escapes(message); + if (message.back() == '\r') { + message.back() = ' '; + } else if (message.back() != ' ') { + message.push_back(' '); + } } auto [children, status] = load_nodes(in, current_level + 1); - end_of_node = status; - // Create a new node - current_level_nodes.emplace_back( - parse_raw_value(columns[0]), - parse_value(columns[1], columns[2]), - children, - parse_operand(columns[2]), - string {columns[3]} - ); - - } while (!end_of_node && std::getline(in, line)); - - return {current_level_nodes, end_of_node}; + result.status = status; + // Create a new node + result.nodes.emplace_back(std::move( + create( + { + static_cast(parse_raw_value(columns[0])), + message, + std::move(children) + }, + columns[1], + columns[2] + )) + ); + } while (!result.status && std::getline(in, line)); + + return result; } mime_list 
magic::load(std::istream& in) { @@ -400,14 +571,23 @@ mime_list magic::load(std::istream& in) { continue; } in.seekg(-static_cast(buffer.size() + 1), std::istream::cur); - // First is a result - // | - mime_list mimes = load_nodes(in, 0).first; + mime_list mimes {std::move(load_nodes(in, 0).nodes)}; + if (mimes.empty()) { continue; } - nodes.emplace_back(0, nullptr, mimes); + if (mimes.size() == 1) { + nodes.emplace_back(std::move(mimes.back())); + continue; + } + nodes.emplace_back(std::make_unique(0, "", std::move(mimes))); } return nodes; +} + +// Opens `filename` in binary mode and parses it with the stream overload. NOTE(review): the stream is not checked with is_open() - confirm what callers expect for a missing file. +mime_list magic::load(const string& filename) { + std::ifstream file {filename, std::ios::in | std::ios::binary}; + return load(file); } diff --git a/src/loader/mime_loader.h b/src/loader/mime_loader.h index 13cf659..25f4682 100644 --- a/src/loader/mime_loader.h +++ b/src/loader/mime_loader.h @@ -3,12 +3,14 @@ #include -#include "node/mime_node.h" +#include "nodes/basic_mime_node.h" namespace magic { mime_list load(std::istream& in); + mime_list load(const std::string& filename); + } // magic #endif //LOADER_H diff --git a/src/mime_magicConfig.cmake.in b/src/mime_magicConfig.cmake.in new file mode 100644 index 0000000..21c8349 --- /dev/null +++ b/src/mime_magicConfig.cmake.in @@ -0,0 +1,13 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +include("${CMAKE_CURRENT_LIST_DIR}/mime_magicTargets.cmake") + +set(mime_magic_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/../../../include" CACHE PATH "Path to mime_magic include directory" FORCE) + +include_directories( + ${mime_magic_INCLUDE_DIRS} + ${mime_magic_INCLUDE_DIRS}/loader + ${mime_magic_INCLUDE_DIRS}/nodes +) diff --git a/src/node/mime_data.h b/src/node/mime_data.h deleted file mode 100644 index 8a11d25..0000000 --- a/src/node/mime_data.h +++ /dev/null @@ -1,115 +0,0 @@ -#ifndef MIME_DATA_H -#define MIME_DATA_H - -#include -#include - -namespace magic { - template - class mime_data; - - template - Type like(Type data, mime_data 
src); - - template - class mime_data { - public: - - static Type le(Type value) { - return value; - } - - static Type be(Type value) { - static_assert(std::numeric_limits::is_integer); - union { - Type val; - uint8_t arr[sizeof(Type)]; - } result; - result.val = 0; - - auto value_ptr {reinterpret_cast(&value)}; - for (size_t i {0}; i < sizeof(Type); ++i) - result.arr[sizeof(Type) - 1 - i] = *(value_ptr++); - return result.val; - } - - mime_data(Type val, std::function endian = le) - : value_(val), - endian_(endian) { - mask_ = ~0; - } - - mime_data(Type val, Type mask, std::function endian = le) - : value_(val), - mask_(mask), - endian_(endian) { - } - - bool operator<(Type other) const { - return (endian_(value_) & mask_) < other; - } - - bool operator>(Type other) const { - return (endian_(value_) & mask_) > other; - } - - bool operator==(Type other) const { - return (endian_(value_) & mask_) == other; - } - - bool operator!=(Type other) const { - auto res = endian_(value_); - auto rres = res & mask_; - return (endian_(value_) & mask_) != other; - } - - bool operator<=(Type other) const { - return (endian_(value_) & mask_) <= other; - } - - bool operator>=(Type other) const { - return (endian_(value_) & mask_) <= other; - } - - bool operator&(Type other) const { - return (endian_(value_) & mask_) & other; - } - - bool operator|(Type other) const { - return (endian_(value_) & mask_) | other; - } - - bool operator^(Type other) const { - return (endian_(value_) & mask_) ^ other; - } - - operator Type() const { - return endian_(value_) & mask_; - } - - [[nodiscard]] size_t size() const { - return sizeof(Type); - } - - private: - Type value_ {}; - Type mask_ {}; - - std::function endian_; - - template - friend T like(T, mime_data); - }; - - template - Type like(Type data, mime_data src) { - return src.endian_(data); - } - - template - Type like(Type data, std::function endian) { - return endian(data); - } -} - -#endif //MIME_DATA_H diff --git a/src/node/mime_node.cpp 
b/src/node/mime_node.cpp deleted file mode 100644 index a0a4de2..0000000 --- a/src/node/mime_node.cpp +++ /dev/null @@ -1,167 +0,0 @@ -#include "mime_node.h" - -#include -#include - -using namespace magic; - -namespace { - template - T convert_raw(const void *ptr) { - static_assert(std::is_trivially_copyable_v == true); - T val; - std::memcpy(&val, ptr, sizeof(T)); - return val; - } - - template - void is_enough_data(size_t size, T data, bool& res) { - res = size >= data.size(); - } - - void is_enough_data(size_t size, std::nullptr_t, bool& res) { - res = true; - } - - bool is_enough_data(size_t size, const mime_node::value& node) { - bool result; - std::visit([&](const auto& val) { - is_enough_data(size, val, result); - }, node); - return result; - } -} - -mime_node::mime_node( - size_t offset, - value val, - const mime_list& children, - operands operand, - mime_string message -) - : variant(std::move(val)), - offset_(offset), - operand_(operand), - message_(std::move(message)), - children_(children) { -// if (std::holds_alternative(*this) && (operand_ != operands::equal && operand_ != operands::not_equal)) { -// throw std::invalid_argument("Invalid operand for string"); -// } -} - -class mime_node_bool_processor { -public: - mime_node_bool_processor(const char *data, size_t size, mime_node::operands operand, bool& result) - : data_(data), - size_(size), - operand_(operand), - result_(result) - { - } - - void operator()(std::nullptr_t) { - result_ = true; - } - - template - void operator()(mime_data value) { - Value tmp = convert_raw(data_); - switch (operand_) { - case mime_node::operands::equal: { - result_ = value == tmp; - break; - } - case mime_node::operands::not_equal: { - result_ = value != tmp; - break; - } - case mime_node::operands::less_than: { - result_ = value > tmp; - break; - } - case mime_node::operands::greater_than: { - result_ = value < tmp; - break; - } - case mime_node::operands::bit_and: { - result_ = value & tmp; - break; - } - case 
mime_node::operands::bit_or: { - result_ = value | tmp; - break; - } - case mime_node::operands::bit_xor: { - result_ = value ^ tmp; - break; - } - default: { - throw std::runtime_error(std::string("Invalid operand ") + std::to_string(static_cast(operand_))); - } - } - } - - void operator()(const mime_string& value) { - mime_string tmp(data_, value.size()); - switch (operand_) { - case mime_node::operands::equal: { - result_ = value == tmp; - break; - } - case mime_node::operands::not_equal: { - result_ = value != tmp; - break; - } - default: { - throw std::runtime_error(std::string("Invalid operand ") + std::to_string(static_cast(operand_))); - } - } - } - -private: - const char *data_; - size_t size_; - mime_node::operands operand_; - bool& result_; -}; - -bool mime_node::process_data(const char *data, size_t size) const { - if (!is_enough_data(size - offset_, *this)) { - return false; - } - - bool result; - - std::visit(mime_node_bool_processor(data + offset_, size - offset_, operand_, result), static_cast(*this)); - - if (result == false) { - return result; - } - -#ifdef ProcDebug - std::visit( - [](const auto& val) { - std::cout << "value = { " << std::hex << val << " }, "; - }, - static_cast(*this) - ); - - std::cout << "branch = { " << message_ << " }, result = { " << result << " }" << std::endl; -// std::cout << ", data = '" << std::string(data + offset_, size - offset_) << "'" << std::endl; - -#endif - - if (children_.empty()) { - return result; - } - - bool handler_result = false; - for (const auto& node : children_) { - handler_result |= node.process_data(data, size); - } - - result &= handler_result; - // Make switch statement for operands - return result; -} - diff --git a/src/node/mime_node.h b/src/node/mime_node.h deleted file mode 100644 index 9286cca..0000000 --- a/src/node/mime_node.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef MIME_NODE_H -#define MIME_NODE_H - -#include - -#include -#include -#include -#include -#include - -#include "mime_data.h" - 
-namespace magic { - class mime_node; - - using mime_string = std::string; - using mime_list = std::list; - - class mime_node final - : private std::variant< - std::nullptr_t, - mime_data, - mime_data, - mime_data, - mime_string - > { - public: - using variant::variant; - using value = variant; - - enum class operands { - equal, // = - not_equal, // ! - less_than, // < - greater_than, // > - bit_and, // & - only for number - bit_or, // | - only for number - bit_xor, // ^ - only for number - any, // x - any value - case_sensitive_equal // ~ - only for string - }; - - mime_node() = delete; - - mime_node( - size_t offset, - value val, - const mime_list& children = {}, - operands operand = operands::equal, - std::string message = "" - ); - - mime_node(const mime_node& other) = default; - - mime_node(mime_node&& other) = default; - - bool process_data(const char *data, size_t size) const; - - private: - size_t offset_ {0}; - operands operand_ {operands::equal}; - std::string message_ {}; - mime_list children_ {}; - }; -} // magic - -#endif //MIME_NODE_H diff --git a/src/nodes/basic_mime_node.cpp b/src/nodes/basic_mime_node.cpp new file mode 100644 index 0000000..8056ee1 --- /dev/null +++ b/src/nodes/basic_mime_node.cpp @@ -0,0 +1,56 @@ +#include "basic_mime_node.h" + +using namespace magic; + +basic_mime_node::basic_mime_node(size_t offset, std::string message, mime_list children) + : offset_ {offset}, + message_ {std::move(message)}, + children_ {std::move(children)} +{ +} + +basic_mime_node::response_t basic_mime_node::process_data(const char *data, size_t size) { + if (offset_ > size || !is_enough_data(size - offset_)) { + return std::nullopt; + } + + response_t result {process_current(data + offset_, size - offset_)}; + + if (!result.has_value()) { + return result; + } + + if (children_.empty()) { + return result; + } + + response_t handler_result; + for (const auto& node: children_) { + response_t node_result = node->process_data(data, size); + + if 
(node_result.has_value()) { + if (!handler_result.has_value()) { + handler_result = node_result.value(); + continue; + } + handler_result.value() += node_result.value(); + } + } + + if (!handler_result.has_value()) { + return std::nullopt; + } + + result.value() += handler_result.value(); + + return result; +} + +bool basic_mime_node::is_enough_data(size_t) { + return true; +} + +basic_mime_node::response_t basic_mime_node::process_current(const char *, size_t) { + return message_; +} + diff --git a/src/nodes/basic_mime_node.h b/src/nodes/basic_mime_node.h new file mode 100644 index 0000000..f6cada0 --- /dev/null +++ b/src/nodes/basic_mime_node.h @@ -0,0 +1,50 @@ +#ifndef _MIME_MAGIC_BASIC_MIME_NODE_H_ +#define _MIME_MAGIC_BASIC_MIME_NODE_H_ + +#include +#include +#include +#include +#include + +namespace magic { + + class basic_mime_node; + + using mime_string = std::string; + using mime_list = std::list>; + + class basic_mime_node { + public: + using response_t = std::optional; + + basic_mime_node() = delete; + + basic_mime_node(size_t offset, std::string message, mime_list children); + + basic_mime_node(basic_mime_node&&) noexcept = default; + + basic_mime_node(const basic_mime_node&) = delete; + + response_t process_data(const char *data, size_t size); + + virtual ~basic_mime_node() = default; + + protected: + virtual bool is_enough_data(size_t); + + virtual response_t process_current(const char *, size_t); + + private: + size_t offset_ {0}; + protected: + std::string message_ {}; + private: + mime_list children_ {}; + + + }; + +} + +#endif \ No newline at end of file diff --git a/src/nodes/common.h b/src/nodes/common.h new file mode 100644 index 0000000..03f0b17 --- /dev/null +++ b/src/nodes/common.h @@ -0,0 +1,22 @@ +#ifndef _MIME_MAGIC_COMMON_H_ +#define _MIME_MAGIC_COMMON_H_ + +#include + +#include "basic_mime_node.h" + +namespace magic { + + enum class operands { + any, + equal, + not_equal, + less_than, + greater_than, + bit_and, + bit_or, + bit_xor + }; 
+ +} +#endif //_MIME_MAGIC_COMMON_H_ diff --git a/src/nodes/date_node/date_node.cpp b/src/nodes/date_node/date_node.cpp new file mode 100644 index 0000000..0d62cfa --- /dev/null +++ b/src/nodes/date_node/date_node.cpp @@ -0,0 +1,33 @@ +#include +#include "date_node.h" + + +using namespace magic; + +bool date_node::is_enough_data(size_t size) { + return sizeof(value_) < size; +} + +basic_mime_node::response_t date_node::process_current(const char *data, size_t) { + time_t tmp = normalize_byte_order_(utils::convert_raw(data)); + + response_t response; + std::string result = utils::format(message_, std::string {std::ctime(&tmp)}); + switch (operand_) { + case operands::any: return response_t { result }; + case operands::equal: return tmp == value_ ? response_t { result } : std::nullopt; + case operands::not_equal: return tmp != value_ ? response_t { result } : std::nullopt; + case operands::less_than: return tmp < value_ ? response_t { result } : std::nullopt; + case operands::greater_than: return tmp > value_ ? 
response_t { result } : std::nullopt; + default: + throw std::invalid_argument("Unknown operand"); + } + return std::nullopt; +} + +date_node::date_node(size_t offset, const date_node::data_template& data, std::string message, mime_list children) + : basic_mime_node {offset, std::move(message), std::move(children)}, + value_ {data.value}, + normalize_byte_order_ {data.normalize_byte_order}, + operand_ {data.operand} { +} diff --git a/src/nodes/date_node/date_node.h b/src/nodes/date_node/date_node.h new file mode 100644 index 0000000..2982f53 --- /dev/null +++ b/src/nodes/date_node/date_node.h @@ -0,0 +1,41 @@ +#ifndef _MIME_MAGIC_DATE_NODE_H_ +#define _MIME_MAGIC_DATE_NODE_H_ + +#include "nodes/basic_mime_node.h" +#include "nodes/utils.h" +#include "nodes/common.h" + +#include + +namespace magic { + + class date_node final : public basic_mime_node { + public: + + struct data_template { + time_t value {}; + std::function normalize_byte_order; + operands operand {operands::equal}; + }; + + explicit date_node(size_t offset, const data_template& data, std::string message, mime_list children); + + date_node(date_node&&) noexcept = default; + + ~date_node() override = default; + + private: + + time_t value_ {}; + std::function normalize_byte_order_{}; + operands operand_ {}; + + bool is_enough_data(size_t size) override; + + response_t process_current(const char *data, size_t size) override; + + }; + +} // magic + +#endif //_MIME_MAGIC_DATE_NODE_H_ diff --git a/src/nodes/numeric_node/numeric_node.cpp b/src/nodes/numeric_node/numeric_node.cpp new file mode 100644 index 0000000..090abfc --- /dev/null +++ b/src/nodes/numeric_node/numeric_node.cpp @@ -0,0 +1,96 @@ +#include "numeric_node.h" + +using namespace magic; + +numeric_node::numeric_node(size_t offset, const data_template& data, std::string message, mime_list children) + : basic_mime_node {offset, std::move(message), std::move(children)}, + value_ {data.value}, + mask_ {data.mask}, + operand_ {data.operand}, + 
normalize_byte_order_ {data.normalize_byte_order} +{ +} + +bool numeric_node::is_enough_data(size_t size) { + size_t size_of_type; + std::visit( + [&](auto t) { + size_of_type = sizeof(t); + }, + value_ + ); + return size_of_type < size; +} + +basic_mime_node::response_t numeric_node::process_current(const char *data, size_t) { + type tmp = value_; + std::visit( + [&](auto& value) { + extract_value(value, data); + }, + tmp + ); + + + std::string result; + // TODO(pavel-cpp): It may be necessary to do something different for bit operations + std::visit( + [&](auto value) { + result = utils::format(message_, value); + if (std::holds_alternative(value_) && std::get(value_) == 6) { + throw std::exception(); + } + std::string tmp = utils::format(message_, value); + }, + tmp + ); + + response_t response; + std::visit( + [&](auto val) { + switch (operand_) { + case operands::any: + response = std::make_optional(result); + break; + case operands::equal: + response = (val & std::get(mask_)) == std::get(value_) + ? std::make_optional(result) + : std::nullopt; + break; + case operands::not_equal: + response = (val & std::get(mask_)) != std::get(value_) + ? std::make_optional(result) + : std::nullopt; + break; + case operands::less_than: + response = (val & std::get(mask_)) < std::get(value_) + ? std::make_optional(result) + : std::nullopt; + break; + case operands::greater_than: + response = (val & std::get(mask_)) > std::get(value_) + ? std::make_optional(result) + : std::nullopt; + break; + case operands::bit_and: + response = (val & std::get(mask_)) & std::get(value_) + ? std::make_optional(result) + : std::nullopt; + break; + case operands::bit_or: + response = (val & std::get(mask_)) | std::get(value_) + ? std::make_optional(result) + : std::nullopt; + break; + case operands::bit_xor: + response = (val & std::get(mask_)) ^ std::get(value_) + ? 
std::make_optional(result) + : std::nullopt; + break; + } + }, + tmp + ); + + return response; +} diff --git a/src/nodes/numeric_node/numeric_node.h b/src/nodes/numeric_node/numeric_node.h new file mode 100644 index 0000000..f7ff6a6 --- /dev/null +++ b/src/nodes/numeric_node/numeric_node.h @@ -0,0 +1,68 @@ +#ifndef _MIME_MAGIC_NUMERIC_NODE_H_ +#define _MIME_MAGIC_NUMERIC_NODE_H_ + +#include +#include "nodes/basic_mime_node.h" +#include "nodes/utils.h" +#include "nodes/common.h" + +#include + + +namespace magic { + + + class numeric_node final : public basic_mime_node { + public: + + using type = std::variant< + uint8_t, + int16_t, + uint16_t, + int32_t, + uint32_t + >; + + struct data_template { + type value {}; + type mask {~0}; + operands operand {operands::equal}; + std::function normalize_byte_order; + }; + + explicit numeric_node( + size_t offset, + const data_template& data, + std::string message, + mime_list children + ); + + numeric_node(numeric_node&&) noexcept = default; + + ~numeric_node() override = default; + + private: + + type value_ {}; + type mask_ {~0}; + operands operand_ {operands::equal}; + std::function normalize_byte_order_; + + private: + + bool is_enough_data(size_t size) override; + + template + void extract_value(Type& dst, const char *data) { + std::string tmp {data, sizeof(Type)}; + normalize_byte_order_(tmp.data(), tmp.size()); + dst = utils::convert_raw(tmp.data()); + } + + response_t process_current(const char *data, size_t) override; + + }; + +} // magic + +#endif //_MIME_MAGIC_NUMERIC_NODE_H_ diff --git a/src/nodes/string_node/string_node.cpp b/src/nodes/string_node/string_node.cpp new file mode 100644 index 0000000..df2e70c --- /dev/null +++ b/src/nodes/string_node/string_node.cpp @@ -0,0 +1,50 @@ +#include "string_node.h" + +#include "nodes/utils.h" + +#include + +using namespace magic; + +void tolower(std::string& str) { + for (char& c: str) { + c = tolower(c); + } +} + +basic_mime_node::response_t 
string_node::process_current(const char *data, size_t size) { + using namespace std::literals; + if (!value_.empty() && value_.size() < size && value_ != "\0"s) { + size = value_.size(); + } + std::string temp {data, size}; + if (opt_ == options::not_case_sensitive) { + tolower(temp); + tolower(value_); + } + std::string result {utils::format(message_, std::string(data, size))}; + switch (operand_) { + case operands::any: return result; + case operands::equal: return temp == value_ ? response_t {message_} : std::nullopt; + case operands::less_than: return temp < value_ ? response_t {message_} : std::nullopt; + case operands::greater_than: return temp > value_ ? response_t {message_} : std::nullopt; + default: + throw std::invalid_argument("Unknown operand"); + } + return std::nullopt; +} + +bool string_node::is_enough_data(size_t size) { + return value_.size() < size; +} + +string_node::string_node( + size_t offset, + const string_node::data_template& data, + std::string message, + mime_list children + ) : basic_mime_node {offset, std::move(message), std::move(children)}, + value_ {data.value}, + opt_ {data.opt}, + operand_ {data.operand} + {} diff --git a/src/nodes/string_node/string_node.h b/src/nodes/string_node/string_node.h new file mode 100644 index 0000000..8acc438 --- /dev/null +++ b/src/nodes/string_node/string_node.h @@ -0,0 +1,42 @@ +#ifndef _MIME_MAGIC_STRING_NODE_H_ +#define _MIME_MAGIC_STRING_NODE_H_ + +#include "nodes/basic_mime_node.h" +#include "nodes/common.h" + +namespace magic { + class string_node final : public basic_mime_node { + public: + + enum options { + none, + not_case_sensitive + }; + + struct data_template { + std::string value {}; + options opt {options::none}; + operands operand {operands::equal}; + }; + + explicit string_node(size_t offset, const data_template& data, std::string message, mime_list children); + + string_node(string_node&&) noexcept = default; + + ~string_node() override = default; + + private: + + bool 
is_enough_data(size_t size) override; + + response_t process_current(const char *data, size_t size) override; + + std::string value_ {}; + options opt_ {options::none}; + operands operand_ {operands::equal}; + + }; + +} + +#endif //_MIME_MAGIC_STRING_NODE_H_ diff --git a/src/nodes/utils.h b/src/nodes/utils.h new file mode 100644 index 0000000..5748d31 --- /dev/null +++ b/src/nodes/utils.h @@ -0,0 +1,64 @@ +#ifndef _MIME_MAGIC_UTILS_H_ +#define _MIME_MAGIC_UTILS_H_ + +#include +#include +#include +#include +#include +#include +#include + +namespace magic::utils { + + inline std::string format(const std::string& format_s, const std::string& str) { + std::string out; + out.resize(format_s.size() + str.size()); + snprintf(out.data(), out.size(), format_s.c_str(), str.c_str()); + return {out.c_str()}; + } + + template + inline std::string format(const std::string& format_s, Type value) { + std::string out; + out.resize(format_s.size() + 20); + snprintf(out.data(), out.size(), format_s.c_str(), value); + return {out.c_str()}; + } + + template + T convert_raw(const void *ptr) { + static_assert(std::is_trivially_copyable_v == true); + T val; + std::memcpy(&val, ptr, sizeof(T)); + return val; + } + + template + inline Type change_order(Type value) { + static_assert(std::numeric_limits::is_integer); + union { + Type val; + uint8_t arr[sizeof(Type)]; + } result; + result.val = 0; + + auto value_ptr {reinterpret_cast(&value)}; + for (size_t i {0}; i < sizeof(Type); ++i) + result.arr[sizeof(Type) - 1 - i] = *(value_ptr++); + return result.val; + } + + inline void change_raw_order(char *data, size_t size) { + assert(size % 2 == 0); + + for (size_t i = 0; i < size / 2; ++i) { + char temp = data[i]; + data[i] = data[size - 1 - i]; + data[size - 1 - i] = temp; + } + } + +} + +#endif //_MIME_MAGIC_UTILS_H_ diff --git a/test/main.cpp b/test/main.cpp deleted file mode 100644 index 1929ca5..0000000 --- a/test/main.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include - -#include - 
-#include "../src/loader/mime_loader.h" -#include "../src/node/mime_node.h" - -int main() { - using namespace std; - boolalpha(cout); - system("chcp 1251"); - - ifstream file("C:\\Sophus-NEW\\modules\\files.etl", ios::in | ios::binary); -// ifstream file("magic", ios::in | ios::binary); - auto nodes = magic::load(file); - - cout << nodes.size() << " node workers SUCCESSFULLY LOADED!" << endl; - system("pause"); - -// string str {"HelloWorld"}; -// -// vector data {'H', 'e', 'l', 'l', 'o', 'W', 'o', 'r', 'l', 'd', static_cast(0xFF), static_cast(0xFF)}; - - vector data; - data.resize(29); - - ifstream png("image.png", ios::in | ios::binary); - png.read(data.data(), data.size()); - - cout << "PNG" << endl; - cout << std::string(80, '=') << endl << endl; - int i = 1; - for (const auto& node: nodes) { - cout << dec << i++ << hex << ")\n" << node.process_data(data.data(), data.size()) << endl; - cout << endl << std::string(80, '=') << endl << endl; - } - - ifstream jpeg("jpeg-home.jpg", ios::in | ios::binary); - jpeg.read(data.data(), data.size()); - - cout << "JPEG" << endl; - cout << std::string(80, '=') << endl << endl; - i = 1; - for (const auto& node: nodes) { - cout << dec << i++ << hex << ")\n" << node.process_data(data.data(), data.size()) << endl; - cout << endl << std::string(80, '=') << endl << endl; - } - - - return 0; -} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..0e4309c --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.23) +project(tests LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) + +set(CMAKE_PREFIX_PATH ../mime_magic/lib/cmake/mime_magic) + +find_package(mime_magic REQUIRED) + +add_custom_target(copy_tests + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_LIST_DIR}/test-data ${CMAKE_BINARY_DIR}/test-data + COMMENT "Copying tests" +) + +add_executable(${PROJECT_NAME} tests.cpp) + +add_dependencies(${PROJECT_NAME} copy_tests) + 
+target_link_libraries(${PROJECT_NAME} mime_magic::mime_magic_static) \ No newline at end of file diff --git a/tests/ext/libtest.h b/tests/ext/libtest.h new file mode 100644 index 0000000..ccd8357 --- /dev/null +++ b/tests/ext/libtest.h @@ -0,0 +1,60 @@ +#ifndef LIBTEST_H +#define LIBTEST_H + +#include + +using std::string; +using std::cerr; +using std::cout; +using std::endl; +using std::boolalpha; +using namespace std::literals; + +template +void AssertEqualImpl(const T& t, const U& u, const string& t_str, const string& u_str, const string& file, + const string& func, unsigned line, const string& hint) { + if (t != u) { + cerr << boolalpha; + cerr << file << "("s << line << "): "s << func << ": "s; + cerr << "ASSERT_EQUAL("s << t_str << ", "s << u_str << ") failed: "s; + cerr << t << " != "s << u << "."s; + if (!hint.empty()) { + cout << " Hint: "s << hint; + } + cerr << endl; + abort(); + } +} + +#define ASSERT_EQUAL(a, b) AssertEqualImpl((a), (b), #a, #b, __FILE__, __FUNCTION__, __LINE__, ""s) + +#define ASSERT_EQUAL_HINT(a, b, hint) AssertEqualImpl((a), (b), #a, #b, __FILE__, __FUNCTION__, __LINE__, (hint)) + +void AssertImpl(bool expr, const string& s_expr, const string& file, const string& f_name, unsigned line, const string& hint) { + if(!expr){ + cerr << boolalpha; + cerr << file << "("s << line << "): "s << f_name << ": "s; + cerr << "ASSERT_EQUAL(" << s_expr << ") failed! 
"s; + if (!hint.empty()) { + cout << " Hint: "s << hint; + } + cerr << endl; + abort(); + } +} + + +#define ASSERT(expr) AssertImpl((expr), #expr, __FILE__, __FUNCTION__, __LINE__, ""s) + +#define ASSERT_HINT(expr, hint) AssertImpl((expr), #expr, __FILE__, __FUNCTION__, __LINE__, (hint)) + + +template +void RunTestImpl(const TestFunc& func, const string& test_name) { + func(); + cerr << test_name << " OK"s << endl; +} + +#define RUN_TEST(func) RunTestImpl(func, #func) + +#endif //LIBTEST_H diff --git a/tests/test-data/correct/correct-png-file.etl b/tests/test-data/correct/correct-png-file.etl new file mode 100644 index 0000000..076b98a --- /dev/null +++ b/tests/test-data/correct/correct-png-file.etl @@ -0,0 +1,16 @@ +# Png images +0 string \x89PNG %s +>4 belong !0x0d0a1a0a %ld +>4 belong 01502415012 %ld +>>16 belong x %ld +>>20 belong x %ld +>>24 byte x %d +>>25 byte 0 %d +>>25 byte 2 %d +>>25 byte 3 %d +>>25 byte 4 %d +>>25 byte 6 %d +#>>26 byte 0 %d should not be parsed +>>28 byte 0 %d +>>28 byte 1 %d +1 string PNG %s \ No newline at end of file diff --git a/tests/test-data/correct/hard-correct-png-file.etl b/tests/test-data/correct/hard-correct-png-file.etl new file mode 100644 index 0000000..482c6be --- /dev/null +++ b/tests/test-data/correct/hard-correct-png-file.etl @@ -0,0 +1,83 @@ +# Png images +0 string \x89PNG %s + + + + + +>4 belong !0x0d0a1a0a %ld + + + + +>4 belong 0x0d0a1a0a %ld + + + +>>(16) belong x %ld + + + +>>(10+10) belong x %ld + + + + +>>(28-4) byte x %d + + + + +>>25 byte 0 %d + + + + + +>>25 byte 2 %d + + + + + +>>25 byte 3 %d + + + + + +>>25 byte 4 %d + + + + + +>>25 byte (3+3) %d + + + + + +#>>26 byte 0 %d should not be parsed + + + + + +>>28 byte 0 %d + + + + + +>>28 byte 1 %d + + + + + +1 string PNG %s + + + + diff --git a/tests/test-data/incorrect/byte-type-error-file.etl b/tests/test-data/incorrect/byte-type-error-file.etl new file mode 100644 index 0000000..03adac3 --- /dev/null +++ b/tests/test-data/incorrect/byte-type-error-file.etl 
@@ -0,0 +1,16 @@ +# Png images +0 string \x89PNG %s +>4 belong !0x0d0a1a0a %ld +>4 belong 0x0d0a1a0a %ld +>>16 belong x %ld +>>20 belong x %ld +>>24 byte x %d +>>25 byte 0 %d +>>25 byte 2 %d +>>25 byte 3 %d +>>25 byte 4 %d +>>25 byte 6 %d +#>>26 byte 0 %d should not be parsed +>>28 byte 0 %d +>>28 byte 1 %d +1 meowstring PNG %s \ No newline at end of file diff --git a/tests/test-data/incorrect/level-error-file.etl b/tests/test-data/incorrect/level-error-file.etl new file mode 100644 index 0000000..0f8cc3e --- /dev/null +++ b/tests/test-data/incorrect/level-error-file.etl @@ -0,0 +1,16 @@ +# Png images +0 string \x89PNG %s +>4 belong !0x0d0a1a0a %ld +>4 belong 0x0d0a1a0a %ld +>>16 belong x %ld +>>20 belong x %ld +>>24 byte x %d +25 byte 0 %d +>>25 byte 2 %d +>>25 byte 3 %d +>>25 byte 4 %d +>>25 byte 6 %d +#>>26 byte 0 %d should not be parsed +>>28 byte 0 %d +>>28 byte 1 %d +1 string PNG %s \ No newline at end of file diff --git a/tests/test-data/incorrect/string-type-error-file.etl b/tests/test-data/incorrect/string-type-error-file.etl new file mode 100644 index 0000000..2378ac6 --- /dev/null +++ b/tests/test-data/incorrect/string-type-error-file.etl @@ -0,0 +1,16 @@ +# Png images +0 string \x89PNG %s +>4 belong !0x0d0a1a0a %ld +>4 belong 0x0d0a1a0a %ld +>>16 belong x %ld +>>20 belong x %ld +>>24 byte x %d +>>25 meowbyte 0 %d +>>25 byte 2 %d +>>25 byte 3 %d +>>25 byte 4 %d +>>25 byte 6 %d +#>>26 byte 0 %d should not be parsed +>>28 byte 0 %d +>>28 byte 1 %d +1 string PNG %s \ No newline at end of file diff --git a/tests/test-data/incorrect/value-error.etl b/tests/test-data/incorrect/value-error.etl new file mode 100644 index 0000000..bcc4050 --- /dev/null +++ b/tests/test-data/incorrect/value-error.etl @@ -0,0 +1,16 @@ +# Png images +0 string \x89PNG %s +>4 belong !0x0d0a1a0a %ld +>4 belong 0x0d0a1a0a %ld +>>16 belong x %ld +>>20 belong x %ld +>>24 byte x %d +>>25 byte meow %d +>>25 byte 2 %d +>>25 byte 3 %d +>>25 byte 4 %d +>>25 byte 6 %d +#>>26 byte 0 %d should 
not be parsed +>>28 byte 0 %d +>>28 byte 1 %d +1 string PNG %s \ No newline at end of file diff --git a/tests/test-data/png/corrupted-image.png b/tests/test-data/png/corrupted-image.png new file mode 100644 index 0000000..ec67b41 Binary files /dev/null and b/tests/test-data/png/corrupted-image.png differ diff --git a/tests/test-data/png/image.png b/tests/test-data/png/image.png new file mode 100644 index 0000000..f6505a3 Binary files /dev/null and b/tests/test-data/png/image.png differ diff --git a/tests/tests.cpp b/tests/tests.cpp new file mode 100644 index 0000000..8e0ef9b --- /dev/null +++ b/tests/tests.cpp @@ -0,0 +1,98 @@ +#include +#include +#include + +#include +#include + +#include "ext/libtest.h" + +using namespace std; + +void test_loading_correct_data() { + { + magic::mime_list nodes = magic::load("test-data/correct/correct-png-file.etl"); + ASSERT_EQUAL_HINT(nodes.size(), 2, "Invalid number of nodes"); + } + { + magic::mime_list nodes = magic::load("test-data/correct/hard-correct-png-file.etl"); + ASSERT_EQUAL_HINT(nodes.size(), 2, "Invalid number of nodes"); + } +} + +void test_loading_incorrect_data() { + { + try { + magic::mime_list nodes = magic::load("test-data/incorrect/byte-type-error-file.etl"); + ASSERT_HINT(false, "Byte type error file loaded"); + } catch (magic::loading_error& e) { + }catch (std::invalid_argument& e) { + } + } + { + try { + magic::mime_list nodes = magic::load("test-data/incorrect/string-type-error-file.etl"); + ASSERT_HINT(false, "String type error file loaded"); + } catch (std::invalid_argument& e) { + } catch (magic::loading_error& e) {} + } + { + try { + magic::mime_list nodes = magic::load("test-data/incorrect/level-error-file.etl"); + ASSERT_HINT(false, "Level error file loaded"); + } catch (magic::loading_error& e) {} + } + { + try { + magic::mime_list nodes = magic::load("test-data/incorrect/value-error.etl"); + ASSERT_HINT(false, "Value error file loaded"); + } catch (magic::loading_error& e) {} + } +} + +void 
test_nodes_with_correct_data() { + magic::mime_list nodes = magic::load("test-data/correct/correct-png-file.etl"); + ifstream correct_png("test-data/png/image.png", ios::binary); + std::vector image_header; + image_header.resize(30); + correct_png.read(image_header.data(), image_header.size()); + auto first_node = nodes.begin(); + { + const auto result = (*first_node)->process_data(image_header.data(), image_header.size()); + ASSERT_EQUAL_HINT(result.has_value(), false, "Invalid result of processing data"); + } + { + const auto result = (*std::next(first_node))->process_data(image_header.data(), image_header.size()); + ASSERT_EQUAL_HINT(result.has_value(), false, "Invalid result of processing data"); + } +} + +void test_nodes_with_incorrect_data() { + magic::mime_list nodes = magic::load("test-data/correct/correct-png-file.etl"); + ifstream correct_png("test-data/png/corrupted-image.png", ios::binary); + std::vector image_header; + image_header.resize(30); + correct_png.read(image_header.data(), image_header.size()); + auto first_node = nodes.begin(); + { + const auto result = (*first_node)->process_data(image_header.data(), image_header.size()); + ASSERT_EQUAL_HINT(result.has_value(), true, "Invalid result of processing data"); + } + { + const auto result = (*std::next(first_node))->process_data(image_header.data(), image_header.size()); + ASSERT_EQUAL_HINT(result.has_value(), false, "Invalid result of processing data"); + } +} + +void test_all() { + RUN_TEST(test_loading_correct_data); + RUN_TEST(test_loading_incorrect_data); + RUN_TEST(test_loading_correct_data); + RUN_TEST(test_loading_incorrect_data); +} + +int main() { + test_all(); + cerr << "All tests passed" << endl; + return 0; +} \ No newline at end of file