diff options
Diffstat (limited to 'modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp')
-rw-r--r-- | modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp | 488 |
1 files changed, 0 insertions, 488 deletions
diff --git a/modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp b/modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp deleted file mode 100644 index 1eee10b251..0000000000 --- a/modules/fbx/fbx_parser/FBXBinaryTokenizer.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/*************************************************************************/ -/* FBXBinaryTokenizer.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -/* -Open Asset Import Library (assimp) ----------------------------------------------------------------------- - -Copyright (c) 2006-2019, assimp team - - -All rights reserved. - -Redistribution and use of this software in source and binary forms, -with or without modification, are permitted provided that the -following conditions are met: - -* Redistributions of source code must retain the above - copyright notice, this list of conditions and the - following disclaimer. - -* Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the - following disclaimer in the documentation and/or other - materials provided with the distribution. - -* Neither the name of the assimp team, nor the names of its - contributors may be used to endorse or promote products - derived from this software without specific prior - written permission of the assimp team. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ----------------------------------------------------------------------- -*/ -/** @file FBXBinaryTokenizer.cpp - * @brief Implementation of a fake lexer for binary fbx files - - * we emit tokens so the parser needs almost no special handling - * for binary files. - */ - -#include "ByteSwapper.h" -#include "FBXTokenizer.h" -#include "core/string/print_string.h" - -#include <stdint.h> - -namespace FBXDocParser { -//enum Flag -//{ -// e_unknown_0 = 1 << 0, -// e_unknown_1 = 1 << 1, -// e_unknown_2 = 1 << 2, -// e_unknown_3 = 1 << 3, -// e_unknown_4 = 1 << 4, -// e_unknown_5 = 1 << 5, -// e_unknown_6 = 1 << 6, -// e_unknown_7 = 1 << 7, -// e_unknown_8 = 1 << 8, -// e_unknown_9 = 1 << 9, -// e_unknown_10 = 1 << 10, -// e_unknown_11 = 1 << 11, -// e_unknown_12 = 1 << 12, -// e_unknown_13 = 1 << 13, -// e_unknown_14 = 1 << 14, -// e_unknown_15 = 1 << 15, -// e_unknown_16 = 1 << 16, -// e_unknown_17 = 1 << 17, -// e_unknown_18 = 1 << 18, -// e_unknown_19 = 1 << 19, -// e_unknown_20 = 1 << 20, -// e_unknown_21 = 1 << 21, -// e_unknown_22 = 1 << 22, -// e_unknown_23 = 1 << 23, -// e_flag_field_size_64_bit = 1 << 24, // Not sure what is -// e_unknown_25 = 1 << 25, -// e_unknown_26 = 1 << 26, -// e_unknown_27 = 1 << 27, -// e_unknown_28 = 1 << 28, -// e_unknown_29 = 1 << 29, -// e_unknown_30 = 1 << 30, -// e_unknown_31 = 1 << 31 -//}; -// -//bool check_flag(uint32_t flags, Flag to_check) -//{ -// return (flags & to_check) != 0; -//} -// ------------------------------------------------------------------------------------------------ -Token::Token(const char *sbegin, const char *send, TokenType type, size_t offset) : - sbegin(sbegin), - send(send), - type(type), - line(offset), - column(BINARY_MARKER) { -#ifdef DEBUG_ENABLED - // contents is bad.. :/ - contents = std::string(sbegin, static_cast<size_t>(send - sbegin)); -#endif - // calc length - // measure from sBegin to sEnd and validate? -} - -namespace { - -// ------------------------------------------------------------------------------------------------ -// signal tokenization error -void TokenizeError(const std::string &message, size_t offset) { - print_error("[FBX-Tokenize] " + String(message.c_str()) + ", offset " + itos(offset)); -} - -// ------------------------------------------------------------------------------------------------ -size_t Offset(const char *begin, const char *cursor) { - //ai_assert(begin <= cursor); - - return cursor - begin; -} - -// ------------------------------------------------------------------------------------------------ -void TokenizeError(const std::string &message, const char *begin, const char *cursor) { - TokenizeError(message, Offset(begin, cursor)); -} - -// ------------------------------------------------------------------------------------------------ -uint32_t ReadWord(const char *input, const char *&cursor, const char *end) { - const size_t k_to_read = sizeof(uint32_t); - if (Offset(cursor, end) < k_to_read) { - TokenizeError("cannot ReadWord, out of bounds", input, cursor); - } - - uint32_t word; - ::memcpy(&word, cursor, 4); - AI_SWAP4(word); - - cursor += k_to_read; - - return word; -} - -// ------------------------------------------------------------------------------------------------ -uint64_t ReadDoubleWord(const char *input, const char *&cursor, const char *end) { - const size_t k_to_read = sizeof(uint64_t); - if (Offset(cursor, end) < k_to_read) { - TokenizeError("cannot ReadDoubleWord, out of bounds", input, cursor); - } - - uint64_t dword /*= *reinterpret_cast<const uint64_t*>(cursor)*/; - ::memcpy(&dword, cursor, sizeof(uint64_t)); - AI_SWAP8(dword); - - cursor += k_to_read; - - return dword; -} - -// ------------------------------------------------------------------------------------------------ -uint8_t ReadByte(const char *input, const char *&cursor, const char *end) { - if (Offset(cursor, end) < sizeof(uint8_t)) { - TokenizeError("cannot ReadByte, out of bounds", input, cursor); - } - - uint8_t word; /* = *reinterpret_cast< const uint8_t* >( cursor )*/ - ::memcpy(&word, cursor, sizeof(uint8_t)); - ++cursor; - - return word; -} - -// ------------------------------------------------------------------------------------------------ -unsigned int ReadString(const char *&sbegin_out, const char *&send_out, const char *input, - const char *&cursor, const char *end, bool long_length = false, bool allow_null = false) { - const uint32_t len_len = long_length ? 4 : 1; - if (Offset(cursor, end) < len_len) { - TokenizeError("cannot ReadString, out of bounds reading length", input, cursor); - } - - const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end); - - if (Offset(cursor, end) < length) { - TokenizeError("cannot ReadString, length is out of bounds", input, cursor); - } - - sbegin_out = cursor; - cursor += length; - - send_out = cursor; - - if (!allow_null) { - for (unsigned int i = 0; i < length; ++i) { - if (sbegin_out[i] == '\0') { - TokenizeError("failed ReadString, unexpected NUL character in string", input, cursor); - } - } - } - - return length; -} - -// ------------------------------------------------------------------------------------------------ -void ReadData(const char *&sbegin_out, const char *&send_out, const char *input, const char *&cursor, const char *end, bool &corrupt) { - if (Offset(cursor, end) < 1) { - TokenizeError("cannot ReadData, out of bounds reading length", input, cursor); - corrupt = true; - return; - } - - const char type = *cursor; - sbegin_out = cursor++; - - switch (type) { - // 16 bit int - case 'Y': - cursor += 2; - break; - - // 1 bit bool flag (yes/no) - case 'C': - cursor += 1; - break; - - // 32 bit int - case 'I': - // <- fall through - - // float - case 'F': - cursor += 4; - break; - - // double - case 'D': - cursor += 8; - break; - - // 64 bit int - case 'L': - cursor += 8; - break; - - // note: do not write cursor += ReadWord(...cursor) as this would be UB - - // raw binary data - case 'R': { - const uint32_t length = ReadWord(input, cursor, end); - cursor += length; - break; - } - - case 'b': - // TODO: what is the 'b' type code? Right now we just skip over it / - // take the full range we could get - cursor = end; - break; - - // array of * - case 'f': - case 'd': - case 'l': - case 'i': - case 'c': { - const uint32_t length = ReadWord(input, cursor, end); - const uint32_t encoding = ReadWord(input, cursor, end); - - const uint32_t comp_len = ReadWord(input, cursor, end); - - // compute length based on type and check against the stored value - if (encoding == 0) { - uint32_t stride = 0; - switch (type) { - case 'f': - case 'i': - stride = 4; - break; - - case 'd': - case 'l': - stride = 8; - break; - - case 'c': - stride = 1; - break; - - default: - break; - }; - //ai_assert(stride > 0); - if (length * stride != comp_len) { - TokenizeError("cannot ReadData, calculated data stride differs from what the file claims", input, cursor); - } - } - // zip/deflate algorithm (encoding==1)? take given length. anything else? die - else if (encoding != 1) { - TokenizeError("cannot ReadData, unknown encoding", input, cursor); - } - cursor += comp_len; - break; - } // string - case 'S': { - const char *sb, *se; - // 0 characters can legally happen in such strings - ReadString(sb, se, input, cursor, end, true, true); - break; - } - default: - corrupt = true; // must exit - TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1), input, cursor); - return; - } - - if (cursor > end) { - corrupt = true; // must exit - TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1), input, cursor); - return; - } - - // the type code is contained in the returned range - send_out = cursor; -} - -// ------------------------------------------------------------------------------------------------ -bool ReadScope(TokenList &output_tokens, const char *input, const char *&cursor, const char *end, bool const is64bits, bool &corrupt) { - // the first word contains the offset at which this block ends - const uint64_t end_offset = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end); - - // we may get 0 if reading reached the end of the file - - // fbx files have a mysterious extra footer which I don't know - // how to extract any information from, but at least it always - // starts with a 0. - if (!end_offset) { - return false; - } - - if (end_offset > Offset(input, end)) { - TokenizeError("block offset is out of range", input, cursor); - corrupt = true; - return false; - } else if (end_offset < Offset(input, cursor)) { - TokenizeError("block offset is negative out of range", input, cursor); - corrupt = true; - return false; - } - - // the second data word contains the number of properties in the scope - const uint64_t prop_count = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end); - - // the third data word contains the length of the property list - const uint64_t prop_length = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end); - - // now comes the name of the scope/key - const char *sbeg = nullptr, *send = nullptr; - ReadString(sbeg, send, input, cursor, end); - - output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor))); - - // now come the individual properties - const char *begin_cursor = cursor; - for (unsigned int i = 0; i < prop_count; ++i) { - ReadData(sbeg, send, input, cursor, begin_cursor + prop_length, corrupt); - if (corrupt) { - return false; - } - - output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor))); - - if (i != prop_count - 1) { - output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor))); - } - } - - if (Offset(begin_cursor, cursor) != prop_length) { - TokenizeError("property length not reached, something is wrong", input, cursor); - corrupt = true; - return false; - } - - // at the end of each nested block, there is a NUL record to indicate - // that the sub-scope exists (i.e. to distinguish between P: and P : {}) - // this NUL record is 13 bytes long on 32 bit version and 25 bytes long on 64 bit. - const size_t sentinel_block_length = is64bits ? (sizeof(uint64_t) * 3 + 1) : (sizeof(uint32_t) * 3 + 1); - - if (Offset(input, cursor) < end_offset) { - if (end_offset - Offset(input, cursor) < sentinel_block_length) { - TokenizeError("insufficient padding bytes at block end", input, cursor); - } - - output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor))); - - // XXX this is vulnerable to stack overflowing .. - while (Offset(input, cursor) < end_offset - sentinel_block_length) { - ReadScope(output_tokens, input, cursor, input + end_offset - sentinel_block_length, is64bits, corrupt); - if (corrupt) { - return false; - } - } - output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor))); - - for (unsigned int i = 0; i < sentinel_block_length; ++i) { - if (cursor[i] != '\0') { - TokenizeError("failed to read nested block sentinel, expected all bytes to be 0", input, cursor); - corrupt = true; - return false; - } - } - cursor += sentinel_block_length; - } - - if (Offset(input, cursor) != end_offset) { - TokenizeError("scope length not reached, something is wrong", input, cursor); - corrupt = true; - return false; - } - - return true; -} -} // anonymous namespace - -// ------------------------------------------------------------------------------------------------ -// TODO: Test FBX Binary files newer than the 7500 version to check if the 64 bits address behaviour is consistent -void TokenizeBinary(TokenList &output_tokens, const char *input, size_t length, bool &corrupt) { - if (length < 0x1b) { - //TokenizeError("file is too short",0); - } - - //uint32_t offset = 0x15; - /* const char* cursor = input + 0x15; - const uint32_t flags = ReadWord(input, cursor, input + length); - const uint8_t padding_0 = ReadByte(input, cursor, input + length); // unused - const uint8_t padding_1 = ReadByte(input, cursor, input + length); // unused*/ - - if (strncmp(input, "Kaydara FBX Binary", 18)) { - TokenizeError("magic bytes not found", 0); - } - - const char *cursor = input + 18; - /*Result ignored*/ ReadByte(input, cursor, input + length); - /*Result ignored*/ ReadByte(input, cursor, input + length); - /*Result ignored*/ ReadByte(input, cursor, input + length); - /*Result ignored*/ ReadByte(input, cursor, input + length); - /*Result ignored*/ ReadByte(input, cursor, input + length); - const uint32_t version = ReadWord(input, cursor, input + length); - print_verbose("FBX Version: " + itos(version)); - //ASSIMP_LOG_DEBUG_F("FBX version: ", version); - const bool is64bits = version >= 7500; - const char *end = input + length; - while (cursor < end) { - if (!ReadScope(output_tokens, input, cursor, input + length, is64bits, corrupt)) { - break; - } - } -} -} // namespace FBXDocParser |