mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 11:22:45 +00:00 
			
		
		
		
	AK: Add a forgiving_base64_decode helper
According to the specification at https://infra.spec.whatwg.org/#forgiving-base64
This commit is contained in:
		
							parent
							
								
									50c6d133a9
								
							
						
					
					
						commit
						bbaf86fb46
					
				
					 2 changed files with 100 additions and 0 deletions
				
			
		|  | @ -5,8 +5,10 @@ | |||
|  */ | ||||
| 
 | ||||
| #include <AK/Array.h> | ||||
| #include <AK/Assertions.h> | ||||
| #include <AK/Base64.h> | ||||
| #include <AK/CharacterTypes.h> | ||||
| #include <AK/Error.h> | ||||
| #include <AK/StringBuilder.h> | ||||
| #include <AK/Types.h> | ||||
| #include <AK/Vector.h> | ||||
|  | @ -140,4 +142,100 @@ ErrorOr<String> encode_base64(ReadonlyBytes input) | |||
|     return output.to_string(); | ||||
| } | ||||
| 
 | ||||
| // https://infra.spec.whatwg.org/#forgiving-base64
 | ||||
| ErrorOr<ByteBuffer> decode_forgiving_base64(StringView input) | ||||
| { | ||||
|     // 1. Remove all ASCII whitespace from data.
 | ||||
|     auto data = input.trim_whitespace(); | ||||
| 
 | ||||
|     // 2. If data’s code point length divides by 4 leaving no remainder, then:
 | ||||
|     if (data.length() % 4 == 0) { | ||||
|         // If data ends with one or two U+003D (=) code points, then remove them from data.
 | ||||
|         if (data.ends_with("=="sv)) | ||||
|             data = data.substring_view(0, data.length() - 2); | ||||
|         else if (data.ends_with('=')) | ||||
|             data = data.substring_view(0, data.length() - 1); | ||||
|     } | ||||
| 
 | ||||
|     // 3. If data’s code point length divides by 4 leaving a remainder of 1, then return failure.
 | ||||
|     if (data.length() % 4 == 1) | ||||
|         return Error::from_string_literal("Invalid input length in forgiving base64 decode"); | ||||
| 
 | ||||
|     // 4. If data contains a code point that is not one of
 | ||||
|     //     U+002B (+), U+002F (/), ASCII alphanumeric
 | ||||
|     // then return failure.
 | ||||
|     for (auto point : data) { | ||||
|         if (point != '+' && point != '/' && !is_ascii_alphanumeric(point)) | ||||
|             return Error::from_string_literal("Invalid character in forgiving base64 decode"); | ||||
|     } | ||||
| 
 | ||||
|     // 5. Let output be an empty byte sequence.
 | ||||
|     // 6. Let buffer be an empty buffer that can have bits appended to it.
 | ||||
|     Vector<u8> output; | ||||
|     u32 buffer = 0; | ||||
|     auto accumulated_bits = 0; | ||||
| 
 | ||||
|     auto add_to_buffer = [&](u8 number) { | ||||
|         VERIFY(number < 64); | ||||
|         u32 buffer_mask = number; | ||||
| 
 | ||||
|         if (accumulated_bits == 0) | ||||
|             buffer_mask <<= 18; | ||||
|         else if (accumulated_bits == 6) | ||||
|             buffer_mask <<= 12; | ||||
|         else if (accumulated_bits == 12) | ||||
|             buffer_mask <<= 6; | ||||
|         else if (accumulated_bits == 18) | ||||
|             buffer_mask <<= 0; | ||||
| 
 | ||||
|         buffer |= buffer_mask; | ||||
| 
 | ||||
|         accumulated_bits += 6; | ||||
|     }; | ||||
| 
 | ||||
|     auto append_bytes = [&]() { | ||||
|         output.append(static_cast<u8>((buffer & 0xff0000) >> 16)); | ||||
|         output.append(static_cast<u8>((buffer & 0xff00) >> 8)); | ||||
|         output.append(static_cast<u8>(buffer & 0xff)); | ||||
| 
 | ||||
|         buffer = 0; | ||||
|         accumulated_bits = 0; | ||||
|     }; | ||||
| 
 | ||||
|     // 7. Let position be a position variable for data, initially pointing at the start of data.
 | ||||
|     // 8. While position does not point past the end of data:
 | ||||
|     for (auto point : data) { | ||||
|         // 1. Find the code point pointed to by position in the second column of Table 1: The Base 64 Alphabet of RFC 4648.
 | ||||
|         //    Let n be the number given in the first cell of the same row. [RFC4648]
 | ||||
|         auto n = alphabet_lookup_table[point]; | ||||
|         VERIFY(n >= 0); | ||||
| 
 | ||||
|         // 2. Append the six bits corresponding to n, most significant bit first, to buffer.
 | ||||
|         add_to_buffer(static_cast<u8>(n)); | ||||
| 
 | ||||
|         // 3. buffer has accumulated 24 bits,
 | ||||
|         if (accumulated_bits == 24) { | ||||
|             // interpret them as three 8-bit big-endian numbers.
 | ||||
|             // Append three bytes with values equal to those numbers to output, in the same order, and then empty buffer
 | ||||
|             append_bytes(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // 9. If buffer is not empty, it contains either 12 or 18 bits.
 | ||||
|     VERIFY(accumulated_bits == 0 || accumulated_bits == 12 || accumulated_bits == 18); | ||||
| 
 | ||||
|     // If it contains 12 bits, then discard the last four and interpret the remaining eight as an 8-bit big-endian number.
 | ||||
|     if (accumulated_bits == 12) | ||||
|         output.append(static_cast<u8>((buffer & 0xff0000) >> 16)); | ||||
| 
 | ||||
|     // If it contains 18 bits, then discard the last two and interpret the remaining 16 as two 8-bit big-endian numbers.
 | ||||
|     // Append the one or two bytes with values equal to those one or two numbers to output, in the same order.
 | ||||
|     if (accumulated_bits == 18) { | ||||
|         output.append(static_cast<u8>((buffer & 0xff0000) >> 16)); | ||||
|         output.append(static_cast<u8>((buffer & 0xff00) >> 8)); | ||||
|     } | ||||
| 
 | ||||
|     return ByteBuffer::copy(output); | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Arda Cinar
						Arda Cinar