mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 00:07:36 +00:00
LibC: Reimplement scanf from the ground up
This adds support for some previously unsupported features (e.g. length modifiers) and fixes at least one FIXME. Fixes #90.
This commit is contained in:
parent
0bf496f864
commit
8bc1bcb34b
2 changed files with 773 additions and 201 deletions
|
@ -1,227 +1,546 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2000-2002 Opsycon AB (www.opsycon.se)
|
* Copyright (c) 2021, the SerenityOS developers.
|
||||||
*
|
* All rights reserved.
|
||||||
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions are met:
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* 3. All advertising materials mentioning features or use of this software
|
|
||||||
* must display the following acknowledgement:
|
|
||||||
* This product includes software developed by Opsycon AB.
|
|
||||||
* 4. The name of the author may not be used to endorse or promote products
|
|
||||||
* derived from this software without specific prior written permission.
|
|
||||||
*
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
* list of conditions and the following disclaimer.
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
||||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <AK/Assertions.h>
|
#include <AK/Assertions.h>
|
||||||
|
#include <AK/GenericLexer.h>
|
||||||
|
#include <AK/LogStream.h>
|
||||||
|
#include <AK/StdLibExtras.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
static const char* determine_base(const char* p, int& base)
|
enum LengthModifier {
|
||||||
{
|
None,
|
||||||
if (p[0] == '0') {
|
Default,
|
||||||
switch (p[1]) {
|
Char,
|
||||||
case 'x':
|
Short,
|
||||||
base = 16;
|
Long,
|
||||||
break;
|
LongLong,
|
||||||
case 't':
|
IntMax,
|
||||||
case 'n':
|
Size,
|
||||||
base = 10;
|
PtrDiff,
|
||||||
break;
|
LongDouble,
|
||||||
case 'o':
|
};
|
||||||
base = 8;
|
|
||||||
break;
|
enum ConversionSpecifier {
|
||||||
default:
|
Unspecified,
|
||||||
base = 10;
|
Decimal,
|
||||||
return p;
|
Integer,
|
||||||
}
|
Octal,
|
||||||
return p + 2;
|
Unsigned,
|
||||||
|
Hex,
|
||||||
|
Floating,
|
||||||
|
String,
|
||||||
|
UseScanList,
|
||||||
|
Character,
|
||||||
|
Pointer,
|
||||||
|
OutputNumberOfBytes,
|
||||||
|
Invalid,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class ReadKind {
|
||||||
|
Normal,
|
||||||
|
Octal,
|
||||||
|
Hex,
|
||||||
|
Infer,
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T, typename ApT, ReadKind kind = ReadKind::Normal>
|
||||||
|
struct read_element_concrete {
|
||||||
|
bool operator()(GenericLexer&, va_list)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
base = 10;
|
};
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int _atob(unsigned long* vp, const char* p, int base)
|
template<typename ApT, ReadKind kind>
|
||||||
{
|
struct read_element_concrete<int, ApT, kind> {
|
||||||
unsigned long value, v1, v2;
|
bool operator()(GenericLexer& lexer, va_list* ap)
|
||||||
const char* q;
|
{
|
||||||
char tmp[20];
|
lexer.ignore_while(isspace);
|
||||||
int digit;
|
|
||||||
|
|
||||||
if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
|
auto* ptr = va_arg(*ap, ApT*);
|
||||||
base = 16;
|
long value = 0;
|
||||||
p += 2;
|
char* endptr = nullptr;
|
||||||
|
auto nptr = lexer.remaining().characters_without_null_termination();
|
||||||
|
if constexpr (kind == ReadKind::Normal)
|
||||||
|
value = strtol(nptr, &endptr, 10);
|
||||||
|
if constexpr (kind == ReadKind::Octal)
|
||||||
|
value = strtol(nptr, &endptr, 8);
|
||||||
|
if constexpr (kind == ReadKind::Hex)
|
||||||
|
value = strtol(nptr, &endptr, 16);
|
||||||
|
if constexpr (kind == ReadKind::Infer)
|
||||||
|
value = strtol(nptr, &endptr, 0);
|
||||||
|
|
||||||
|
if (!endptr)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (endptr == nptr)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
auto diff = endptr - nptr;
|
||||||
|
ASSERT(diff > 0);
|
||||||
|
lexer.ignore((size_t)diff);
|
||||||
|
|
||||||
|
*ptr = value;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if (base == 16 && (q = strchr(p, '.')) != 0) {
|
template<typename ApT, ReadKind kind>
|
||||||
if (q - p > (ssize_t)sizeof(tmp) - 1)
|
struct read_element_concrete<char, ApT, kind> {
|
||||||
return 0;
|
bool operator()(GenericLexer& lexer, va_list* ap)
|
||||||
memcpy(tmp, p, q - p);
|
{
|
||||||
tmp[q - p] = '\0';
|
static_assert(kind == ReadKind::Normal, "Can't read a non-normal character");
|
||||||
|
|
||||||
if (!_atob(&v1, tmp, 16))
|
auto* ptr = va_arg(*ap, ApT*);
|
||||||
return 0;
|
|
||||||
++q;
|
if (lexer.is_eof())
|
||||||
if (strchr(q, '.'))
|
return false;
|
||||||
return 0;
|
|
||||||
if (!_atob(&v2, q, 16))
|
auto ch = lexer.consume();
|
||||||
return 0;
|
*ptr = ch;
|
||||||
*vp = (v1 << 16) + v2;
|
return true;
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
value = *vp = 0;
|
template<typename ApT, ReadKind kind>
|
||||||
for (; *p; p++) {
|
struct read_element_concrete<unsigned, ApT, kind> {
|
||||||
if (*p >= '0' && *p <= '9')
|
bool operator()(GenericLexer& lexer, va_list* ap)
|
||||||
digit = *p - '0';
|
{
|
||||||
else if (*p >= 'a' && *p <= 'f')
|
lexer.ignore_while(isspace);
|
||||||
digit = *p - 'a' + 10;
|
|
||||||
else if (*p >= 'A' && *p <= 'F')
|
auto* ptr = va_arg(*ap, ApT*);
|
||||||
digit = *p - 'A' + 10;
|
unsigned long value = 0;
|
||||||
|
char* endptr = nullptr;
|
||||||
|
auto nptr = lexer.remaining().characters_without_null_termination();
|
||||||
|
if constexpr (kind == ReadKind::Normal)
|
||||||
|
value = strtoul(nptr, &endptr, 10);
|
||||||
|
if constexpr (kind == ReadKind::Octal)
|
||||||
|
value = strtoul(nptr, &endptr, 8);
|
||||||
|
if constexpr (kind == ReadKind::Hex)
|
||||||
|
value = strtoul(nptr, &endptr, 16);
|
||||||
|
if constexpr (kind == ReadKind::Infer)
|
||||||
|
value = strtoul(nptr, &endptr, 0);
|
||||||
|
|
||||||
|
if (!endptr)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (endptr == nptr)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
auto diff = endptr - nptr;
|
||||||
|
ASSERT(diff > 0);
|
||||||
|
lexer.ignore((size_t)diff);
|
||||||
|
|
||||||
|
*ptr = value;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename ApT, ReadKind kind>
|
||||||
|
struct read_element_concrete<float, ApT, kind> {
|
||||||
|
bool operator()(GenericLexer& lexer, va_list* ap)
|
||||||
|
{
|
||||||
|
lexer.ignore_while(isspace);
|
||||||
|
|
||||||
|
auto* ptr = va_arg(*ap, ApT*);
|
||||||
|
|
||||||
|
double value = 0;
|
||||||
|
char* endptr = nullptr;
|
||||||
|
auto nptr = lexer.remaining().characters_without_null_termination();
|
||||||
|
if constexpr (kind == ReadKind::Normal)
|
||||||
|
value = strtod(nptr, &endptr);
|
||||||
else
|
else
|
||||||
return 0;
|
return false;
|
||||||
|
|
||||||
if (digit >= base)
|
if (!endptr)
|
||||||
return 0;
|
return false;
|
||||||
value *= base;
|
|
||||||
value += digit;
|
if (endptr == nptr)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
auto diff = endptr - nptr;
|
||||||
|
ASSERT(diff > 0);
|
||||||
|
lexer.ignore((size_t)diff);
|
||||||
|
|
||||||
|
*ptr = value;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
*vp = value;
|
};
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int atob(unsigned int* vp, const char* p, int base)
|
template<typename T, ReadKind kind>
|
||||||
{
|
struct read_element {
|
||||||
unsigned long v;
|
bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
|
||||||
|
{
|
||||||
if (base == 0)
|
switch (length_modifier) {
|
||||||
p = determine_base(p, base);
|
default:
|
||||||
if (_atob(&v, p, base)) {
|
case None:
|
||||||
*vp = v;
|
ASSERT_NOT_REACHED();
|
||||||
return 1;
|
case Default:
|
||||||
}
|
return read_element_concrete<T, T, kind> {}(input_lexer, ap);
|
||||||
return 0;
|
case Char:
|
||||||
}
|
return read_element_concrete<T, char, kind> {}(input_lexer, ap);
|
||||||
|
case Short:
|
||||||
#define ISSPACE " \t\n\r\f\v"
|
return read_element_concrete<T, short, kind> {}(input_lexer, ap);
|
||||||
|
case Long:
|
||||||
int vsscanf(const char* buf, const char* s, va_list ap)
|
if constexpr (IsSame<T, int>::value)
|
||||||
{
|
return read_element_concrete<T, long, kind> {}(input_lexer, ap);
|
||||||
int base = 10;
|
if constexpr (IsSame<T, float>::value)
|
||||||
char* t;
|
return read_element_concrete<T, double, kind> {}(input_lexer, ap);
|
||||||
char tmp[BUFSIZ];
|
return false;
|
||||||
bool noassign = false;
|
case LongLong:
|
||||||
int count = 0;
|
if constexpr (IsSame<T, int>::value)
|
||||||
int width = 0;
|
return read_element_concrete<T, long long, kind> {}(input_lexer, ap);
|
||||||
|
if constexpr (IsSame<T, float>::value)
|
||||||
// FIXME: This doesn't work quite right. For example, it fails to match 'SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.1\r\n'
|
return read_element_concrete<T, double, kind> {}(input_lexer, ap);
|
||||||
// with 'SSH-%d.%d-%[^\n]\n'
|
return false;
|
||||||
|
case IntMax:
|
||||||
while (*s && *buf) {
|
return read_element_concrete<T, intmax_t, kind> {}(input_lexer, ap);
|
||||||
while (isspace(*s))
|
case Size:
|
||||||
s++;
|
return read_element_concrete<T, size_t, kind> {}(input_lexer, ap);
|
||||||
if (*s == '%') {
|
case PtrDiff:
|
||||||
s++;
|
return read_element_concrete<T, ptrdiff_t, kind> {}(input_lexer, ap);
|
||||||
for (; *s; s++) {
|
case LongDouble:
|
||||||
if (strchr("dibouxcsefg%", *s))
|
return read_element_concrete<T, long double, kind> {}(input_lexer, ap);
|
||||||
break;
|
|
||||||
if (*s == '*')
|
|
||||||
noassign = true;
|
|
||||||
else if (*s >= '1' && *s <= '9') {
|
|
||||||
const char* tc;
|
|
||||||
for (tc = s; isdigit(*s); s++)
|
|
||||||
;
|
|
||||||
ASSERT((ssize_t)sizeof(tmp) >= s - tc + 1);
|
|
||||||
memcpy(tmp, tc, s - tc);
|
|
||||||
tmp[s - tc] = '\0';
|
|
||||||
atob((uint32_t*)&width, tmp, 10);
|
|
||||||
s--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (*s == 's') {
|
|
||||||
while (isspace(*buf))
|
|
||||||
buf++;
|
|
||||||
if (!width)
|
|
||||||
width = strcspn(buf, ISSPACE);
|
|
||||||
if (!noassign) {
|
|
||||||
// In this case, we have no way to ensure the user buffer is not overflown :(
|
|
||||||
memcpy(t = va_arg(ap, char*), buf, width);
|
|
||||||
t[width] = '\0';
|
|
||||||
}
|
|
||||||
buf += width;
|
|
||||||
} else if (*s == 'c') {
|
|
||||||
if (!width)
|
|
||||||
width = 1;
|
|
||||||
if (!noassign) {
|
|
||||||
memcpy(t = va_arg(ap, char*), buf, width);
|
|
||||||
// No null terminator!
|
|
||||||
}
|
|
||||||
buf += width;
|
|
||||||
} else if (strchr("dobxu", *s)) {
|
|
||||||
while (isspace(*buf))
|
|
||||||
buf++;
|
|
||||||
if (*s == 'd' || *s == 'u')
|
|
||||||
base = 10;
|
|
||||||
else if (*s == 'x')
|
|
||||||
base = 16;
|
|
||||||
else if (*s == 'o')
|
|
||||||
base = 8;
|
|
||||||
else if (*s == 'b')
|
|
||||||
base = 2;
|
|
||||||
if (!width) {
|
|
||||||
if (isspace(*(s + 1)) || *(s + 1) == 0) {
|
|
||||||
width = strcspn(buf, ISSPACE);
|
|
||||||
} else {
|
|
||||||
auto* p = strchr(buf, *(s + 1));
|
|
||||||
if (p)
|
|
||||||
width = p - buf;
|
|
||||||
else {
|
|
||||||
noassign = true;
|
|
||||||
width = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
memcpy(tmp, buf, width);
|
|
||||||
tmp[width] = '\0';
|
|
||||||
buf += width;
|
|
||||||
if (!noassign) {
|
|
||||||
if (!atob(va_arg(ap, uint32_t*), tmp, base))
|
|
||||||
noassign = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!noassign)
|
|
||||||
++count;
|
|
||||||
width = 0;
|
|
||||||
noassign = false;
|
|
||||||
++s;
|
|
||||||
} else {
|
|
||||||
while (isspace(*buf))
|
|
||||||
buf++;
|
|
||||||
if (*s != *buf)
|
|
||||||
break;
|
|
||||||
else {
|
|
||||||
++s;
|
|
||||||
++buf;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return count;
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct read_element<char*, ReadKind::Normal> {
|
||||||
|
read_element(StringView scan_set = {}, bool invert = false)
|
||||||
|
: scan_set(scan_set.is_null() ? " \t\n\f\r" : scan_set)
|
||||||
|
, invert(scan_set.is_null() ? true : invert)
|
||||||
|
, was_null(scan_set.is_null())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
|
||||||
|
{
|
||||||
|
// FIXME: Implement wide strings and such.
|
||||||
|
if (length_modifier != LengthModifier::Default)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (was_null)
|
||||||
|
input_lexer.ignore_while(isspace);
|
||||||
|
|
||||||
|
auto* ptr = va_arg(*ap, char*);
|
||||||
|
auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); });
|
||||||
|
if (str.is_empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
memcpy(ptr, str.characters_without_null_termination(), str.length());
|
||||||
|
ptr[str.length()] = 0;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool matches(char c) const
|
||||||
|
{
|
||||||
|
return invert ^ scan_set.contains(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
const StringView scan_set;
|
||||||
|
bool invert { false };
|
||||||
|
bool was_null { false };
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct read_element<void*, ReadKind::Normal> {
|
||||||
|
bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap)
|
||||||
|
{
|
||||||
|
if (length_modifier != LengthModifier::Default)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
input_lexer.ignore_while(isspace);
|
||||||
|
|
||||||
|
auto* ptr = va_arg(*ap, void**);
|
||||||
|
auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); });
|
||||||
|
|
||||||
|
if (count != 8) {
|
||||||
|
fail:;
|
||||||
|
for (size_t i = 0; i < count; ++i)
|
||||||
|
input_lexer.retreat();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
char buf[9] { 0 };
|
||||||
|
memcpy(buf, str.characters_without_null_termination(), 8);
|
||||||
|
buf[8] = 0;
|
||||||
|
char* endptr = nullptr;
|
||||||
|
auto value = strtoull(buf, &endptr, 16);
|
||||||
|
|
||||||
|
if (endptr != &buf[8])
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
memcpy(ptr, &value, sizeof(value));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool should_consume(char c)
|
||||||
|
{
|
||||||
|
if (count == 8)
|
||||||
|
return false;
|
||||||
|
if (!isxdigit(c))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
++count;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
size_t count { 0 };
|
||||||
|
};
|
||||||
|
|
||||||
|
extern "C" int vsscanf(const char* input, const char* format, va_list ap)
|
||||||
|
{
|
||||||
|
GenericLexer format_lexer { format };
|
||||||
|
GenericLexer input_lexer { input };
|
||||||
|
|
||||||
|
int elements_matched = 0;
|
||||||
|
|
||||||
|
while (!format_lexer.is_eof()) {
|
||||||
|
format_lexer.ignore_while(isspace);
|
||||||
|
if (!format_lexer.next_is('%')) {
|
||||||
|
read_one_literal:;
|
||||||
|
input_lexer.ignore_while(isspace);
|
||||||
|
if (format_lexer.is_eof())
|
||||||
|
break;
|
||||||
|
|
||||||
|
auto next_char = format_lexer.consume();
|
||||||
|
if (!input_lexer.consume_specific(next_char))
|
||||||
|
return elements_matched;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (format_lexer.next_is("%%")) {
|
||||||
|
format_lexer.ignore();
|
||||||
|
goto read_one_literal;
|
||||||
|
}
|
||||||
|
|
||||||
|
format_lexer.ignore(); // '%'
|
||||||
|
|
||||||
|
bool invert_scanlist = false;
|
||||||
|
StringView scanlist;
|
||||||
|
LengthModifier length_modifier { None };
|
||||||
|
ConversionSpecifier conversion_specifier { Unspecified };
|
||||||
|
reread_lookahead:;
|
||||||
|
auto format_lookahead = format_lexer.peek();
|
||||||
|
if (length_modifier == None) {
|
||||||
|
switch (format_lookahead) {
|
||||||
|
case 'h':
|
||||||
|
if (format_lexer.peek(1) == 'h') {
|
||||||
|
format_lexer.consume(2);
|
||||||
|
length_modifier = Char;
|
||||||
|
} else {
|
||||||
|
format_lexer.consume(1);
|
||||||
|
length_modifier = Short;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'l':
|
||||||
|
if (format_lexer.peek(1) == 'l') {
|
||||||
|
format_lexer.consume(2);
|
||||||
|
length_modifier = LongLong;
|
||||||
|
} else {
|
||||||
|
format_lexer.consume(1);
|
||||||
|
length_modifier = Long;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'j':
|
||||||
|
format_lexer.consume();
|
||||||
|
length_modifier = IntMax;
|
||||||
|
break;
|
||||||
|
case 'z':
|
||||||
|
format_lexer.consume();
|
||||||
|
length_modifier = Size;
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
format_lexer.consume();
|
||||||
|
length_modifier = PtrDiff;
|
||||||
|
break;
|
||||||
|
case 'L':
|
||||||
|
format_lexer.consume();
|
||||||
|
length_modifier = LongDouble;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
length_modifier = Default;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
goto reread_lookahead;
|
||||||
|
}
|
||||||
|
if (conversion_specifier == Unspecified) {
|
||||||
|
switch (format_lookahead) {
|
||||||
|
case 'd':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Decimal;
|
||||||
|
break;
|
||||||
|
case 'i':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Integer;
|
||||||
|
break;
|
||||||
|
case 'o':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Octal;
|
||||||
|
break;
|
||||||
|
case 'u':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Unsigned;
|
||||||
|
break;
|
||||||
|
case 'x':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Hex;
|
||||||
|
break;
|
||||||
|
case 'a':
|
||||||
|
case 'e':
|
||||||
|
case 'f':
|
||||||
|
case 'g':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Floating;
|
||||||
|
break;
|
||||||
|
case 's':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = String;
|
||||||
|
break;
|
||||||
|
case '[':
|
||||||
|
format_lexer.consume();
|
||||||
|
scanlist = format_lexer.consume_until(']');
|
||||||
|
if (scanlist.starts_with('^')) {
|
||||||
|
scanlist = scanlist.substring_view(1);
|
||||||
|
invert_scanlist = true;
|
||||||
|
}
|
||||||
|
conversion_specifier = UseScanList;
|
||||||
|
break;
|
||||||
|
case 'c':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Character;
|
||||||
|
break;
|
||||||
|
case 'p':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Pointer;
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = OutputNumberOfBytes;
|
||||||
|
break;
|
||||||
|
case 'C':
|
||||||
|
format_lexer.consume();
|
||||||
|
length_modifier = Long;
|
||||||
|
conversion_specifier = Character;
|
||||||
|
break;
|
||||||
|
case 'S':
|
||||||
|
format_lexer.consume();
|
||||||
|
length_modifier = Long;
|
||||||
|
conversion_specifier = String;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
format_lexer.consume();
|
||||||
|
conversion_specifier = Invalid;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now try to read.
|
||||||
|
switch (conversion_specifier) {
|
||||||
|
case Invalid:
|
||||||
|
case Unspecified:
|
||||||
|
default:
|
||||||
|
// "undefined behaviour", let's be nice and crash.
|
||||||
|
dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier);
|
||||||
|
ASSERT_NOT_REACHED();
|
||||||
|
case Decimal:
|
||||||
|
if (!read_element<int, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case Integer:
|
||||||
|
if (!read_element<int, ReadKind::Infer> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case Octal:
|
||||||
|
if (!read_element<unsigned, ReadKind::Octal> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case Unsigned:
|
||||||
|
if (!read_element<unsigned, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case Hex:
|
||||||
|
if (!read_element<unsigned, ReadKind::Hex> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case Floating:
|
||||||
|
if (!read_element<float, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case String:
|
||||||
|
if (!read_element<char*, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case UseScanList:
|
||||||
|
if (!read_element<char*, ReadKind::Normal> { scanlist, invert_scanlist }(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case Character:
|
||||||
|
if (!read_element<char, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case Pointer:
|
||||||
|
if (!read_element<void*, ReadKind::Normal> {}(length_modifier, input_lexer, &ap))
|
||||||
|
format_lexer.consume_all();
|
||||||
|
else
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
case OutputNumberOfBytes: {
|
||||||
|
auto* ptr = va_arg(ap, int*);
|
||||||
|
*ptr = input_lexer.tell();
|
||||||
|
++elements_matched;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return elements_matched;
|
||||||
}
|
}
|
||||||
|
|
253
Userland/Tests/LibC/scanf.cpp
Normal file
253
Userland/Tests/LibC/scanf.cpp
Normal file
|
@ -0,0 +1,253 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2021, the SerenityOS developers.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <AK/Array.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// Single-token aliases: DECL_WITH_TYPE pastes the type name into identifiers, which only
// works when the type is spelled as one token.
using longdouble = long double;
using longlong = long long;
// Destination buffer type for string conversions (32 bytes including the terminator).
using charstar = char[32];
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
constexpr static Array<unsigned char, 32> to_value_t(T x)
|
||||||
|
{
|
||||||
|
// The endianness doesn't really matter, since we're going to convert both sides with this anyway.
|
||||||
|
union Value {
|
||||||
|
u8 v[32];
|
||||||
|
T t;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto value = Value { .t = x };
|
||||||
|
|
||||||
|
return {
|
||||||
|
value.v[0],
|
||||||
|
value.v[1],
|
||||||
|
value.v[2],
|
||||||
|
value.v[3],
|
||||||
|
value.v[4],
|
||||||
|
value.v[5],
|
||||||
|
value.v[6],
|
||||||
|
value.v[7],
|
||||||
|
value.v[8],
|
||||||
|
value.v[9],
|
||||||
|
value.v[10],
|
||||||
|
value.v[11],
|
||||||
|
value.v[12],
|
||||||
|
value.v[13],
|
||||||
|
value.v[14],
|
||||||
|
value.v[15],
|
||||||
|
value.v[16],
|
||||||
|
value.v[17],
|
||||||
|
value.v[18],
|
||||||
|
value.v[19],
|
||||||
|
value.v[20],
|
||||||
|
value.v[21],
|
||||||
|
value.v[22],
|
||||||
|
value.v[23],
|
||||||
|
value.v[24],
|
||||||
|
value.v[25],
|
||||||
|
value.v[26],
|
||||||
|
value.v[27],
|
||||||
|
value.v[28],
|
||||||
|
value.v[29],
|
||||||
|
value.v[30],
|
||||||
|
value.v[31],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template<size_t N>
|
||||||
|
constexpr static Array<unsigned char, 32> str_to_value_t(const char (&x)[N])
|
||||||
|
{
|
||||||
|
Array<unsigned char, 32> value { 0 };
|
||||||
|
for (size_t i = 0; i < N; ++i)
|
||||||
|
value[i] = x[i];
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Describes one destination handed to sscanf().
struct Argument {
    // sizeof() of the destination object; arg_to_value_t uses it to decide how to read it back.
    size_t size { 0 };
    // Address of the destination object, passed to sscanf() as-is.
    void* data { nullptr };
};
|
||||||
|
|
||||||
|
// Reads back what sscanf() stored in `arg` as a zero-padded 32-byte value, in the same
// layout to_value_t() / str_to_value_t() produce for the expected values.
// The size of the destination selects how it is read:
//   1, 2, 4, 8 - as an unsigned integer of that width
//   16         - as 16 raw bytes
//   32         - as a C string in a charstar buffer (bytes after the terminator are ignored)
// Any other size is a bug in the test table.
static Array<u8, 32> arg_to_value_t(const Argument& arg)
{
    switch (arg.size) {
    case 1:
        return to_value_t(*(u8*)arg.data);
    case 2:
        return to_value_t(*(u16*)arg.data);
    case 4:
        return to_value_t(*(u32*)arg.data);
    case 8:
        return to_value_t(*(u64*)arg.data);
    case 16: {
        auto* bytes = (const char*)arg.data;
        Array<unsigned char, 32> value { 0 };
        for (size_t i = 0; i < 16; ++i)
            value[i] = bytes[i];
        return value;
    }
    case 32: {
        auto& data = *(charstar*)arg.data;
        Array<unsigned char, 32> value { 0 };
        // Stop at the terminator or at the end of the buffer, whichever comes first, so a
        // buffer without a terminator can neither be over-read nor overflow `value`.
        for (size_t i = 0; i < sizeof(charstar) && data[i] != '\0'; ++i)
            value[i] = data[i];
        return value;
    }
    default:
        break;
    }

    ASSERT_NOT_REACHED();
}
|
||||||
|
|
||||||
|
#define DECL_WITH_TYPE(ty) \
|
||||||
|
ty _##ty##arg0; \
|
||||||
|
ty _##ty##arg1; \
|
||||||
|
ty _##ty##arg2; \
|
||||||
|
Argument ty##arg0 { sizeof(ty), &_##ty##arg0 }; \
|
||||||
|
Argument ty##arg1 { sizeof(ty), &_##ty##arg1 }; \
|
||||||
|
Argument ty##arg2 { sizeof(ty), &_##ty##arg2 };
|
||||||
|
|
||||||
|
DECL_WITH_TYPE(int);
|
||||||
|
DECL_WITH_TYPE(unsigned);
|
||||||
|
DECL_WITH_TYPE(long);
|
||||||
|
DECL_WITH_TYPE(longlong);
|
||||||
|
DECL_WITH_TYPE(float);
|
||||||
|
DECL_WITH_TYPE(double);
|
||||||
|
DECL_WITH_TYPE(longdouble);
|
||||||
|
|
||||||
|
#undef DECL_WITH_TYPE
|
||||||
|
|
||||||
|
charstar _charstararg0;
|
||||||
|
charstar _charstararg1;
|
||||||
|
charstar _charstararg2;
|
||||||
|
Argument charstararg0 { sizeof(charstar), &_charstararg0[0] };
|
||||||
|
Argument charstararg1 { sizeof(charstar), &_charstararg1[0] };
|
||||||
|
Argument charstararg2 { sizeof(charstar), &_charstararg2[0] };
|
||||||
|
|
||||||
|
struct TestSuite {
|
||||||
|
const char* format;
|
||||||
|
const char* input;
|
||||||
|
int expected_output;
|
||||||
|
size_t argument_count;
|
||||||
|
Argument arguments[8];
|
||||||
|
Array<unsigned char, 32> expected_values[8]; // 32 bytes for each argument's value.
|
||||||
|
};
|
||||||
|
|
||||||
|
// Table of test cases: format, input, expected return value, argument count,
// destinations, expected stored values.
const TestSuite test_suites[] {
    // Empty input: nothing is converted.
    { "%d", "", 0, 0, {}, {} },

    // Hexadecimal with a "0x" prefix; parsing stops at the first non-hex character.
    { "%x", "0x519", 1, 1, { unsignedarg0 }, { to_value_t(0x519) } },
    { "%x", "0x51g", 1, 1, { unsignedarg0 }, { to_value_t(0x51u) } },

    // Literal characters and "%%" around a conversion.
    { "\"%%%d#", "\"%42#", 1, 1, { intarg0 }, { to_value_t(42) } },

    // Whitespace in the format and in the input.
    { " %d", "42", 1, 1, { intarg0 }, { to_value_t(42) } },
    { "%d", " 42", 1, 1, { intarg0 }, { to_value_t(42) } },

    // Length modifiers.
    { "%ld", "42", 1, 1, { longarg0 }, { to_value_t(42l) } },
    { "%lld", "42", 1, 1, { longlongarg0 }, { to_value_t(42ll) } },
    { "%f", "42", 1, 1, { floatarg0 }, { to_value_t(42.0f) } },
    { "%lf", "42", 1, 1, { doublearg0 }, { to_value_t(42.0) } },

    // Strings and scan sets.
    { "%s", "42", 1, 1, { charstararg0 }, { str_to_value_t("42") } },
    { "%d%s", "42yoinks", 2, 2, { intarg0, charstararg0 }, { to_value_t(42), str_to_value_t("yoinks") } },
    { "%[^\n]", "aaaa\n", 1, 1, { charstararg0 }, { str_to_value_t("aaaa") } },

    // Input ends before the last conversion: only two values are stored.
    { "%u.%u.%u", "3.19", 2, 3, { unsignedarg0, unsignedarg1, unsignedarg2 }, { to_value_t(3u), to_value_t(19u) } },

    // Failing test case from previous impl:
    { "SSH-%d.%d-%[^\n]\n", "SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.1\n", 3, 3, { intarg0, intarg1, charstararg0 }, { to_value_t(2), to_value_t(0), str_to_value_t("OpenSSH_8.2p1 Ubuntu-4ubuntu0.1") } },
};

// Set as soon as any test case fails; decides the exit status of main().
bool g_any_failed = false;
|
||||||
|
|
||||||
|
static bool check_value_conformance(const TestSuite& test)
|
||||||
|
{
|
||||||
|
bool fail = false;
|
||||||
|
for (int i = 0; i < test.expected_output; ++i) {
|
||||||
|
auto& arg = test.arguments[i];
|
||||||
|
auto arg_value = arg_to_value_t(arg);
|
||||||
|
auto& value = test.expected_values[i];
|
||||||
|
if (arg_value != value) {
|
||||||
|
auto arg_ptr = (const u32*)arg_value.data();
|
||||||
|
auto value_ptr = (const u32*)value.data();
|
||||||
|
printf(" value %d FAIL, expected %04x%04x%04x%04x%04x%04x%04x%04x but got %04x%04x%04x%04x%04x%04x%04x%04x\n",
|
||||||
|
i,
|
||||||
|
value_ptr[0], value_ptr[1], value_ptr[2], value_ptr[3],
|
||||||
|
value_ptr[4], value_ptr[5], value_ptr[6], value_ptr[7],
|
||||||
|
arg_ptr[0], arg_ptr[1], arg_ptr[2], arg_ptr[3],
|
||||||
|
arg_ptr[4], arg_ptr[5], arg_ptr[6], arg_ptr[7]);
|
||||||
|
fail = true;
|
||||||
|
} else {
|
||||||
|
printf(" value %d PASS\n", i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return !fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void do_one_test(const TestSuite& test)
|
||||||
|
{
|
||||||
|
printf("Testing '%s' against '%s'...\n", test.input, test.format);
|
||||||
|
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
|
||||||
|
auto rc = sscanf(test.input, test.format,
|
||||||
|
test.arguments[0].data, test.arguments[1].data, test.arguments[2].data, test.arguments[3].data,
|
||||||
|
test.arguments[4].data, test.arguments[5].data, test.arguments[6].data, test.arguments[7].data);
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
bool overall = true;
|
||||||
|
printf(" output value...\n");
|
||||||
|
if (rc != test.expected_output) {
|
||||||
|
printf(" output value FAIL, expected %d but got %d\n", test.expected_output, rc);
|
||||||
|
overall = false;
|
||||||
|
} else {
|
||||||
|
printf(" output value PASS\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(" read values...\n");
|
||||||
|
if (check_value_conformance(test)) {
|
||||||
|
printf(" read values PASS\n");
|
||||||
|
} else {
|
||||||
|
printf(" read values FAIL\n");
|
||||||
|
overall = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (overall)
|
||||||
|
printf(" overall PASS\n");
|
||||||
|
else
|
||||||
|
printf(" overall FAIL\n");
|
||||||
|
|
||||||
|
g_any_failed = g_any_failed || !overall;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
for (auto& test : test_suites)
|
||||||
|
do_one_test(test);
|
||||||
|
|
||||||
|
return g_any_failed ? 1 : 0;
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue