view src/string.h @ 6211:28d9692eb682 draft

(svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
author Darkvater <Darkvater@openttd.org>
date Sun, 04 Mar 2007 00:49:40 +0000
parents f2a1b973c9b9
children 0716cb7258dd
line wrap: on
line source

/* $Id$ */

#ifndef STRING_H
#define STRING_H

#include "macros.h"

/**
 * Size-bounded string concatenation (ttd_strlcat) and copy (ttd_strlcpy).
 * @param dst  destination buffer
 * @param src  string to copy/concatenate
 * @param size size of the destination buffer
 *
 * Usage: ttd_strlcpy(dst, src, lengthof(dst));
 */
void ttd_strlcat(char *dst, const char *src, size_t size);
void ttd_strlcpy(char *dst, const char *src, size_t size);

/**
 * String concatenation (strecat) and copy (strecpy) that are bounded by a
 * pointer to the end of the destination instead of by a size.
 * @param dst  destination buffer
 * @param src  string to copy
 * @param last pointer to the last element in the dst array;
 *             if NULL no boundary check is performed
 * @return a pointer to the terminating \0 in the destination buffer
 *
 * Usage: strecpy(dst, src, lastof(dst));
 */
char* strecat(char* dst, const char* src, const char* last);
char* strecpy(char* dst, const char* src, const char* last);

/* NOTE(review): the definition is not visible from this header. The signature
 * (format-like string followed by variadic arguments, returning char*)
 * suggests a printf-style formatter -- confirm against the definition, in
 * particular who owns the returned pointer and whether it must be freed. */
char* CDECL str_fmt(const char* str, ...);

/**
 * Scans the string for valid characters and if it finds invalid ones,
 * replaces them with a question mark '?'.
 * @param str the string to validate; passed as a non-const pointer and there
 *            is no return value, so the replacement happens in this buffer
 */
void str_validate(char *str);

/**
 * Scans the string for colour codes and strips them.
 * @param str the string to strip; passed as a non-const pointer and there is
 *            no return value, so the result is left in this buffer
 */
void str_strip_colours(char *str);

/**
 * Valid filter types for IsValidChar.
 * The filter selects which class of characters IsValidChar() accepts.
 */
typedef enum CharSetFilter {
	/** Both numeric and alphabetic and spaces and stuff */
	CS_ALPHANUMERAL,
	/** Only numeric ones */
	CS_NUMERAL,
	/** Only alphabetic values */
	CS_ALPHA,
} CharSetFilter;

/**
 * Convert the given string to lowercase, only works with ASCII!
 * @param str the string to convert; passed as a non-const pointer and there
 *            is no return value, so the conversion happens in this buffer
 */
void strtolower(char *str);


/**
 * Check whether a string is empty, i.e. whether its first character is
 * already the terminating '\0'.
 * @param s the string to test; it is dereferenced, so it must not be NULL
 * @return true if the string has no characters before the terminator
 */
static inline bool StrEmpty(const char* s)
{
	return *s == '\0';
}


/**
 * Get the length of a string, within a limited buffer.
 * @param str    the string to measure; at most maxlen bytes of it are examined
 * @param maxlen the maximum number of bytes to look at (size of the buffer)
 * @return the number of bytes before the first '\0', or maxlen when no '\0'
 *         occurs within the first maxlen bytes; 0 when maxlen <= 0
 */
static inline int ttd_strnlen(const char *str, int maxlen)
{
	const char *t;
	/* Check the bound BEFORE dereferencing: when the buffer holds no
	 * terminator, str[maxlen] lies outside of it and must not be read.
	 * The same ordering keeps maxlen <= 0 from touching str at all. */
	for (t = str; t - str < maxlen && *t != '\0'; t++) {}
	return (int)(t - str);
}

/**
 * Convert the md5sum number to a 'hexadecimal' string.
 * @param buf    buffer to write the string to
 * @param last   NOTE(review): presumably the last valid position of buf, in
 *               the same way as the 'last' parameter of strecpy() -- confirm
 * @param md5sum the 16 bytes of the md5sum
 * @return next position in the buffer
 */
char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16]);

/** Type holding a single character; the Utf8* helpers in this header
 * document values of this type as Unicode characters. */
typedef uint32 WChar;

/**
 * Only allow certain keys. You can define the filter to be used. This makes
 *  sure no invalid keys can get into an editbox, like BELL.
 * @param key character to be checked
 * @param afilter the filter to use, one of the CharSetFilter values
 * @return true if the character is printable/valid for the given filter,
 *         false otherwise
 */
bool IsValidChar(WChar key, CharSetFilter afilter);

/* Utf8Decode() stores a character through 'c'; Utf8Consume() in this header
 * adds its return value to the string pointer, i.e. the result is used as the
 * number of bytes of 's' that belong to that character.
 * NOTE(review): Utf8Encode() is presumably the inverse (write the UTF-8
 * encoding of 'c' to 'buf' and return the number of bytes written) -- the
 * definition is not visible here, confirm before relying on it. */
size_t Utf8Decode(WChar *c, const char *s);
size_t Utf8Encode(char *buf, WChar c);


/**
 * Decode the character at the current string position and step past it.
 * @param s pointer to the string pointer; it is advanced by the value that
 *          Utf8Decode() returns for the character at *s
 * @return the character that Utf8Decode() stored
 */
static inline WChar Utf8Consume(const char **s)
{
	WChar decoded;
	const size_t consumed = Utf8Decode(&decoded, *s);

	*s += consumed;
	return decoded;
}


/**
 * Return the length of a UTF-8 encoded character.
 * @param c Unicode character.
 * @return Length of UTF-8 encoding for character: 1 to 4 bytes for values
 *         below 0x110000, and 1 for anything at or above that limit.
 */
static inline size_t Utf8CharLen(WChar c)
{
	/* Invalid value, we encode as a '?' */
	if (c >= 0x110000) return 1;

	/* Valid values, checked from the longest encoding to the shortest. */
	if (c >= 0x10000) return 4;
	if (c >= 0x800)   return 3;
	return (c >= 0x80) ? 2 : 1;
}


/**
 * Check if the given character is part of a UTF8 sequence, i.e. whether it is
 * a byte of the form 10xxxxxx that follows the first byte of a sequence.
 * @param c the byte to test
 * @return true if GB(c, 6, 2) yields 2
 * @note GB() comes from macros.h; it is presumably "get n bits starting at
 *       bit s", which makes this a test of the two topmost bits of the byte.
 */
static inline bool IsUtf8Part(char c)
{
	const bool continuation = (GB(c, 6, 2) == 2);
	return continuation;
}

/**
 * Retrieve the (partial) length of the previous UNICODE character
 * in an UTF-8 encoded string.
 *
 * The string is scanned backwards from the byte just before 's' for as long
 * as IsUtf8Part() accepts the byte; the scan has no lower bound, so a byte
 * that IsUtf8Part() rejects must exist somewhere before 's'.
 *
 * @param s char pointer pointing to the first char of the next character
 * @returns the number of bytes between 's' and the closest preceding byte
 * that is not part of a sequence, i.e. the encoded length in bytes of the
 * UNICODE character just before the one 's' is pointing to
 * @note If 's' is not pointing to the first byte of the next UNICODE character
 * only a partial length of the sequence will be returned.
 * For example given this sequence: 0xE3 0x85 0x80, 0xE3 0x81 0x9E
 * 1. 's' is pointing to the second 0xE3, return value is 3
 * 2. 's' is pointing to 0x80, return value is 2.
 * So take care with the return values of this function. To get the real length
 * for an (invalid) sequence, step back by this function's return value and
 * pass that position to Utf8EncodedCharLen() or Utf8Decode()
 */
static inline size_t Utf8PrevCharLen(const char *s)
{
	const char *start = s - 1;

	/* Walk back over the trailing bytes until the first byte of the sequence. */
	while (IsUtf8Part(*start)) start--;

	return (size_t)(s - start);
}


/**
 * Check whether a character is considered printable.
 * @param c the character to check
 * @return false for values below 0x20 and for values in [0xE000, 0xE200),
 *         true for everything else
 */
static inline bool IsPrintable(WChar c)
{
	/* Everything below the space character is rejected. */
	if (c < 0x20) return false;

	/* Of the remaining values only the block [0xE000, 0xE200) is rejected.
	 * NOTE(review): presumably reserved for internal control codes -- confirm. */
	return !(c >= 0xE000 && c < 0xE200);
}


#endif /* STRING_H */