You just wanted to brag about being able to use switch instead of if-then-else? Well believe me, it's nothing special. I'm in second year and we're studying design patterns.
>>8
Oh, I was going to write something else, but then I changed my mind, deleted some text and wrote this instead - I guess I deleted too much. I'm very sorry; I had a tiring day.
Writing some code to write code for me:
;; Table mapping a character to the HTML entity that replaces it.
;; HTML-ESCAPE-STRING splices this table into a CASE form at read time (#.),
;; so it is wrapped in EVAL-WHEN to be defined under COMPILE-FILE as well as
;; at the REPL or under LOAD.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (defparameter *character->html-encoding-table*
    '((#\< "&lt;")
      (#\> "&gt;")
      ;; & must be escaped too, otherwise text such as "&lt;" in the input
      ;; would come out as markup.
      (#\& "&amp;")
      ;; insert other cases here, or write something clever
      ;; to generate them from a source file (& and ; parts)
      )))

(defun html-escape-string (string)
  "Return a fresh string equal to STRING, except that every character listed
in *CHARACTER->HTML-ENCODING-TABLE* is replaced by its HTML entity."
  ;; Write into a string stream instead of (apply #'concatenate ...): APPLY
  ;; with one argument per character fails once STRING is longer than
  ;; CALL-ARGUMENTS-LIMIT.
  (with-output-to-string (out)
    (map nil
         #'(lambda (x)
             (write-string
              ;; or I could use something like KMP's meta, but whatever.
              ;; The CASE clauses are generated from the table at read time.
              #.`(case x
                   ,@*character->html-encoding-table*
                   (otherwise (string x)))
              out))
         string)))

;; Test
;CL-USER> (html-escape-string "abc><")
;"abc&gt;&lt;"
Name:
Anonymous2010-01-08 11:37
Only escape characters like < and >, and encode everything else as UTF-8. Named entities such as &uuml; are obsolete.
/* Grow the output buffer in 128-byte steps until `need` more bytes plus a
 * terminating NUL fit behind the `len` bytes already written.
 * Returns the (possibly moved) buffer and updates *maxlen. */
static char *php_escape_buf_reserve(char *buf, int len, int need, int *maxlen)
{
	if (len + need + 1 > *maxlen) {
		do {
			*maxlen += 128;
		} while (len + need + 1 > *maxlen);
		buf = erealloc(buf, *maxlen);
	}
	return buf;
}

/* Escape the `oldlen` bytes at `old` for inclusion in HTML.
 *
 * old, oldlen   input bytes and their count
 * newlen        out: length of the result, not counting the final NUL
 * all           non-zero: look each character up in the entity_map tables of
 *               the detected charset before trying the basic entities
 * quote_style   bit mask tested against basic_entities[].flags; an entry with
 *               non-zero flags is only applied when it shares a bit with it
 * hint_charset  passed on to determine_charset()
 * double_encode non-zero: every '&' becomes "&amp;"; zero: an '&' that starts
 *               something that looks like an entity reference is kept as is
 *
 * Returns a NUL-terminated buffer obtained from emalloc()/erealloc()
 * (presumably to be released by the caller with efree() - confirm). */
PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC)
{
	int i, j, maxlen, len;
	char *replaced;
	enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
	int matches_map;

	maxlen = 2 * oldlen;
	if (maxlen < 128)
		maxlen = 128;
	replaced = emalloc(maxlen);
	len = 0;
	i = 0;

	while (i < oldlen) {
		unsigned char mbsequence[16];	/* allow up to 15 characters in a multibyte sequence */
		int mbseqlen = sizeof(mbsequence);
		int status = SUCCESS;
		unsigned int this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);

		/* NOTE(review): `status` is never inspected, so a failure reported by
		 * get_next_char() (if it reports one this way) goes unnoticed here. */

		/* Must be reset for every character: it used to be read without ever
		 * being initialised, and a stale 1 would re-emit the previous entity. */
		matches_map = 0;

		/* Room for the largest fixed-size write of one iteration (a complete
		 * multibyte sequence) plus the final NUL. Variable-size writes below
		 * reserve their own space. */
		replaced = php_escape_buf_reserve(replaced, len, (int)sizeof(mbsequence), &maxlen);

		if (all) {
			/* look for a match in the maps for this charset */
			unsigned char *rep = NULL;

			for (j = 0; entity_map[j].charset != cs_terminator; j++) {
				if (entity_map[j].charset == charset
					&& this_char >= entity_map[j].basechar
					&& this_char <= entity_map[j].endchar) {
					rep = (unsigned char *)entity_map[j].table[this_char - entity_map[j].basechar];
					if (rep == NULL) {
						/* there is no entity for this position; fall through and
						 * just output the character itself */
						break;
					}
					matches_map = 1;
					break;
				}
			}

			if (matches_map) {
				int l = strlen((char *)rep);

				/* '&' + name + ';' */
				replaced = php_escape_buf_reserve(replaced, len, l + 2, &maxlen);
				replaced[len++] = '&';
				memcpy(replaced + len, rep, l);
				len += l;
				replaced[len++] = ';';
			}
		}

		if (!matches_map) {
			int is_basic = 0;

			if (this_char == '&') {
				if (double_encode) {
encode_amp:
					memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1);
					len += sizeof("&amp;") - 1;
				} else {
					/* Look at what follows the '&' (old + i is used as the
					 * position right after it) to decide whether it already
					 * is an entity reference. */
					char *e = memchr(old + i, ';', oldlen - i);
					char *s = (char *)old + i;

					if (!e || (e - s) > 10) { /* minor optimization to avoid "entities" over 10 chars in length */
						goto encode_amp;
					} else {
						if (*s == '#') { /* numeric entities */
							s++;
							/* Hex (Z) */
							if (*s == 'x' || *s == 'X') {
								s++;
								while (s < e) {
									if (!isxdigit((int)*(unsigned char *)s++)) {
										goto encode_amp;
									}
								}
							/* Dec (Z)*/
							} else {
								while (s < e) {
									if (!isdigit((int)*(unsigned char *)s++)) {
										goto encode_amp;
									}
								}
							}
						} else { /* text entities */
							while (s < e) {
								if (!isalnum((int)*(unsigned char *)s++)) {
									goto encode_amp;
								}
							}
						}
						/* looks like an existing entity: keep the '&' as is */
						replaced[len++] = '&';
					}
				}
				is_basic = 1;
			} else {
				for (j = 0; basic_entities[j].charcode != 0; j++) {
					if ((basic_entities[j].charcode != this_char) ||
						(basic_entities[j].flags &&
						(quote_style & basic_entities[j].flags) == 0)) {
						continue;
					}

					replaced = php_escape_buf_reserve(replaced, len, basic_entities[j].entitylen, &maxlen);
					memcpy(replaced + len, basic_entities[j].entity, basic_entities[j].entitylen);
					len += basic_entities[j].entitylen;
					is_basic = 1;
					break;
				}
			}

			if (!is_basic) {
				/* a wide char without a named entity; pass through the original sequence */
				if (mbseqlen > 1) {
					memcpy(replaced + len, mbsequence, mbseqlen);
					len += mbseqlen;
				} else {
					replaced[len++] = (unsigned char)this_char;
				}
			}
		}
	}

	replaced[len] = '\0';
	*newlen = len;
	return replaced;
}
/* {{{ entity_charset determine_charset
* Returns the charset identifier for a charset hint.
*
* - charset_hint == NULL: returns cs_8859_1 at once.
* - non-empty charset_hint: the name is looked up (case-insensitively, exact
*   length) in charset_map; an unknown name raises an E_WARNING and the
*   default cs_8859_1 is returned.
* - empty charset_hint: with HAVE_MBSTRING (and mbstring not built as a
*   separate module) the mbstring internal encoding is mapped to a charset;
*   the remaining fallback derives the name from a locale string.
*
* NOTE(review): this copy of the function looks truncated between the last
* `case` of the switch and `dot = strchr(localename, '.')`: the switch is
* never closed properly, `dot`, `at` and `localename` are not declared
* anywhere in the visible body, and there are two `#if` lines but only one
* `#endif`. Restore the missing part before building this. */
static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
{
int i;
enum entity_charset charset = cs_8859_1;
int len = 0;
/* Never assigned in the visible code; only released at the end if non-NULL. */
zval *uf_result = NULL;
/* Guarantee default behaviour for backwards compatibility */
if (charset_hint == NULL)
return cs_8859_1;
/* An explicit, non-empty hint skips all detection below. */
if ((len = strlen(charset_hint)) != 0) {
goto det_charset;
}
#if HAVE_MBSTRING
#if !defined(COMPILE_DL_MBSTRING)
/* XXX: Ugly things. Why don't we look for a more sophisticated way? */
/* Map the mbstring internal encoding onto the entity charsets. */
switch (MBSTRG(current_internal_encoding)) {
case mbfl_no_encoding_8859_1:
return cs_8859_1;
case mbfl_no_encoding_utf8:
return cs_utf_8;
case mbfl_no_encoding_euc_jp:
case mbfl_no_encoding_eucjp_win:
return cs_eucjp;
case mbfl_no_encoding_sjis:
case mbfl_no_encoding_sjis_win:
case mbfl_no_encoding_sjis_mac:
return cs_sjis;
case mbfl_no_encoding_cp1252:
return cs_cp1252;
case mbfl_no_encoding_8859_15:
return cs_8859_15;
case mbfl_no_encoding_big5:
return cs_big5;
case mbfl_no_encoding_euc_cn:
case mbfl_no_encoding_hz:
case mbfl_no_encoding_cp936:
return cs_gb2312;
/* NOTE(review): text appears to be missing here (see the header comment).
 * What follows takes the part of `localename` after the first '.', up to
 * an optional '@', as the charset name. */
dot = strchr(localename, '.');
if (dot) {
dot++;
/* locale specifies a codeset */
at = strchr(dot, '@');
if (at)
len = at - dot;
else
len = strlen(dot);
charset_hint = dot;
} else {
/* no explicit name; see if the name itself
* is the charset */
charset_hint = localename;
len = strlen(charset_hint);
}
}
#endif
det_charset:
if (charset_hint) {
int found = 0;
/* now walk the charset map and look for the codeset */
for (i = 0; charset_map[i].codeset; i++) {
/* Only the first `len` bytes of charset_hint are the name, hence the
 * length check plus strncasecmp instead of a plain strcasecmp. */
if (len == strlen(charset_map[i].codeset) && strncasecmp(charset_hint, charset_map[i].codeset, len) == 0) {
charset = charset_map[i].charset;
found = 1;
break;
}
}
if (!found) {
/* charset keeps its initial value cs_8859_1 in this case. The message
 * prints charset_hint up to its NUL, which can be more than `len`
 * bytes when the name was cut out of a locale string. */
php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming iso-8859-1",
charset_hint);
}
}
if (uf_result != NULL) {
zval_ptr_dtor(&uf_result);
}
return charset;
}
/* }}} */