vfwscanf.c raw

   1  #include <stdio.h>
   2  #include <stdlib.h>
   3  #include <stdarg.h>
   4  #include <ctype.h>
   5  #include <wchar.h>
   6  #include <wctype.h>
   7  #include <limits.h>
   8  #include <string.h>
   9  
  10  #include "stdio_impl.h"
  11  #include "shgetc.h"
  12  #include "intscan.h"
  13  #include "floatscan.h"
  14  
  15  #define SIZE_hh -2
  16  #define SIZE_h  -1
  17  #define SIZE_def 0
  18  #define SIZE_l   1
  19  #define SIZE_L   2
  20  #define SIZE_ll  3
  21  
  22  static void store_int(void *dest, int size, unsigned long long i)
  23  {
  24  	if (!dest) return;
  25  	switch (size) {
  26  	case SIZE_hh:
  27  		*(char *)dest = i;
  28  		break;
  29  	case SIZE_h:
  30  		*(short *)dest = i;
  31  		break;
  32  	case SIZE_def:
  33  		*(int *)dest = i;
  34  		break;
  35  	case SIZE_l:
  36  		*(long *)dest = i;
  37  		break;
  38  	case SIZE_ll:
  39  		*(long long *)dest = i;
  40  		break;
  41  	}
  42  }
  43  
  44  static void *arg_n(va_list ap, unsigned int n)
  45  {
  46  	void *p;
  47  	unsigned int i;
  48  	va_list ap2;
  49  	va_copy(ap2, ap);
  50  	for (i=n; i>1; i--) va_arg(ap2, void *);
  51  	p = va_arg(ap2, void *);
  52  	va_end(ap2);
  53  	return p;
  54  }
  55  
  56  static int in_set(const wchar_t *set, int c)
  57  {
  58  	int j;
  59  	const wchar_t *p = set;
  60  	if (*p == '-') {
  61  		if (c=='-') return 1;
  62  		p++;
  63  	} else if (*p == ']') {
  64  		if (c==']') return 1;
  65  		p++;
  66  	}
  67  	for (; *p && *p != ']'; p++) {
  68  		if (*p=='-' && p[1] && p[1] != ']')
  69  			for (j=p++[-1]; j<*p; j++)
  70  				if (c==j) return 1;
  71  		if (c==*p) return 1;
  72  	}
  73  	return 0;
  74  }
  75  
/*
 * Local fast-path replacements for getwc/ungetwc that peek directly at the
 * stream's rpos/rend fields (presumably the read cursor and end of buffered
 * input from stdio_impl.h -- confirm there). The "#if 1" keeps them always
 * enabled; changing it to 0 would fall back to the plain library functions.
 */
#if 1
#undef getwc
/*
 * If buffered input is available (rpos != rend) and the next byte is below
 * 128, consume and yield that byte directly. Otherwise call the real
 * getwc function; the parenthesized name (getwc) prevents this macro from
 * being expanded again. Note that f is evaluated more than once.
 */
#define getwc(f) \
	((f)->rpos != (f)->rend && *(f)->rpos < 128 ? *(f)->rpos++ : (getwc)(f))

#undef ungetwc
/*
 * If rend is non-null and c, compared as unsigned, is below 128 (so a
 * negative c such as a failed read never takes this path), step rpos back by
 * one and yield the byte found there. Otherwise call the real ungetwc
 * function (a macro name is not re-expanded inside its own expansion).
 * NOTE(review): the fast path does not store c into the buffer; it appears
 * to rely on c being the byte most recently consumed via the getwc fast
 * path above -- confirm before using it to push back any other character.
 */
#define ungetwc(c,f) \
	((f)->rend && (c)<128U ? *--(f)->rpos : ungetwc((c),(f)))
#endif
  85  
  86  int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
  87  {
  88  	int width;
  89  	int size;
  90  	int alloc;
  91  	const wchar_t *p;
  92  	int c, t;
  93  	char *s;
  94  	wchar_t *wcs;
  95  	void *dest=NULL;
  96  	int invert;
  97  	int matches=0;
  98  	off_t pos = 0, cnt;
  99  	static const char size_pfx[][3] = { "hh", "h", "", "l", "L", "ll" };
 100  	char tmp[3*sizeof(int)+10];
 101  	const wchar_t *set;
 102  	size_t i, k;
 103  
 104  	FLOCK(f);
 105  
 106  	fwide(f, 1);
 107  
 108  	for (p=fmt; *p; p++) {
 109  
 110  		alloc = 0;
 111  
 112  		if (iswspace(*p)) {
 113  			while (iswspace(p[1])) p++;
 114  			while (iswspace((c=getwc(f)))) pos++;
 115  			ungetwc(c, f);
 116  			continue;
 117  		}
 118  		if (*p != '%' || p[1] == '%') {
 119  			if (*p == '%') {
 120  				p++;
 121  				while (iswspace((c=getwc(f)))) pos++;
 122  			} else {
 123  				c = getwc(f);
 124  			}
 125  			if (c!=*p) {
 126  				ungetwc(c, f);
 127  				if (c<0) goto input_fail;
 128  				goto match_fail;
 129  			}
 130  			pos++;
 131  			continue;
 132  		}
 133  
 134  		p++;
 135  		if (*p=='*') {
 136  			dest = 0; p++;
 137  		} else if (iswdigit(*p) && p[1]=='$') {
 138  			dest = arg_n(ap, *p-'0'); p+=2;
 139  		} else {
 140  			dest = va_arg(ap, void *);
 141  		}
 142  
 143  		for (width=0; iswdigit(*p); p++) {
 144  			width = 10*width + *p - '0';
 145  		}
 146  
 147  		if (*p=='m') {
 148  			wcs = 0;
 149  			s = 0;
 150  			alloc = !!dest;
 151  			p++;
 152  		} else {
 153  			alloc = 0;
 154  		}
 155  
 156  		size = SIZE_def;
 157  		switch (*p++) {
 158  		case 'h':
 159  			if (*p == 'h') p++, size = SIZE_hh;
 160  			else size = SIZE_h;
 161  			break;
 162  		case 'l':
 163  			if (*p == 'l') p++, size = SIZE_ll;
 164  			else size = SIZE_l;
 165  			break;
 166  		case 'j':
 167  			size = SIZE_ll;
 168  			break;
 169  		case 'z':
 170  		case 't':
 171  			size = SIZE_l;
 172  			break;
 173  		case 'L':
 174  			size = SIZE_L;
 175  			break;
 176  		case 'd': case 'i': case 'o': case 'u': case 'x':
 177  		case 'a': case 'e': case 'f': case 'g':
 178  		case 'A': case 'E': case 'F': case 'G': case 'X':
 179  		case 's': case 'c': case '[':
 180  		case 'S': case 'C':
 181  		case 'p': case 'n':
 182  			p--;
 183  			break;
 184  		default:
 185  			goto fmt_fail;
 186  		}
 187  
 188  		t = *p;
 189  
 190  		/* Transform S,C -> ls,lc */
 191  		if ((t&0x2f)==3) {
 192  			size = SIZE_l;
 193  			t |= 32;
 194  		}
 195  
 196  		if (t != 'n') {
 197  			if (t != '[' && (t|32) != 'c')
 198  				while (iswspace((c=getwc(f)))) pos++;
 199  			else
 200  				c=getwc(f);
 201  			if (c < 0) goto input_fail;
 202  			ungetwc(c, f);
 203  		}
 204  
 205  		switch (t) {
 206  		case 'n':
 207  			store_int(dest, size, pos);
 208  			/* do not increment match count, etc! */
 209  			continue;
 210  
 211  		case 's':
 212  		case 'c':
 213  		case '[':
 214  			if (t == 'c') {
 215  				if (width<1) width = 1;
 216  				invert = 1;
 217  				set = L"";
 218  			} else if (t == 's') {
 219  				invert = 1;
 220  				static const wchar_t spaces[] = {
 221  					' ', '\t', '\n', '\r', 11, 12,  0x0085,
 222  					0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
 223  					0x2006, 0x2008, 0x2009, 0x200a,
 224  					0x2028, 0x2029, 0x205f, 0x3000, 0 };
 225  				set = spaces;
 226  			} else {
 227  				if (*++p == '^') p++, invert = 1;
 228  				else invert = 0;
 229  				set = p;
 230  				if (*p==']') p++;
 231  				while (*p!=']') {
 232  					if (!*p) goto fmt_fail;
 233  					p++;
 234  				}
 235  			}
 236  
 237  			s = (size == SIZE_def) ? dest : 0;
 238  			wcs = (size == SIZE_l) ? dest : 0;
 239  
 240  			int gotmatch = 0;
 241  
 242  			if (width < 1) width = -1;
 243  
 244  			i = 0;
 245  			if (alloc) {
 246  				k = t=='c' ? width+1U : 31;
 247  				if (size == SIZE_l) {
 248  					wcs = malloc(k*sizeof(wchar_t));
 249  					if (!wcs) goto alloc_fail;
 250  				} else {
 251  					s = malloc(k);
 252  					if (!s) goto alloc_fail;
 253  				}
 254  			}
 255  			while (width) {
 256  				if ((c=getwc(f))<0) break;
 257  				if (in_set(set, c) == invert)
 258  					break;
 259  				if (wcs) {
 260  					wcs[i++] = c;
 261  					if (alloc && i==k) {
 262  						k += k+1;
 263  						wchar_t *tmp = realloc(wcs, k*sizeof(wchar_t));
 264  						if (!tmp) goto alloc_fail;
 265  						wcs = tmp;
 266  					}
 267  				} else if (size != SIZE_l) {
 268  					int l = wctomb(s?s+i:tmp, c);
 269  					if (l<0) goto input_fail;
 270  					i += l;
 271  					if (alloc && i > k-4) {
 272  						k += k+1;
 273  						char *tmp = realloc(s, k);
 274  						if (!tmp) goto alloc_fail;
 275  						s = tmp;
 276  					}
 277  				}
 278  				pos++;
 279  				width-=(width>0);
 280  				gotmatch=1;
 281  			}
 282  			if (width) {
 283  				ungetwc(c, f);
 284  				if (t == 'c' || !gotmatch) goto match_fail;
 285  			}
 286  
 287  			if (alloc) {
 288  				if (size == SIZE_l) *(wchar_t **)dest = wcs;
 289  				else *(char **)dest = s;
 290  			}
 291  			if (t != 'c') {
 292  				if (wcs) wcs[i] = 0;
 293  				if (s) s[i] = 0;
 294  			}
 295  			break;
 296  
 297  		case 'd': case 'i': case 'o': case 'u': case 'x':
 298  		case 'a': case 'e': case 'f': case 'g':
 299  		case 'A': case 'E': case 'F': case 'G': case 'X':
 300  		case 'p':
 301  			if (width < 1) width = 0;
 302  			snprintf(tmp, sizeof tmp, "%.*s%.0d%s%c%%lln",
 303  				1+!dest, "%*", width, size_pfx[size+2], t);
 304  			cnt = 0;
 305  			if (fscanf(f, tmp, dest?dest:&cnt, &cnt) == -1)
 306  				goto input_fail;
 307  			else if (!cnt)
 308  				goto match_fail;
 309  			pos += cnt;
 310  			break;
 311  		default:
 312  			goto fmt_fail;
 313  		}
 314  
 315  		if (dest) matches++;
 316  	}
 317  	if (0) {
 318  fmt_fail:
 319  alloc_fail:
 320  input_fail:
 321  		if (!matches) matches--;
 322  match_fail:
 323  		if (alloc) {
 324  			free(s);
 325  			free(wcs);
 326  		}
 327  	}
 328  	FUNLOCK(f);
 329  	return matches;
 330  }
 331  
 332  weak_alias(vfwscanf,__isoc99_vfwscanf);
 333