Name:
Anonymous
2008-01-16 5:32
Wasn't satisfied with shitty PDF "recovery" programs, so I wrote my own set. Here's the shortest one.
/* Scan a PDF file for all object headers matching
[0-9]+ +[0-9]+ +obj
and output an xref table in the proper format.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
/* Route every isdigit() call in this file to the local _isdigit() defined
   below. */
#define isdigit _isdigit
/* Declaration of __getmainargs, which main() calls to obtain argc and argv.
   NOTE(review): presumably the MSVC runtime (msvcrt) export of that name;
   confirm against the build's link settings. */
void __cdecl __getmainargs(int *, char ***, char ***, int, int *);
/*
 * Return 1 when c is an ASCII decimal digit ('0'..'9'), 0 otherwise.
 *
 * The parameter is an int so that plain char values (signed or unsigned)
 * can be passed without conversion surprises.  The function is static so
 * that a definition is always available in this translation unit even when
 * the compiler decides not to inline the call.
 */
static __inline char _isdigit(int c)
{
    return (c >= '0') && (c <= '9');
}
/* Number of object headers found by scan_pdf_objects() (duplicates
   included); printed by main() at the end of the run. */
int objtotal=0;
/* One node of the object list.  add_to_list() keeps the nodes in ascending
   order of object number; init_list() creates the first node with num 0,
   offset 0 and gen 65535. */
struct _objlist {
int offset; /* file offset of the first digit of the object number */
int num; /* object number */
int gen; /* generation number */
struct _objlist *next; /* following node, or 0 at the end of the list */
} *objlist; /* head of the list; allocated by init_list(), released by free_list() */
/*
 * Record an object in the global list `objlist`, which is kept sorted by
 * ascending object number behind the head node.
 *
 *   offset  file offset at which the object header starts
 *   num     object number
 *   gen     generation number
 *
 * If an entry with the same object number already exists, a warning is
 * printed and that entry is overwritten with the new offset/generation, so
 * the last occurrence found in the file wins.  Otherwise a new node is
 * inserted at its sorted position.  On allocation failure the program
 * prints a message and exits with status -1.
 *
 * The list must have been set up with init_list() first.
 */
void add_to_list(int offset, int num, int gen)
{
    struct _objlist *curnode = objlist;
    struct _objlist *existing = NULL;
    struct _objlist *newnode;

    /* Stop on the last node whose number is strictly below num. */
    while (curnode->next && curnode->next->num < num)
        curnode = curnode->next;

    /*
     * After the walk every non-head node we can stand on has a smaller
     * number than num, so an existing entry for num is either the head
     * itself (num == 0) or, in the general case, the successor.
     */
    if (curnode->num == num)
        existing = curnode;
    else if (curnode->next && curnode->next->num == num)
        existing = curnode->next;

    if (existing) {
        printf("Warning: Found duplicate object #%d\n", num);
        existing->offset = offset;
        existing->gen = gen;
        return;
    }

    newnode = malloc(sizeof *newnode);
    if (!newnode) {
        printf("Out of memory!\n");
        exit(-1);
    }
    newnode->offset = offset;
    newnode->num = num;
    newnode->gen = gen;
    /* Splice in right after curnode to keep the list sorted. */
    newnode->next = curnode->next;
    curnode->next = newnode;
}
/*
 * Create the head node of the global object list.
 *
 * The head stands for object 0: offset 0, generation 65535, no successor.
 * dump_list() prints it as the leading free ('f') entry of the table.
 * On allocation failure the program prints a message and exits with
 * status -1.
 */
void init_list(void)
{
    objlist = malloc(sizeof *objlist);
    if (!objlist) {
        printf("Out of memory!\n");
        exit(-1);
    }
    objlist->num = 0;
    objlist->offset = 0;
    objlist->gen = 65535;
    objlist->next = NULL;
}
/*
 * Print the collected objects to stdout as a PDF cross-reference section.
 *
 * Output layout:
 *   "xref" CR LF
 *   "0 <count>" CR LF           where count = highest object number + 1
 *   one line per object number 0 .. count-1, each
 *   "<10-digit offset> <5-digit generation> <n|f>" CR LF
 *
 * Object numbers that were not found are emitted as free entries
 * ("0000000000 00000 f").  Does nothing when the list was never
 * initialised.
 *
 * NOTE(review): if stdout is a text-mode stream on Windows the LF may be
 * expanded on output; confirm how the output is captured.
 */
void dump_list(void)
{
    struct _objlist *node;
    int next_num = 0;   /* object number the next printed line stands for */

    if (!objlist)
        return;

    /* The list is sorted, so the last node carries the highest number. */
    for (node = objlist; node->next; node = node->next)
        ;
    printf("xref\r\n0 %d\r\n", node->num + 1);

    for (node = objlist; node; node = node->next) {
        /* A number already printed (or below the current position) cannot
           be placed any more; skipping it keeps line k == object k. */
        if (node->num < next_num)
            continue;

        /* Fill the gap BEFORE the node's own entry so that the k-th line
           of the table always describes object k. */
        for (; next_num < node->num; next_num++)
            printf("%010d %05d f\x0d\x0a", 0, 0);

        printf("%010d %05d %c\x0d\x0a", node->offset, node->gen,
               node->num ? 'n' : 'f');
        next_num = node->num + 1;
    }
}
free_list() {
struct _objlist *nextptr = objlist;
do {
nextptr = objlist->next;
free(objlist);
} while(objlist = nextptr);
}
/*
 * Return 1 when c is one of the six white-space bytes tested here
 * (NUL, TAB, LF, FF, CR, SPACE), otherwise 0.
 */
char iswhs(char c)
{
    switch (c) {
    case 0:     /* NUL   */
    case 9:     /* TAB   */
    case 10:    /* LF    */
    case 12:    /* FF    */
    case 13:    /* CR    */
    case 32:    /* SPACE */
        return 1;
    default:
        return 0;
    }
}
/*
 * Return 1 when c is one of the delimiter characters
 * ( ) < > [ ] { } / %   and 0 for anything else.
 */
char isd(char c)
{
    switch (c) {
    case '(':
    case ')':
    case '<':
    case '>':
    case '[':
    case ']':
    case '{':
    case '}':
    case '/':
    case '%':
        return 1;
    default:
        return 0;
    }
}
/*
 * Return 1 when c ends a token, i.e. when it is either white space
 * (see iswhs) or a delimiter (see isd); otherwise 0.
 */
char iswhd(char c)
{
    if (iswhs(c)) {
        return 1;
    }
    return isd(c) ? 1 : 0;
}
/*
 * Parse the run of decimal digits that starts at data[*pos] and move *pos
 * past it.  Returns the value, or -1 when the run is longer than 9 digits
 * (such a value could overflow an int and is never a usable object or
 * generation number here).
 */
static int scan_number(const char *data, int size, int *pos)
{
    int value = 0;
    int digits = 0;

    while (*pos < size && isdigit(data[*pos])) {
        if (digits < 9)
            value = value * 10 + (data[*pos] - '0');
        digits++;
        (*pos)++;
    }
    return digits > 9 ? -1 : value;
}

/* Return the index of the first non-white-space byte at or after pos
   (or size when only white space remains). */
static int skip_whs(const char *data, int size, int pos)
{
    while (pos < size && iswhs(data[pos]))
        pos++;
    return pos;
}

/* Return the index just past the first "endobj" + white-space byte found at
   or after pos, or size when there is none. */
static int skip_past_endobj(const char *data, int size, int pos)
{
    for (; pos + 6 < size; pos++) {
        if (data[pos] == 'e' && memcmp(data + pos, "endobj", 6) == 0 &&
            iswhs(data[pos + 6]))
            return pos + 7;
    }
    return size;
}

/*
 * Scan a PDF image held in memory for object headers of the form
 *
 *     <number> <white space> <generation> <white space> "obj" <ws or delimiter>
 *
 *   filedata  the file contents (need not be NUL-terminated)
 *   fsize     number of valid bytes in filedata
 *
 * For every header found the function prints "offset number generation",
 * records the object with add_to_list() and increments objtotal.  After a
 * header, scanning resumes only behind the next "endobj" that is followed by
 * white space, so digits inside the object body are not mistaken for
 * headers.  If no such "endobj" exists the scan ends.
 *
 * When a candidate fails to match, the scan backs up to the start of the
 * candidate's second number, because that number may itself be the object
 * number of a real header: in "%PDF-1.4\n1 0 obj" the candidate "4 1" fails
 * on "0", and "1 0 obj" must still be found.
 */
void scan_pdf_objects(char *filedata, int fsize)
{
    int i = 0;

    while (i < fsize) {
        int obj_start, obj_num, obj_cnt;
        int gen_start, gen_end, kw;

        if (!isdigit(filedata[i])) {
            i++;
            continue;
        }

        /* First number: the would-be object number. */
        obj_start = i;
        obj_num = scan_number(filedata, fsize, &i);

        /* It must be followed by white space and then another digit. */
        gen_start = skip_whs(filedata, fsize, i);
        if (gen_start == i || gen_start >= fsize ||
            !isdigit(filedata[gen_start])) {
            i = gen_start;
            continue;
        }

        /* Second number: the would-be generation number. */
        gen_end = gen_start;
        obj_cnt = scan_number(filedata, fsize, &gen_end);

        /* White space, the keyword, and one terminating byte must follow. */
        kw = skip_whs(filedata, fsize, gen_end);
        if (obj_num < 0 || obj_cnt < 0 || kw == gen_end || kw + 3 >= fsize ||
            memcmp(filedata + kw, "obj", 3) != 0 || !iswhd(filedata[kw + 3])) {
            /* No header here: retry with the second number as object
               number.  gen_start > obj_start, so the scan always advances. */
            i = gen_start;
            continue;
        }

        printf("%010d %05d %05d\n", obj_start, obj_num, obj_cnt);
        add_to_list(obj_start, obj_num, obj_cnt);
        objtotal++;

        /* Ignore the object body. */
        i = skip_past_endobj(filedata, fsize, kw + 3);
    }
}
main() {
int argc;
char **argv;
int fhandle,fsize;
char *filedata;
__getmainargs(&argc,&argv,&fhandle,0,&fsize);
if(argc!=2) {
printf("Usage: objscan filename");
exit(1);
}
if((fhandle=CreateFile(argv[1],GENERIC_READ,FILE_SHARE_READ,0,OPEN_EXISTING,0,0))==-1) {
printf("Error opening file %s",argv[1]);
exit(2);
}
if(!(filedata=malloc(fsize=GetFileSize(fhandle,0)))) {
printf("Out of memory - could not allocate %s bytes",fsize);
free(filedata);
exit(3);
}
if(!ReadFile(fhandle,filedata,fsize,&fsize,0)) {
printf("Error reading file.");
exit(4);
}
CloseHandle(fhandle);
init_list();
scan_pdf_objects(filedata,fsize);
dump_list();
free_list();
printf("%d total objects found\n",objtotal);
free(filedata);
exit(0);
}