Commit 552ea368 authored by Tomáš Stefan

Add parsing cross-reference table

for now without tests
parent 137cdbb6
#ifndef PDF_SIGIL_AUXILIARY_H
#define PDF_SIGIL_AUXILIARY_H
#include <stdlib.h>
#include <stdio.h>
#ifndef CHAR_T
#define CHAR_T
typedef char char_t;
typedef char char_t;
#endif /* CHAR_T */
......@@ -20,9 +20,11 @@ typedef char char_t;
void sigil_zeroize(void *a, size_t bytes);
int is_digit(const char_t c);
int is_whitespace(const char_t c);
int parse_number(FILE *in, size_t *number);
int parse_free_indicator(FILE *in, char_t *result);
void print_module_name(const char *module_name, int verbosity);
void print_module_result(int result, int verbosity);
void print_test_item(const char *test_name, int verbosity);
......
......@@ -8,6 +8,9 @@
// maximum bytes to read from the end of file to look for the "startxref"
#define XREF_SEARCH_OFFSET 1024
// capacity to choose for the first xref allocation
#define XREF_PREALLOCATION 10
// validate values
int sigil_config_self_test(int verbosity);
......
......@@ -5,7 +5,7 @@
#ifndef CHAR_T
#define CHAR_T
typedef char char_t;
typedef char char_t;
#endif /* CHAR_T */
......@@ -14,7 +14,7 @@ typedef char char_t;
#define ERR_PARAM 0x0002 // [_|_|_|x] 0000 0010
#define ERR_IO 0x0004 // [_|_|_|x] 0000 0100
#define ERR_PDF_CONT 0x0008 // [_|_|_|x] 0000 1000
#define ERR_5 0x0010 // [_|_|_|x] 0001 0000
#define ERR_NOT_IMPL 0x0010 // [_|_|_|x] 0001 0000
#define ERR_6 0x0020 // [_|_|_|x] 0010 0000
#define ERR_7 0x0040 // [_|_|_|x] 0100 0000
#define ERR_8 0x0080 // [_|_|_|x] 1000 0000
......
......@@ -6,9 +6,22 @@
#ifndef CHAR_T
#define CHAR_T
typedef char char_t;
typedef char char_t;
#endif /* CHAR_T */
#ifndef XREF_T
#define XREF_T
/* One cross-reference entry: the two numbers stored for a single object. */
typedef struct {
size_t byte_offset; /* byte offset read from the xref table for this object */
size_t generation_num; /* generation number read from the xref table */
} xref_entry_t;
/* Growable cross-reference table. */
typedef struct {
xref_entry_t **entry; /* array of entry pointers indexed by object number; NULL slot = no entry stored */
size_t capacity; /* number of slots currently allocated in entry */
} xref_t;
#endif /* XREF_T */
#define XREF_TYPE_UNSET 0
#define XREF_TYPE_TABLE 1
#define XREF_TYPE_STREAM 2
......@@ -18,6 +31,7 @@ typedef char char_t;
#define MODE_SIGN 2
typedef uint32_t mode_t;
struct xref_t;
typedef struct {
FILE *file;
......@@ -26,6 +40,7 @@ typedef struct {
short pdf_x, /* numbers from PDF header */
pdf_y; /* %PDF-<pdf_x>.<pdf_y> */
short xref_type;
xref_t *xref;
size_t file_size;
size_t pdf_start_offset; /* offset of %PDF-x.y */
size_t startxref;
......
......@@ -3,6 +3,7 @@
#include "error.h"
sigil_err_t process_trailer(sigil_t *sgl);
int sigil_trailer_self_test(int verbosity);
......
......@@ -5,8 +5,33 @@
#include "sigil.h"
#ifndef XREF_T
#define XREF_T
/* One cross-reference entry: the two numbers stored for a single object. */
typedef struct {
size_t byte_offset; /* byte offset read from the xref table for this object */
size_t generation_num; /* generation number read from the xref table */
} xref_entry_t;
/* Growable cross-reference table. */
typedef struct {
xref_entry_t **entry; /* array of entry pointers indexed by object number; NULL slot = no entry stored */
size_t capacity; /* number of slots currently allocated in entry */
} xref_t;
#endif /* XREF_T */
xref_t *new_xref();
void free_xref(xref_t *xref);
sigil_err_t read_startxref(sigil_t *sgl);
sigil_err_t read_xref_table(sigil_t *sgl);
sigil_err_t process_xref(sigil_t *sgl);
void print_xref(xref_t *xref);
int sigil_xref_self_test(int verbosity);
#endif /* PDF_SIGIL_XREF_H */
......@@ -29,6 +29,49 @@ int is_whitespace(const char_t c)
c == 0x20); // space
}
/*
 * Read an unsigned decimal number from a stream.
 *
 * Leading whitespace (as defined by is_whitespace) is skipped, then as many
 * decimal digits as possible are consumed. The first character following the
 * digits is pushed back onto the stream so the caller can read it again.
 *
 * in     - stream to read from
 * number - receives the parsed value; set to 0 before parsing starts
 *
 * Returns 0 when at least one digit was read, 1 otherwise (no digits, NULL
 * parameter, value does not fit into size_t, or the push-back failed).
 */
int parse_number(FILE *in, size_t *number)
{
    // fgetc() returns int; storing it in a char makes EOF indistinguishable
    // from a data byte (or never equal to EOF where char is unsigned)
    int c;
    int digits = 0;

    if (in == NULL || number == NULL)
        return 1;

    *number = 0;

    // skip leading whitespaces
    while ((c = fgetc(in)) != EOF && is_whitespace((char)c))
        ;

    // number
    while (c != EOF && is_digit((char)c)) {
        size_t digit = (size_t)(c - '0');

        // refuse values that would wrap around size_t
        if (*number > ((size_t)-1 - digit) / 10)
            return 1;

        *number = 10 * *number + digit;
        digits++;
        c = fgetc(in);
    }

    // give the terminating character back; nothing to give back on EOF
    if (c != EOF && ungetc(c, in) != c)
        return 1;

    // a number that ends exactly at end-of-file is still a valid number
    return digits == 0;
}
/*
 * Read the in-use / free marker of a cross-reference entry.
 *
 * Leading whitespace (as defined by is_whitespace) is skipped and the next
 * character is examined; it is consumed in every case.
 *
 * in     - stream to read from
 * result - receives 'f' or 'n' on success, left untouched on failure
 *
 * Returns 0 when the character was 'f' or 'n', 1 otherwise (including
 * end-of-file and NULL parameters).
 */
int parse_free_indicator(FILE *in, char_t *result)
{
    // int, not char: the value must be able to hold EOF as well as any byte
    int c;

    if (in == NULL || result == NULL)
        return 1;

    // skip leading whitespaces
    while ((c = fgetc(in)) != EOF && is_whitespace((char)c))
        ;

    switch (c) {
    case 'f':
    case 'n':
        *result = (char_t)c;
        return 0;
    default:
        return 1;
    }
}
void print_module_name(const char *module_name, int verbosity)
{
if (verbosity < 1)
......
......@@ -22,6 +22,14 @@ int sigil_config_self_test(int verbosity)
print_test_result(1, verbosity);
// TEST: XREF_PREALLOCATION
print_test_item("XREF_PREALLOCATION", verbosity);
if (XREF_PREALLOCATION < 1)
goto failed;
print_test_result(1, verbosity);
// all tests done
print_module_result(1, verbosity);
return 0;
......
......@@ -21,6 +21,9 @@ const char_t *sigil_err_string(sigil_err_t err)
if (err & ERR_PDF_CONT)
return "ERROR corrupted PDF file";
if (err & ERR_NOT_IMPL)
return "ERROR not implemented";
return "ERROR unknown";
}
......@@ -31,11 +34,11 @@ int sigil_error_self_test(int verbosity)
// TEST: error codes
print_test_item("error codes", verbosity);
if ((ERR_NO + ERR_ALLOC + ERR_PARAM +
ERR_IO + ERR_PDF_CONT + ERR_5 +
ERR_6 + ERR_7 + ERR_8 +
ERR_9 + ERR_10 + ERR_11 +
ERR_12 + ERR_13 + ERR_14 +
if ((ERR_NO + ERR_ALLOC + ERR_PARAM +
ERR_IO + ERR_PDF_CONT + ERR_NOT_IMPL +
ERR_6 + ERR_7 + ERR_8 +
ERR_9 + ERR_10 + ERR_11 +
ERR_12 + ERR_13 + ERR_14 +
ERR_15 + ERR_16
) != 0xffff || ERR_NO != 0)
{
......
......@@ -34,6 +34,7 @@ sigil_err_t sigil_init(sigil_t **sgl)
(*sgl)->pdf_x = 0;
(*sgl)->pdf_y = 0;
(*sgl)->xref_type = XREF_TYPE_UNSET;
(*sgl)->xref = NULL;
(*sgl)->file_size = 0;
(*sgl)->pdf_start_offset = 0;
(*sgl)->startxref = 0;
......@@ -115,12 +116,12 @@ sigil_err_t sigil_process(sigil_t *sgl)
void sigil_free(sigil_t *sgl)
{
if (sgl) {
if (sgl->file) {
if (sgl->file)
fclose(sgl->file);
}
if (sgl->filepath) {
if (sgl->filepath)
free(sgl->filepath);
}
if (sgl->xref)
free_xref(sgl->xref);
free(sgl);
sgl = NULL;
}
......
......@@ -19,7 +19,7 @@ sigil_err_t process_trailer(sigil_t *sgl)
int sigil_trailer_self_test(int verbosity)
{
print_module_name("xxx", verbosity);
print_module_name("trailer", verbosity);
// TEST: fn determine_xref_type - STREAM
print_test_item("xxx", verbosity);
......
......@@ -26,6 +26,82 @@ static sigil_err_t determine_xref_type(sigil_t *sgl)
return (sigil_err_t)ERR_NO;
}
/*
 * Store the location of object number `obj` in the cross-reference table.
 *
 * The entry array is indexed by object number and is grown by repeated
 * doubling until `obj` fits. If an entry for `obj` already exists it is
 * overwritten only when the new generation number is strictly higher.
 *
 * xref       - table to update
 * obj        - object number (index into xref->entry)
 * offset     - byte offset to record
 * generation - generation number to record
 *
 * Returns ERR_NO on success (including the case where an existing entry with
 * an equal or higher generation is kept), ERR_PARAM for an unusable table,
 * ERR_ALLOC when memory cannot be obtained. On ERR_ALLOC the table is left
 * unchanged and still valid.
 */
static sigil_err_t
add_xref_entry(xref_t *xref, size_t obj, size_t offset, size_t generation)
{
    if (xref == NULL || xref->entry == NULL || xref->capacity == 0)
        return (sigil_err_t)ERR_PARAM;

    // resize if needed
    if (obj >= xref->capacity) {
        const size_t max_capacity = ((size_t)-1) / sizeof(*xref->entry);
        size_t new_capacity = xref->capacity;

        while (obj >= new_capacity) {
            // doubling would overflow the byte count passed to realloc
            if (new_capacity > max_capacity / 2)
                return (sigil_err_t)ERR_ALLOC;
            new_capacity *= 2;
        }

        // keep the old pointer until realloc succeeds, otherwise the array
        // (and every entry in it) would leak on failure
        xref_entry_t **resized = realloc(xref->entry,
                                         sizeof(*resized) * new_capacity);
        if (resized == NULL)
            return (sigil_err_t)ERR_ALLOC;

        // new slots must read as "no entry"
        sigil_zeroize(resized + xref->capacity,
                      sizeof(*resized) * (new_capacity - xref->capacity));

        xref->entry = resized;
        xref->capacity = new_capacity;
    }

    xref_entry_t *entry = xref->entry[obj];

    if (entry == NULL) {
        entry = malloc(sizeof(*entry));
        if (entry == NULL)
            return (sigil_err_t)ERR_ALLOC;
        xref->entry[obj] = entry;
    } else if (entry->generation_num >= generation) {
        // an equal or newer generation is already stored, keep it
        return (sigil_err_t)ERR_NO;
    }

    entry->byte_offset = offset;
    entry->generation_num = generation;

    return (sigil_err_t)ERR_NO;
}
/*
 * Release a single cross-reference entry.
 * Passing NULL is allowed and does nothing.
 */
static void free_xref_entry(xref_entry_t *entry)
{
    // free(NULL) is defined to be a no-op, so no explicit guard is needed
    free(entry);
}
xref_t *new_xref()
{
xref_t *xref = malloc(sizeof(xref_t));
if (xref == NULL)
return NULL;
xref->entry = malloc(sizeof(xref_entry_t *) * XREF_PREALLOCATION);
if (xref->entry == NULL) {
free(xref);
return NULL;
}
xref->capacity = XREF_PREALLOCATION;
sigil_zeroize(xref->entry, sizeof(xref_entry_t *) * xref->capacity);
return xref;
}
/*
 * Release a cross-reference table together with every entry stored in it.
 * Passing NULL is allowed and does nothing.
 */
void free_xref(xref_t *xref)
{
    if (xref == NULL)
        return;

    if (xref->entry != NULL) {
        // size_t index: capacity is a size_t, an int counter would be
        // compared as unsigned and could overflow on very large tables
        for (size_t i = 0; i < xref->capacity; i++)
            free_xref_entry(xref->entry[i]);

        free(xref->entry);
    }

    free(xref);
}
sigil_err_t read_startxref(sigil_t *sgl)
{
// function parameter checks
......@@ -95,6 +171,65 @@ sigil_err_t read_startxref(sigil_t *sgl)
return (sigil_err_t)ERR_NO;
}
/*
 * Parse a classic cross-reference table starting at sgl->startxref and store
 * every in-use ('n') entry in sgl->xref, creating the table on first use.
 *
 * Expected layout: the keyword "xref", followed by any number of subsections.
 * Each subsection starts with two numbers (first object number, entry count)
 * and is followed by that many "<offset> <generation> <f|n>" entries.
 * Parsing stops at the first position where a subsection header number
 * cannot be read.
 *
 * Returns ERR_NO on success, ERR_PARAM for a missing context or file,
 * ERR_ALLOC when memory cannot be obtained, ERR_IO when seeking/reading
 * fails, ERR_PDF_CONT when the table is malformed.
 */
sigil_err_t read_xref_table(sigil_t *sgl)
{
    char keyword[5],
         free_indicator;
    size_t section_start = 0,
           section_cnt = 0,
           obj_offset = 0,
           obj_generation = 0;
    sigil_err_t err;

    if (sgl == NULL || sgl->file == NULL)
        return (sigil_err_t)ERR_PARAM;

    if (sgl->xref == NULL) {
        sgl->xref = new_xref();
        if (sgl->xref == NULL)
            return (sigil_err_t)ERR_ALLOC;
    }

    if (fseek(sgl->file, (long)sgl->startxref, SEEK_SET) != 0)
        return (sigil_err_t)ERR_IO;

    // read "xref"
    if (fgets(keyword, sizeof(keyword), sgl->file) == NULL)
        return (sigil_err_t)ERR_IO;
    if (strncmp(keyword, "xref", 4) != 0)
        return (sigil_err_t)ERR_PDF_CONT;

    for (;;) { // for all xref sections
        // read 2 numbers:
        // - first object in subsection
        // - number of entries in subsection
        if (parse_number(sgl->file, &section_start) != 0)
            break; // no further subsection header, the table is finished

        if (parse_number(sgl->file, &section_cnt) != 0)
            return (sigil_err_t)ERR_PDF_CONT;

        if (section_cnt < 1)
            return (sigil_err_t)ERR_PDF_CONT;

        // object numbers of this subsection must not wrap around
        if (section_cnt > (size_t)-1 - section_start)
            return (sigil_err_t)ERR_PDF_CONT;

        // for all entries in one section
        for (size_t section_offset = 0; section_offset < section_cnt; section_offset++) {
            if (parse_number(sgl->file, &obj_offset) != 0)
                return (sigil_err_t)ERR_PDF_CONT;
            if (parse_number(sgl->file, &obj_generation) != 0)
                return (sigil_err_t)ERR_PDF_CONT;
            if (parse_free_indicator(sgl->file, &free_indicator) != 0)
                return (sigil_err_t)ERR_PDF_CONT;

            // free ('f') entries carry no object location, skip them
            if (free_indicator != 'n')
                continue;

            err = add_xref_entry(sgl->xref, section_start + section_offset,
                                 obj_offset, obj_generation);
            if (err != ERR_NO)
                return err;
        }
    }

    return (sigil_err_t)ERR_NO;
}
sigil_err_t process_xref(sigil_t *sgl)
{
sigil_err_t err;
......@@ -112,10 +247,31 @@ sigil_err_t process_xref(sigil_t *sgl)
if (err != ERR_NO)
return err;
switch (sgl->xref_type) {
case XREF_TYPE_TABLE:
read_xref_table(sgl);
break;
case XREF_TYPE_STREAM:
return (sigil_err_t)ERR_NOT_IMPL; // TODO
default:
return (sigil_err_t)ERR_PDF_CONT;
}
// TODO
return 0;
}
/*
 * Print every stored cross-reference entry to stdout, one line per object,
 * preceded by an "XREF" heading. Empty slots are skipped.
 * Passing NULL is allowed and prints nothing.
 */
void print_xref(xref_t *xref)
{
    if (xref == NULL)
        return;

    printf("\nXREF\n");

    if (xref->entry == NULL)
        return;

    for (size_t i = 0; i < xref->capacity; i++) {
        const xref_entry_t *entry = xref->entry[i];

        if (entry == NULL)
            continue;

        // %zu is the conversion for size_t; %zd expects the signed counterpart
        printf("obj %zu | offset %zu | generation %zu\n",
               i, entry->byte_offset, entry->generation_num);
    }
}
int sigil_xref_self_test(int verbosity)
......
......@@ -43,7 +43,7 @@ int main(int argc, char **argv)
}
if (verbosity >= 1)
printf("\n STARTING TEST PROCEDURE\n");
printf("\n ====== STARTING TEST PROCEDURE ======\n");
// call self_test function for each module
if (sigil_config_self_test(verbosity) != 0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment