Commit 2f78f75d authored by Tomáš Stefan's avatar Tomáš Stefan

Big refactoring

It is now possible to set the input data as one of the following:
 - FILE *
 - path to the file (const char *)
 - char *, pointing to the pdf content

All data-related operations now have their own dedicated functions.

If the input source is a file and its size is under the threshold defined
 in config.h, the whole content is loaded into memory.
parent a069df3c
......@@ -14,15 +14,22 @@ void sigil_zeroize(void *a, size_t bytes);
int is_digit(const char_t c);
int is_whitespace(const char_t c);
sigil_err_t skip_leading_whitespaces(FILE *in);
sigil_err_t skip_dictionary(FILE *in);
sigil_err_t skip_dict_unknown_value(FILE *in);
sigil_err_t pdf_read(sigil_t *sgl, size_t size, char *result, size_t *res_size);
sigil_err_t pdf_get_char(sigil_t *sgl, char *result);
sigil_err_t pdf_peek_char(sigil_t *sgl, char *result);
sigil_err_t parse_number(FILE *in, size_t *number);
sigil_err_t parse_keyword(FILE *in, keyword_t *keyword);
sigil_err_t parse_free_indicator(FILE *in, free_indicator_t *result);
sigil_err_t parse_indirect_reference(FILE *in, reference_t *ref);
sigil_err_t parse_dict_key(FILE *in, dict_key_t *dict_key);
sigil_err_t pdf_move_pos_rel(sigil_t *sgl, ssize_t shift_bytes);
sigil_err_t pdf_move_pos_abs(sigil_t *sgl, size_t position);
sigil_err_t skip_leading_whitespaces(sigil_t *sgl);
sigil_err_t skip_dictionary(sigil_t *sgl);
sigil_err_t skip_dict_unknown_value(sigil_t *sgl);
sigil_err_t parse_number(sigil_t *sgl, size_t *number);
sigil_err_t parse_keyword(sigil_t *sgl, keyword_t *keyword);
sigil_err_t parse_free_indicator(sigil_t *sgl, free_indicator_t *result);
sigil_err_t parse_indirect_reference(sigil_t *sgl, reference_t *ref);
sigil_err_t parse_dict_key(sigil_t *sgl, dict_key_t *dict_key);
const char *sigil_err_string(sigil_err_t err);
......@@ -31,6 +38,9 @@ void print_module_result(int result, int verbosity);
void print_test_item(const char *test_name, int verbosity);
void print_test_result(int result, int verbosity);
sigil_t *test_prepare_sgl_content(char *content, size_t size);
sigil_t *test_prepare_sgl_path(const char *path);
int sigil_auxiliary_self_test(int verbosity);
#endif /* PDF_SIGIL_AUXILIARY_H */
......@@ -3,13 +3,16 @@
// maximum bytes to read from the beginning of file to look for the "%PDF-x.y"
#define HEADER_SEARCH_OFFSET 1024
#define HEADER_SEARCH_OFFSET 1024
// maximum bytes to read from the end of file to look for the "startxref"
#define XREF_SEARCH_OFFSET 1024
#define XREF_SEARCH_OFFSET 1024
// capacity to choose for the first xref allocation
#define XREF_PREALLOCATION 10
#define XREF_PREALLOCATION 10
// threshold in bytes for loading whole file into buffer
#define THRESHOLD_FILE_BUFFERING 10485760
// validate values
int sigil_config_self_test(int verbosity);
......
......@@ -6,8 +6,9 @@
#define COLOR_GREEN "\x1b[32m"
#define COLOR_RESET "\x1b[0m"
#define KEYWORD_xref 0
#define KEYWORD_trailer 1
#define KEYWORD_UNSET 0
#define KEYWORD_xref 1
#define KEYWORD_trailer 2
#define XREF_TYPE_UNSET 0
#define XREF_TYPE_TABLE 1
......@@ -16,27 +17,21 @@
#define IN_USE_ENTRY 0
#define FREE_ENTRY 1
#define DICT_KEY_Size 0
#define DICT_KEY_Prev 1
#define DICT_KEY_Root 2
#define DICT_KEY_unknown 3
#define ERR_NO 0
#define ERR_ALLOC 1
#define ERR_PARAM 2
#define ERR_IO 3
#define ERR_PDF_CONT 4
#define ERR_NOT_IMPL 5
#define ERR_6 6
#define ERR_7 7
#define ERR_8 8
#define ERR_9 9
#define ERR_10 10
#define ERR_11 11
#define ERR_12 12
#define ERR_13 13
#define ERR_14 14
#define ERR_15 15
#define ERR_16 16
#define DICT_KEY_UNKNOWN 0
#define DICT_KEY_Size 1
#define DICT_KEY_Prev 2
#define DICT_KEY_Root 3
#define DEALLOCATE_FILE 0x01
#define DEALLOCATE_BUFFER 0x02
#define ERR_NO 0
#define ERR_ALLOCATION 1
#define ERR_PARAMETER 2
#define ERR_IO 3
#define ERR_PDF_CONTENT 4
#define ERR_NOT_IMPLEMENTED 5
#define ERR_NO_DATA 6
#define ERR_END_OF_DICT 7
#endif /* PDF_SIGIL_CONSTANTS_H */
......@@ -6,11 +6,15 @@
sigil_err_t sigil_init(sigil_t **sgl);
sigil_err_t sigil_verify(sigil_t *sgl, const char *filepath);
sigil_err_t sigil_set_pdf_file(sigil_t *sgl, FILE *pdf_file);
sigil_err_t sigil_set_pdf_path(sigil_t *sgl, const char *path_to_pdf);
sigil_err_t sigil_set_pdf_buffer(sigil_t *sgl, char *pdf_content, size_t size);
sigil_err_t sigil_verify(sigil_t *sgl);
// ... get functions TODO
void sigil_free(sigil_t *sgl);
void sigil_free(sigil_t **sgl);
int sigil_sigil_self_test(int verbosity);
......
......@@ -26,19 +26,26 @@ typedef struct {
// Cross-reference data together with the values read from the trailer.
// NOTE(review): the three size_t members are listed twice below; this looks
// like old/new diff lines (re-aligned declarations) shown side by side --
// confirm against the repository.
typedef struct {
xref_entry_t **entry;
// presumably the number of allocated slots in `entry` -- TODO confirm
size_t capacity;
// number parsed for the trailer's Size key
size_t size_from_trailer;
// number parsed for the trailer's Prev key
size_t prev_section;
size_t capacity;
size_t size_from_trailer;
size_t prev_section;
} xref_t;
// Input source of the PDF: a FILE stream and/or an in-memory buffer.
// NOTE(review): `FILE *file;` appears twice below; this looks like the old and
// the new (re-aligned) diff line shown together -- confirm against the
// repository.
typedef struct {
FILE *file;
FILE *file;
// PDF content held in memory (supplied by the caller or loaded from `file`)
char *buffer;
// presumably the current read position inside `buffer` -- TODO confirm
size_t buf_pos;
// size of the PDF data in bytes
size_t size;
// bit mask of DEALLOCATE_FILE / DEALLOCATE_BUFFER; tells sigil_free() which
// of the resources above it has to release
uint32_t deallocation_info;
} pdf_data_t;
// Library context: the input data plus everything parsed from it so far.
typedef struct {
// where the PDF content is read from
pdf_data_t pdf_data;
short pdf_x, /* numbers from PDF header */
pdf_y; /* %PDF-<pdf_x>.<pdf_y> */
// one of the XREF_TYPE_* constants (XREF_TYPE_UNSET after sigil_init)
short xref_type;
xref_t *xref;
// indirect reference parsed for the trailer's Root key
reference_t ref_catalog_dict;
size_t file_size;
size_t pdf_start_offset; /* offset of %PDF-x.y */
// presumably the offset that follows the "startxref" keyword -- TODO confirm
size_t startxref;
} sigil_t;
......
This diff is collapsed.
......@@ -29,6 +29,14 @@ int sigil_config_self_test(int verbosity)
print_test_result(1, verbosity);
// TEST: THRESHOLD_FILE_BUFFERING
print_test_item("THRESHOLD_FILE_BUFFERING", verbosity);
if (THRESHOLD_FILE_BUFFERING < 0)
goto failed;
print_test_result(1, verbosity);
// all tests done
print_module_result(1, verbosity);
return 0;
......
......@@ -8,143 +8,120 @@
// Locate the "%PDF-x.y" header and store the version numbers (pdf_x, pdf_y)
// and the header's offset (pdf_start_offset) in sgl.
//
// NOTE(review): this listing appears to interleave the pre-refactor body
// (fgetc()-based state machine on sgl->file, ERR_PARAM / ERR_PDF_CONT) with
// the post-refactor body (loop built on pdf_read() / pdf_move_pos_*()), which
// is why declarations repeat and braces do not balance -- confirm against the
// repository before relying on statement order.
sigil_err_t process_header(sigil_t *sgl)
{
// function parameter checks
if (sgl == NULL || sgl->file == NULL)
return ERR_PARAM;
sigil_err_t err;
size_t offset;
char tmp[6],
c;
size_t read_size;
size_t pdf_x, pdf_y;
if (sgl == NULL)
return ERR_PARAMETER;
// start scanning at absolute position 0
err = pdf_move_pos_abs(sgl, 0);
if (err != ERR_NO)
return err;
offset = 0;
while(1) {
// give up once the search window is exceeded
if (offset > HEADER_SEARCH_OFFSET)
return ERR_PDF_CONTENT;
// read a 5-byte candidate for the "%PDF-" marker
err = pdf_read(sgl, 5, tmp, &read_size);
if (err != ERR_NO)
return err;
if (read_size != 5)
return ERR_PDF_CONTENT;
// "\x25" is the '%' character
if (strncmp(tmp, "\x25PDF-", 5) == 0) {
// marker found: parse "<pdf_x>.<pdf_y>"
if ((err = parse_number(sgl, &pdf_x)) != ERR_NO)
return err;
if ((err = pdf_get_char(sgl, &c)) != ERR_NO)
return err;
if (c != '.')
return ERR_PDF_CONTENT;
if ((err = parse_number(sgl, &pdf_y)) != ERR_NO)
return err;
// only versions 1.0 - 1.7 and 2.0 are accepted
if ((pdf_x == 1 && pdf_y >= 0 && pdf_y <= 7) ||
(pdf_x == 2 && pdf_y == 0))
{
sgl->pdf_x = pdf_x;
sgl->pdf_y = pdf_y;
} else {
return ERR_PDF_CONTENT;
}
if (ftell(sgl->file) != 0) {
if (fseek(sgl->file, 0, SEEK_SET) != 0 || ftell(sgl->file) != 0)
return ERR_IO;
}
sgl->pdf_start_offset = offset;
const char_t expected[] = {'%', 'P', 'D', 'F', '-'};
size_t offset = 0;
int found = 0,
c;
return ERR_NO;
}
while ((c = fgetc(sgl->file)) != EOF && found < 8 &&
offset - found <= HEADER_SEARCH_OFFSET )
{
// count offset from start to '%' character
// PDF header size is subtracted later
// move 4 bytes back relative to the current position
if ((err = pdf_move_pos_rel(sgl, -4)) != ERR_NO)
return err;
offset++;
if (found < 5) {
if (c == (int)expected[found]) {
found++;
} else if (c == (int)expected[0]) {
found = 1;
} else {
found = 0;
}
} else if (found == 5) {
if (is_digit(c)) {
sgl->pdf_x = c - '0';
found++;
} else if (c == (int)expected[0]) {
found = 1;
} else {
found = 0;
}
} else if (found == 6) {
if (c == (int)'.') {
found++;
} else if (c == (int)expected[0]) {
found = 1;
} else {
found = 0;
}
} else if (found == 7) {
if (is_digit(c)) {
sgl->pdf_y = c - '0';
found++;
} else if (c == (int)expected[0]) {
found = 1;
} else {
found = 0;
}
}
}
if (found != 8)
return ERR_PDF_CONT;
// offset counted with header -> subtract header size
sgl->pdf_start_offset = offset - found;
if (sgl->pdf_start_offset > HEADER_SEARCH_OFFSET)
return ERR_PDF_CONT;
return ERR_NO;
return ERR_PDF_CONTENT;
}
int sigil_header_self_test(int verbosity)
{
print_module_name("header", verbosity);
sigil_t *sgl = NULL;
char c;
if (sigil_init(&sgl) != ERR_NO)
goto failed;
print_module_name("header", verbosity);
// TEST: correct_1
// TEST: fn process_header
print_test_item("fn process_header", verbosity);
char *sstream_1 = "\x25PDF-1.1\n" \
"abcdefghijklmnopqrstuvwxyz";
sgl->file = fmemopen(sstream_1,
(strlen(sstream_1) + 1) * sizeof(*sstream_1),
"r");
if (sgl->file == NULL)
goto failed;
if (process_header(sgl) != ERR_NO ||
sgl->pdf_x != 1 ||
sgl->pdf_y != 1 ||
sgl->pdf_start_offset != 0)
{
goto failed;
}
fclose(sgl->file);
sgl->file = NULL;
char *sstream_2 = "\x1a\x5e\x93\x7e\x6f\x3c\x6a\x71\xbf\xda\x54\x91\xe5"\
"\x86\x08\x84\xaf\x8e\x89\x44\xab\xc4\x58\x0c\xb9\x31"\
"\xd3\x8c\x0f\xc0\x43\x1a\xa5\x07\x4f\xe2\x98\xb3\xd8"\
"\x53\x4b\x5d\x4b\xd6\x48\x26\x98\x09\xde\x0d" \
"\x25PDF-1.2" \
"\x55\xa1\x77\xd3\x47\xab\xc6\x87\xf3\xbc\x2d\x8a\x9f"\
"\x0e\x47\xbb\x74\xd2\x71\x28\x94\x53\x92\xae\x2b\x17"\
"\xd0\x6a\x9c\x13\x84\xc1\x07\x44\xc0\x81\xb8\xd6\x9c"\
"\x31\x08\x13\xd4\xc2\xd6\x2d\xaf\xfb\xea\x6f";
sgl->file = fmemopen(sstream_2,
(strlen(sstream_2) + 1) * sizeof(*sstream_2),
"r");
if (sgl->file == NULL)
goto failed;
if (process_header(sgl) != ERR_NO ||
sgl->pdf_x != 1 ||
sgl->pdf_y != 2 ||
sgl->pdf_start_offset != 50 )
{
goto failed;
}
fclose(sgl->file);
sgl->file = NULL;
char *sstream_1 = "\x25PDF-1.1 x";
if ((sgl = test_prepare_sgl_content(sstream_1, strlen(sstream_1) + 1)) == NULL)
goto failed;
if (process_header(sgl) != ERR_NO ||
sgl->pdf_x != 1 ||
sgl->pdf_y != 1 ||
sgl->pdf_start_offset != 0)
{
goto failed;
}
char *sstream_3 = "\x25\x25PPD\x25PDF-\x25PDF-1\x25PDF-1..@PDF-1.3";
sgl->file = fmemopen(sstream_3,
(strlen(sstream_3) + 1) * sizeof(*sstream_3),
"r");
if (sgl->file == NULL)
goto failed;
if (skip_leading_whitespaces(sgl) != ERR_NO)
goto failed;
if ((pdf_get_char(sgl, &c)) != ERR_NO || c != 'x')
goto failed;
sigil_free(&sgl);
char *sstream_2 = "\x1a\x5e\x93\x7e\x6f\x3c\x6a\x71\xbf\xda\x54\x91\xe5"\
"\x86\x08\x84\xaf\x8e\x89\x44\xab\xc4\x58\x0c\xb9\x31"\
"\xd3\x8c\x0f\xc0\x43\x1a\xa5\x07\x4f\xe2\x98\xb3\xd8"\
"\x53\x4b\x5d\x4b\xd6\x48\x26\x98\x09\xde\x0d" \
"\x25PDF-1.2 x";
if ((sgl = test_prepare_sgl_content(sstream_2, strlen(sstream_2) + 1)) == NULL)
goto failed;
if (process_header(sgl) != ERR_NO ||
sgl->pdf_x != 1 ||
sgl->pdf_y != 2 ||
sgl->pdf_start_offset != 50)
{
goto failed;
}
if (process_header(sgl) == ERR_NO)
goto failed;
if (skip_leading_whitespaces(sgl) != ERR_NO)
goto failed;
if ((pdf_get_char(sgl, &c)) != ERR_NO || c != 'x')
goto failed;
sigil_free(sgl);
sigil_free(&sgl);
}
print_test_result(1, verbosity);
......@@ -154,9 +131,10 @@ int sigil_header_self_test(int verbosity)
failed:
if (sgl)
sigil_free(sgl);
sigil_free(&sgl);
print_test_result(0, verbosity);
print_module_result(0, verbosity);
return 1;
}
#include <stdio.h>
#include <stdlib.h>
#include "auxiliary.h"
#include "config.h"
#include "constants.h"
#include "header.h"
#include "sigil.h"
......@@ -11,41 +12,114 @@ sigil_err_t sigil_init(sigil_t **sgl)
{
    // Allocate a new sigil context, store it in *sgl and set every field to
    // its default value.
    //
    // sgl: out-parameter receiving the newly allocated context
    //
    // returns ERR_PARAMETER if sgl is NULL, ERR_ALLOCATION if the allocation
    // fails, ERR_NO otherwise. The context is released with sigil_free().

    // function parameter checks
    if (sgl == NULL)
        return ERR_PARAMETER;

    *sgl = malloc(sizeof(**sgl));
    if (*sgl == NULL)
        return ERR_ALLOCATION;

    // wipe the whole structure: sizeof(**sgl) is the size of sigil_t,
    // whereas sizeof(*sgl) would only be the size of a pointer
    sigil_zeroize(*sgl, sizeof(**sgl));

    // set default values
    (*sgl)->pdf_data.file                = NULL;
    (*sgl)->pdf_data.buffer              = NULL;
    (*sgl)->pdf_data.buf_pos             = 0;
    (*sgl)->pdf_data.size                = 0;
    (*sgl)->pdf_data.deallocation_info   = 0;
    (*sgl)->pdf_x                        = 0;
    (*sgl)->pdf_y                        = 0;
    (*sgl)->xref_type                    = XREF_TYPE_UNSET;
    (*sgl)->xref                         = NULL;
    (*sgl)->ref_catalog_dict.object_num  = 0;
    (*sgl)->ref_catalog_dict.generation_num = 0;
    (*sgl)->file_size                    = 0;
    (*sgl)->pdf_start_offset             = 0;
    (*sgl)->startxref                    = 0;

    return ERR_NO;
}
sigil_err_t sigil_verify(sigil_t *sgl, const char *filepath)
// Use an already opened stream as the PDF input.
//
// The size of the file is determined and stored in sgl->pdf_data.size. If it
// is below THRESHOLD_FILE_BUFFERING, the whole content is additionally loaded
// into an internally owned buffer (released by sigil_free()); when that is
// not possible the function silently falls back to reading from the stream.
//
// sgl:      context to update
// pdf_file: stream opened for reading; it must support fseek()/ftell()
//
// returns ERR_PARAMETER for NULL arguments, ERR_IO if the stream cannot be
// positioned or measured, ERR_NO otherwise.
sigil_err_t sigil_set_pdf_file(sigil_t *sgl, FILE *pdf_file)
{
    long file_size;
    size_t processed = 0;
    char *content = NULL;

    // function parameter checks
    if (sgl == NULL || pdf_file == NULL)
        return ERR_PARAMETER;

    if (sgl->pdf_data.file != NULL && sgl->pdf_data.file != pdf_file)
        fclose(sgl->pdf_data.file);

    sgl->pdf_data.file = pdf_file;

    // forget any previously set in-memory content so that it can neither
    // leak nor be mistaken for the content of the new file
    if (sgl->pdf_data.deallocation_info & DEALLOCATE_BUFFER) {
        free(sgl->pdf_data.buffer);
        sgl->pdf_data.deallocation_info &= ~DEALLOCATE_BUFFER;
    }
    sgl->pdf_data.buffer  = NULL;
    sgl->pdf_data.buf_pos = 0;
    sgl->pdf_data.size    = 0;

    // get file size
    // - 1) jump to the end of file
    if (fseek(pdf_file, 0, SEEK_END) != 0)
        return ERR_IO;

    // - 2) read current position; ftell() reports failure as -1L, which has
    //      to be checked before the value is stored in an unsigned size_t
    file_size = ftell(pdf_file);
    if (file_size < 0)
        return ERR_IO;

    // - 3) jump back to the beginning
    if (fseek(pdf_file, 0, SEEK_SET) != 0)
        return ERR_IO;

    sgl->pdf_data.size = (size_t)file_size;

    // large files are read directly from the stream
    if (sgl->pdf_data.size >= THRESHOLD_FILE_BUFFERING)
        return ERR_NO;

    content = malloc(sgl->pdf_data.size + 1);
    if (content == NULL) {
        // fallback to using the file
        return ERR_NO;
    }

    // element size 1, element count = file size, so that the return value
    // is the number of bytes actually read
    processed = fread(content, sizeof(char), sgl->pdf_data.size, pdf_file);
    if (processed != sgl->pdf_data.size) {
        // fallback to using the file; rewind it because part of the content
        // may have been consumed already
        free(content);

        if (fseek(pdf_file, 0, SEEK_SET) != 0)
            return ERR_IO;

        return ERR_NO;
    }

    content[processed] = '\0';

    sgl->pdf_data.buffer = content;
    sgl->pdf_data.deallocation_info |= DEALLOCATE_BUFFER;

    return ERR_NO;
}
// Open the file at path_to_pdf and use it as the PDF input.
//
// The stream is opened by the library, therefore it is marked with
// DEALLOCATE_FILE so that sigil_free() closes it.
//
// sgl:         context to update
// path_to_pdf: path of the PDF file
//
// returns ERR_PARAMETER for NULL arguments, ERR_IO if the file cannot be
// opened, otherwise the result of sigil_set_pdf_file().
sigil_err_t sigil_set_pdf_path(sigil_t *sgl, const char *path_to_pdf)
{
    FILE *pdf_file = NULL;

    if (sgl == NULL || path_to_pdf == NULL)
        return ERR_PARAMETER;

    // open provided file; binary mode, because PDF is binary data and the
    // file size is derived from the stream position
    if ((pdf_file = fopen(path_to_pdf, "rb")) == NULL)
        return ERR_IO;

    sgl->pdf_data.deallocation_info |= DEALLOCATE_FILE;

    return sigil_set_pdf_file(sgl, pdf_file);
}
// Use caller-provided memory as the PDF input.
//
// The buffer stays owned by the caller: it is not copied and it is not
// released by sigil_free(), so it has to outlive the context's use of it.
//
// sgl:         context to update
// pdf_content: pointer to the PDF content
// size:        number of bytes at pdf_content, must be greater than zero
//
// returns ERR_PARAMETER for a NULL argument or zero size, ERR_NO otherwise.
sigil_err_t sigil_set_pdf_buffer(sigil_t *sgl, char *pdf_content, size_t size)
{
    if (sgl == NULL || pdf_content == NULL || size == 0)
        return ERR_PARAMETER;

    // a buffer allocated internally earlier must be released here and its
    // ownership flag cleared, otherwise the old buffer leaks and sigil_free()
    // would later call free() on the caller's memory
    if ((sgl->pdf_data.deallocation_info & DEALLOCATE_BUFFER) &&
        sgl->pdf_data.buffer != pdf_content)
    {
        free(sgl->pdf_data.buffer);
        sgl->pdf_data.deallocation_info &= ~DEALLOCATE_BUFFER;
    }

    sgl->pdf_data.buffer  = pdf_content;
    sgl->pdf_data.size    = size;
    sgl->pdf_data.buf_pos = 0;

    return ERR_NO;
}
sigil_err_t sigil_verify(sigil_t *sgl)
{
sigil_err_t err;
// function parameter checks
if (sgl == NULL)
return ERR_PARAMETER;
// process header - %PDF-<pdf_x>.<pdf_y>
err = process_header(sgl);
......@@ -66,15 +140,25 @@ sigil_err_t sigil_verify(sigil_t *sgl, const char *filepath)
return ERR_NO;
}
void sigil_free(sigil_t *sgl)
// Release a context created by sigil_init() and set the caller's pointer to
// NULL.
//
// Only resources flagged in pdf_data.deallocation_info are released: the
// stream when DEALLOCATE_FILE is set and the buffer when DEALLOCATE_BUFFER is
// set. A stream or buffer without its flag is left untouched.
//
// sgl: address of the context pointer; NULL or a pointer to NULL is a no-op
void sigil_free(sigil_t **sgl)
{
    sigil_t *ctx;

    if (sgl == NULL || *sgl == NULL)
        return;

    ctx = *sgl;

    if (ctx->pdf_data.deallocation_info & DEALLOCATE_FILE) {
        // fclose(NULL) is undefined, so guard against a missing stream
        if (ctx->pdf_data.file != NULL)
            fclose(ctx->pdf_data.file);
        ctx->pdf_data.file = NULL;
        ctx->pdf_data.deallocation_info &= ~DEALLOCATE_FILE;
    }

    if (ctx->pdf_data.deallocation_info & DEALLOCATE_BUFFER) {
        free(ctx->pdf_data.buffer);
        ctx->pdf_data.buffer = NULL;
        ctx->pdf_data.deallocation_info &= ~DEALLOCATE_BUFFER;
    }

    if (ctx->xref)
        xref_free(ctx->xref);

    // free the context itself (not the caller's pointer variable) and
    // invalidate the caller's handle
    free(ctx);
    *sgl = NULL;
}
int sigil_sigil_self_test(int verbosity)
......@@ -93,7 +177,7 @@ int sigil_sigil_self_test(int verbosity)
if (err != ERR_NO || sgl == NULL)
goto failed;
sigil_free(sgl);
sigil_free(&sgl);
}
print_test_result(1, verbosity);
......@@ -111,7 +195,7 @@ int sigil_sigil_self_test(int verbosity)
if (1)
goto failed;
sigil_free(sgl);
sigil_free(&sgl);
}
print_test_result(1, verbosity);
......@@ -122,7 +206,7 @@ int sigil_sigil_self_test(int verbosity)
failed:
if (sgl)
sigil_free(sgl);
sigil_free(&sgl);
print_test_result(0, verbosity);
print_module_result(0, verbosity);
......
......@@ -7,52 +7,50 @@ sigil_err_t process_trailer(sigil_t *sgl)
sigil_err_t err;
keyword_t keyword;
dict_key_t dict_key;
char c;
// function parameter checks
if (sgl == NULL || sgl->file == NULL)
return ERR_PARAM;
if (sgl == NULL)
return ERR_PARAMETER;
// read "trailer"
err = parse_keyword(sgl->file, &keyword);
err = parse_keyword(sgl, &keyword);
if (err != ERR_NO)
return err;
if (keyword != KEYWORD_trailer)
return ERR_PDF_CONT;
return ERR_PDF_CONTENT;
err = skip_leading_whitespaces(sgl->file);
err = skip_leading_whitespaces(sgl);
if (err != ERR_NO)
return err;
// if merged into one if statement with the '&&' operator in between, it's
// optimized out and position in the file is not changed
if (fgetc(sgl->file) != '<')
return 1;
if (fgetc(sgl->file) != '<')
return 1;
if ((pdf_get_char(sgl, &c)) != ERR_NO || c != '<')
return ERR_PDF_CONTENT;
if ((pdf_get_char(sgl, &c)) != ERR_NO || c != '<')
return ERR_PDF_CONTENT;
while ((err = parse_dict_key(sgl->file, &dict_key)) == ERR_NO) {
while ((err = parse_dict_key(sgl, &dict_key)) == ERR_NO) {
switch (dict_key) {
case DICT_KEY_Size:
err = parse_number(sgl->file, &sgl->xref->size_from_trailer);
err = parse_number(sgl, &(sgl->xref->size_from_trailer));
if (err != ERR_NO)
return err;
break;
case DICT_KEY_Prev:
err = parse_number(sgl->file, &sgl->xref->prev_section);
err = parse_number(sgl, &(sgl->xref->prev_section));
if (err != ERR_NO)
return err;
break;
case DICT_KEY_Root:
err = parse_indirect_reference(sgl->file, &sgl->ref_catalog_dict);
err = parse_indirect_reference(sgl, &(sgl->ref_catalog_dict));
if (err != ERR_NO)
return err;
break;
case DICT_KEY_unknown:
err = skip_dict_unknown_value(sgl->file);
case DICT_KEY_UNKNOWN:
err = skip_dict_unknown_value(sgl);
if (err != ERR_NO)
return err;
break;
default:
return ERR_PDF_CONT;
return ERR_PDF_CONTENT;
}
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment