SDDSlib
Loading...
Searching...
No Matches
SDDS_lzma.c
Go to the documentation of this file.
1/**
2 * @file SDDS_lzma.c
3 * @brief Implementation of LZMA-compressed file handling functions.
4 *
5 * This file provides a set of functions to work with files compressed using the LZMA
6 * compression algorithm. It abstracts the complexities of LZMA stream handling, offering
7 * a simple file-like interface for reading from and writing to compressed files.
8 *
9 * Features:
10 * - Open and close LZMA-compressed files.
11 * - Read and write data with automatic compression/decompression.
12 * - Support for reading lines and formatted output.
13 * - Error handling with descriptive messages.
14 *
15 * @copyright
16 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
17 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
18 *
19 * @license
20 * This file is distributed under the terms of the Software License Agreement
21 * found in the file LICENSE included with this distribution.
22 *
23 * @authors
24 * R. Soliday
25 */
26
27
28
29#include <assert.h>
30#include <errno.h>
31#include <fcntl.h>
32#include <stdio.h>
33#include <stdlib.h>
34#if defined(_WIN32)
35//#typedef long_ptr ssize_t
36#else
37# include <syslog.h>
38# include <unistd.h>
39#endif
40#include <sys/stat.h>
41#include <sys/types.h>
42#include <stdint.h>
43#include <string.h>
44#include <stdarg.h>
45#include <lzma.h>
46
/* LZMA_EASY_ENCODER(a, b) initializes the "easy" encoder on stream 'a' with
   preset 'b'. The liblzma entry point and its argument list are chosen at
   compile time from LZMA_VERSION: the two older version ranges take only
   (stream, preset), while the newest form additionally passes
   LZMA_CHECK_CRC32 as the integrity check. */
#if LZMA_VERSION <= UINT32_C(49990030)
# define LZMA_EASY_ENCODER(a, b) lzma_easy_encoder_single(a, b)
#elif LZMA_VERSION <= UINT32_C(49990050)
# define LZMA_EASY_ENCODER(a, b) lzma_easy_encoder(a, b)
#else
# define LZMA_EASY_ENCODER(a, b) lzma_easy_encoder(a, b, LZMA_CHECK_CRC32)
#endif
54
/* Size in bytes of the staging buffers for compressed data: the per-file
   read buffer in struct lzmafile and the flush buffer in lzma_close(). */
#define BUF_SIZE 40960

/* Pristine stream value; lzma_open() copies it into each new lzmafile
   before initializing the encoder or decoder. */
static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT;
58
/* State of one open LZMA-compressed file. Instances are allocated by
   lzma_open() and released by lzma_close(). */
struct lzmafile {
  lzma_stream str;               /* liblzma coder state (encoder or decoder) */
  FILE *fp;                      /* stdio stream of the underlying compressed file */
  char mode;                     /* first character of the fopen() mode ('r' or 'w') */
  unsigned char rdbuf[BUF_SIZE]; /* compressed input staged by lzma_read()/lzma_gets() */
};
65
66/* lzma_open opens the file whose name is the string pointed to
67 by 'path' and associates a stream with it. The 'mode' argument
68 is expected to be 'r' or 'w'. Upon successful completion, a
69 lzmafile pointer will be returned. Upon error, NULL will be returned.*/
70void *lzma_open(const char *path, const char *mode) {
71 int ret;
72
73 /* initialize LZMA stream */
74 struct lzmafile *lf = malloc(sizeof(struct lzmafile));
75 lf->fp = fopen(path, mode);
76 lf->str = lzma_stream_init;
77 lf->mode = mode[0];
78 if (mode[0] == 'r') {
79#if LZMA_VERSION <= UINT32_C(49990030)
80 ret = lzma_auto_decoder(&lf->str, NULL, NULL);
81#else
82 ret = lzma_auto_decoder(&lf->str, -1, 0);
83#endif
84 lf->str.avail_in = 0;
85 } else {
86 /* I decided to use level 2 encoding */
87 /* Perhaps this should be user configurable in an environment variable */
88 ret = LZMA_EASY_ENCODER(&lf->str, 2);
89 }
90 if (ret != LZMA_OK) {
91 fprintf(stderr, "lzma_open error: %d\n", ret);
92 return NULL;
93 }
94 return (void *)lf;
95}
96
97/* lzma_close flushes the stream pointed to by the lzmafile pointer
98 and closes the underlying file descriptor. Upon successful
99 completion 0 is returned. On error, EOF is returned. */
100int lzma_close(struct lzmafile *file) {
101 int ret, outsize;
102 unsigned char buf[BUF_SIZE]; /* buffer used when flushing remaining
103 output data in write mode */
104 if (!file)
105 return -1;
106 if (file->mode == 'w') {
107 /* flush LZMA output buffer */
108 for (;;) {
109 file->str.next_out = buf;
110 file->str.avail_out = BUF_SIZE;
111 ret = lzma_code(&file->str, LZMA_FINISH);
112 if (ret != LZMA_STREAM_END && ret != LZMA_OK) {
113 fprintf(stderr, "lzma_close error: encoding failed: %d\n", ret);
114 lzma_end(&file->str);
115 fclose(file->fp);
116 free(file);
117 return EOF;
118 }
119 outsize = BUF_SIZE - file->str.avail_out;
120 if (fwrite(buf, 1, outsize, file->fp) != outsize) {
121 lzma_end(&file->str);
122 fclose(file->fp);
123 free(file);
124 return EOF;
125 }
126 if (ret == LZMA_STREAM_END)
127 break;
128 }
129 }
130 lzma_end(&file->str);
131 ret = fclose(file->fp);
132 free(file);
133 return ret;
134}
135
136/* lzma_read attempts to read up to 'count' bytes from the
137 lzmafile pointer into the buffer 'buf'. On success, the
138 number of bytes is returned. On error, -1 is returned.
139 The 'buf' variable is not terminated by '\0'. */
140long lzma_read(struct lzmafile *file, void *buf, size_t count) {
141 int ret;
142 lzma_stream *lstr;
143 if (file->mode != 'r')
144 return -1;
145
146 lstr = &file->str;
147 lstr->next_out = buf;
148 lstr->avail_out = count;
149
150 /* decompress until EOF or output buffer is full */
151 while (lstr->avail_out) {
152 if (lstr->avail_in == 0) {
153 /* refill input buffer */
154 ret = fread(file->rdbuf, 1, BUF_SIZE, file->fp);
155 if (ret == 0) {
156 break; /* EOF */
157 }
158 lstr->next_in = file->rdbuf; /* buffer containing lzma data just read */
159 lstr->avail_in = ret; /* number of bytes read */
160 }
161 ret = lzma_code(lstr, LZMA_RUN);
162 /* this fills up lstr->next_out and decreases lstr->avail_out */
163 /* it also emptys lstr->next_in and decreases lstr->avail_in */
164 if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
165 fprintf(stderr, "lzma_read error: decoding failed: %d\n", ret);
166 return -1;
167 }
168 if (ret == LZMA_STREAM_END) {
169 break; /* EOF */
170 }
171 }
172 return count - lstr->avail_out; /* length of buf that has valid data */
173}
174
175/* lzma_gets reads in at most one less than 'size' characters from
176 the the lzmafile pointer into the buffer pointed to by 's'.
177 Reading stops after an EOF or a newline. If a newline is read
178 it is stored into the buffer. A '\0' is stored after the
179 last character in the buffer. Returns 's' on success and NULL on
180 error. */
181char *lzma_gets(char *s, int size, struct lzmafile *file) {
182 int ret;
183 int i = 0;
184 lzma_stream *lstr;
185 if (file->mode != 'r')
186 return NULL;
187 if (s == NULL || size < 1)
188 return NULL;
189 s[0] = '\0';
190 lstr = &file->str;
191 lstr->next_out = (void *)s;
192
193 /* decompress until newline or EOF or output buffer is full */
194 while (1) {
195 if (lstr->avail_in == 0) {
196 /* refill input buffer */
197 ret = fread(file->rdbuf, 1, BUF_SIZE, file->fp);
198 if (ret == 0) {
199 break; /* EOF */
200 }
201 lstr->next_in = file->rdbuf; /* buffer containing lzma data just read */
202 lstr->avail_in = ret; /* number of bytes read */
203 }
204 if (i + 1 == size) {
205 s[i] = '\0';
206 break;
207 }
208 lstr->avail_out = 1;
209 ret = lzma_code(lstr, LZMA_RUN);
210 /* this fills up lstr->next_out and decreases lstr->avail_out */
211 /* it also emptys lstr->next_in and decreases lstr->avail_in */
212 if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
213 fprintf(stderr, "lzma_gets error: decoding failed: %d\n", ret);
214 return NULL;
215 }
216 if (ret == LZMA_STREAM_END) { /* EOF */
217 s[i + 1] = '\0';
218 break;
219 }
220 if (s[i] == 10) { /* 10 is the value for \n */
221 if (i > 0) { /* we sometimes get \10\10 */
222 if ((i == 1) && (s[0] == 32)) {
223 /* when uncompressing the lzma stream we some times end
224 up with \10\32\10 instead of a simple \10 */
225 } else {
226 s[i + 1] = '\0';
227 break;
228 }
229 }
230 }
231 i++;
232 }
233 return s;
234}
235
236/* lzma_write writes up to 'count' bytes from the buffer 'buf'
237 to the file referred to by the lzmafile pointer. On success,
238 the number of bytes written is returned. On error, -1 is returned. */
239long lzma_write(struct lzmafile *file, const void *buf, size_t count) {
240 int ret;
241 lzma_stream *lstr = &file->str;
242 unsigned char *bufout; /* compressed output buffer */
243 bufout = malloc(sizeof(char) * count);
244
245 if (file->mode != 'w') {
246 fprintf(stderr, "lzma_write error: file was not opened for writting\n");
247 free(bufout);
248 return -1;
249 }
250 lstr->next_in = buf;
251 lstr->avail_in = count;
252 while (lstr->avail_in) {
253 lstr->next_out = bufout;
254 lstr->avail_out = count;
255 ret = lzma_code(lstr, LZMA_RUN);
256 if (ret != LZMA_OK) {
257 fprintf(stderr, "lzma_write error: encoding failed: %d\n", ret);
258 free(bufout);
259 return -1;
260 }
261 ret = fwrite(bufout, 1, count - lstr->avail_out, file->fp);
262 if (ret != count - lstr->avail_out) {
263 fprintf(stderr, "lzma_write error\n");
264 free(bufout);
265 return -1;
266 }
267 }
268 free(bufout);
269 return count;
270}
271
272/* lzma_puts writes the string 's' to the lzmafile file pointer,
273 without its trailing '\0'. Returns a non-negative number on
274 success, or EOF on error. */
275int lzma_puts(const char *s, struct lzmafile *file) {
276 int ret;
277 lzma_stream *lstr = &file->str;
278 int count;
279 unsigned char *bufout; /* compressed output buffer */
280 char *buf;
281
282 if (file->mode != 'w') {
283 fprintf(stderr, "lzma_puts error: file was not opened for writting\n");
284 return EOF;
285 }
286 count = strlen(s);
287 bufout = malloc(sizeof(unsigned char) * count);
288 buf = malloc(sizeof(char) * count);
289 strncpy(buf, s, count);
290
291 lstr->next_in = (void *)buf;
292 lstr->avail_in = count;
293 while (lstr->avail_in) {
294 lstr->next_out = bufout;
295 lstr->avail_out = count;
296 ret = lzma_code(lstr, LZMA_RUN);
297 if (ret != LZMA_OK) {
298 fprintf(stderr, "lzma_puts error: encoding failed: %d\n", ret);
299 free(bufout);
300 free(buf);
301 return EOF;
302 }
303 ret = fwrite(bufout, 1, count - lstr->avail_out, file->fp);
304 if (ret != count - lstr->avail_out) {
305 fprintf(stderr, "lzma_puts error\n");
306 free(bufout);
307 free(buf);
308 return EOF;
309 }
310 }
311 free(bufout);
312 free(buf);
313 return count;
314}
315
316/* lzma_putc writes the character 'c', cast to an unsigned char,
317 to the lzmafile file pointer. Returns the character written as
318 an unsigned char cast to an int or EOF on error. */
319int lzma_putc(int c, struct lzmafile *file) {
320 int ret;
321 lzma_stream *lstr = &file->str;
322
323 unsigned char bufout[1]; /* compressed output buffer */
324 char buf[1];
325
326 if (file->mode != 'w') {
327 fprintf(stderr, "lzma_putc error: file was not opened for writting\n");
328 return EOF;
329 }
330 buf[0] = c;
331
332 lstr->next_in = (void *)buf;
333 lstr->avail_in = 1;
334 while (lstr->avail_in) {
335 lstr->next_out = bufout;
336 lstr->avail_out = 1;
337 ret = lzma_code(lstr, LZMA_RUN);
338 if (ret != LZMA_OK) {
339 fprintf(stderr, "lzma_putc error: encoding failed: %d\n", ret);
340 return EOF;
341 }
342 ret = fwrite(bufout, 1, 1 - lstr->avail_out, file->fp);
343 if (ret != 1 - lstr->avail_out) {
344 fprintf(stderr, "lzma_putc error\n");
345 return EOF;
346 }
347 }
348 return (unsigned char)c;
349}
350
/**
 * @brief Format text printf-style and write it compressed to the file.
 *
 * The text is formatted into a 32768-byte buffer and passed to lzma_write().
 * The return value of vsnprintf() is used to detect output that does not fit,
 * so over-long output is rejected instead of being written truncated, and
 * output of zero length is not treated as an error.
 *
 * @param file   Handle opened for writing.
 * @param format printf-style format string, followed by its arguments.
 * @return The value returned by lzma_write() (the number of characters
 *         written, not counting the trailing '\0') on success; a negative
 *         value if formatting fails, the result does not fit in the buffer,
 *         or lzma_write() fails.
 */
int lzma_printf(struct lzmafile *file, const char *format, ...) {
  char in[32768];
  int len;
  va_list va;

  va_start(va, format);
  len = vsnprintf(in, sizeof in, format, va);
  va_end(va);

  if (len < 0) {
    fprintf(stderr, "lzma_printf error: formatting failed\n");
    return -1;
  }
  /* vsnprintf reports the length the full output would have had; anything
     at or beyond the buffer size means the text was truncated */
  if ((size_t)len >= sizeof in) {
    fprintf(stderr, "lzma_printf error: the printf results do not fit in the buffer\n");
    return -1;
  }
  return (int)lzma_write(file, in, (size_t)len);
}
377
378int lzma_eof(struct lzmafile *file) {
379 lzma_stream *lstr;
380 lstr = &file->str;
381 if (lstr->avail_in == 0) {
382 return feof(file->fp);
383 } else {
384 return 0;
385 }
386}
387
388/* lzma_tell and lzma_seek will probably have to be
389 changed if they are seriously going to be used.
390 As far as I know they will only be called for updating
391 an existing file or when using fixed_row_count.
392 Both of which are not allowed when using lzma compression */
393long lzma_tell(struct lzmafile *file) {
394 return ftell(file->fp);
395}
396
397int lzma_seek(struct lzmafile *file, long offset, int whence) {
398 return fseek(file->fp, offset, whence);
399}
400
/* UnpackLZMAOpen opens 'filename' for reading through lzma_open() in
   binary mode. Returns the handle from lzma_open(), or NULL when
   'filename' is NULL. */
void *UnpackLZMAOpen(char *filename) {
  return filename ? lzma_open(filename, "rb") : NULL;
}