SDDS ToolKit Programs and Libraries for C and Python
All Classes Files Functions Variables Macros Pages
SDDS_lzma.c
Go to the documentation of this file.
1/**
2 * @file SDDS_lzma.c
3 * @brief Implementation of LZMA-compressed file handling functions.
4 *
5 * This file provides a set of functions to work with files compressed using the LZMA
6 * compression algorithm. It abstracts the complexities of LZMA stream handling, offering
7 * a simple file-like interface for reading from and writing to compressed files.
8 *
9 * Features:
10 * - Open and close LZMA-compressed files.
11 * - Read and write data with automatic compression/decompression.
12 * - Support for reading lines and formatted output.
13 * - Error handling with descriptive messages.
14 *
15 * @copyright
16 * - (c) 2002 The University of Chicago, as Operator of Argonne National Laboratory.
17 * - (c) 2002 The Regents of the University of California, as Operator of Los Alamos National Laboratory.
18 *
19 * @license
20 * This file is distributed under the terms of the Software License Agreement
21 * found in the file LICENSE included with this distribution.
22 *
23 * @authors
24 * R. Soliday
25 */
26
27
28
29#include <assert.h>
30#include <errno.h>
31#include <fcntl.h>
32#include <stdio.h>
33#include <stdlib.h>
34#if defined(_WIN32)
35//#typedef long_ptr ssize_t
36#else
37# include <syslog.h>
38# include <unistd.h>
39#endif
40#include <sys/stat.h>
41#include <sys/types.h>
42#include <stdint.h>
43#include <string.h>
44#include <stdarg.h>
45#include <lzma.h>
46
47#if LZMA_VERSION <= UINT32_C(49990030)
48# define LZMA_EASY_ENCODER(a, b) lzma_easy_encoder_single(a, b)
49#elif LZMA_VERSION <= UINT32_C(49990050)
50# define LZMA_EASY_ENCODER(a, b) lzma_easy_encoder(a, b)
51#else
52# define LZMA_EASY_ENCODER(a, b) lzma_easy_encoder(a, b, LZMA_CHECK_CRC32)
53#endif
54
55#define BUF_SIZE 40960
56
57static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT;
58
59struct lzmafile {
60 lzma_stream str; /* codec stream descriptor */
61 FILE *fp; /* backing file descriptor */
62 char mode; /* access mode ('r' or 'w') */
63 unsigned char rdbuf[BUF_SIZE]; /* read buffer used by lzmaRead */
64};
65
66/* lzma_open opens the file whose name is the string pointed to
67 by 'path' and associates a stream with it. The 'mode' argument
68 is expected to be 'r' or 'w'. Upon successful completion, a
69 lzmafile pointer will be returned. Upon error, NULL will be returned.*/
70void *lzma_open(const char *path, const char *mode) {
71 int ret;
72
73 /* initialize LZMA stream */
74 struct lzmafile *lf = malloc(sizeof(struct lzmafile));
75 lf->fp = fopen(path, mode);
76 lf->str = lzma_stream_init;
77 lf->mode = mode[0];
78 if (mode[0] == 'r') {
79#if LZMA_VERSION <= UINT32_C(49990030)
80 ret = lzma_auto_decoder(&lf->str, NULL, NULL);
81#else
82 ret = lzma_auto_decoder(&lf->str, -1, 0);
83#endif
84 lf->str.avail_in = 0;
85 } else {
86 /* I decided to use level 2 encoding */
87 /* Perhaps this should be user configurable in an environment variable */
88 ret = LZMA_EASY_ENCODER(&lf->str, 2);
89 }
90 if (ret != LZMA_OK) {
91 fprintf(stderr, "lzma_open error: %d\n", ret);
92 return NULL;
93 }
94 return (void *)lf;
95}
96
97/* lzma_close flushes the stream pointed to by the lzmafile pointer
98 and closes the underlying file descriptor. Upon successful
99 completion 0 is returned. On error, EOF is returned. */
100int lzma_close(struct lzmafile *file) {
101 int ret, outsize;
102 unsigned char buf[BUF_SIZE]; /* buffer used when flushing remaining
103 output data in write mode */
104 if (!file)
105 return -1;
106 if (file->mode == 'w') {
107 /* flush LZMA output buffer */
108 for (;;) {
109 file->str.next_out = buf;
110 file->str.avail_out = BUF_SIZE;
111 ret = lzma_code(&file->str, LZMA_FINISH);
112 if (ret != LZMA_STREAM_END && ret != LZMA_OK) {
113 fprintf(stderr, "lzma_close error: encoding failed: %d\n", ret);
114 lzma_end(&file->str);
115 fclose(file->fp);
116 free(file);
117 return EOF;
118 }
119 outsize = BUF_SIZE - file->str.avail_out;
120 if (fwrite(buf, 1, outsize, file->fp) != outsize) {
121 lzma_end(&file->str);
122 fclose(file->fp);
123 free(file);
124 return EOF;
125 }
126 if (ret == LZMA_STREAM_END)
127 break;
128 }
129 }
130 lzma_end(&file->str);
131 ret = fclose(file->fp);
132 free(file);
133 return ret;
134}
135
136/* lzma_read attempts to read up to 'count' bytes from the
137 lzmafile pointer into the buffer 'buf'. On success, the
138 number of bytes is returned. On error, -1 is returned.
139 The 'buf' variable is not terminated by '\0'. */
140long lzma_read(struct lzmafile *file, void *buf, size_t count) {
141 int ret;
142 lzma_stream *lstr;
143 if (file->mode != 'r')
144 return -1;
145
146 lstr = &file->str;
147 lstr->next_out = buf;
148 lstr->avail_out = count;
149
150 /* decompress until EOF or output buffer is full */
151 while (lstr->avail_out) {
152 if (lstr->avail_in == 0) {
153 /* refill input buffer */
154 ret = fread(file->rdbuf, 1, BUF_SIZE, file->fp);
155 if (ret == 0) {
156 break; /* EOF */
157 }
158 lstr->next_in = file->rdbuf; /* buffer containing lzma data just read */
159 lstr->avail_in = ret; /* number of bytes read */
160 }
161 ret = lzma_code(lstr, LZMA_RUN);
162 /* this fills up lstr->next_out and decreases lstr->avail_out */
163 /* it also emptys lstr->next_in and decreases lstr->avail_in */
164 if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
165 fprintf(stderr, "lzma_read error: decoding failed: %d\n", ret);
166 return -1;
167 }
168 if (ret == LZMA_STREAM_END) {
169 break; /* EOF */
170 }
171 }
172 return count - lstr->avail_out; /* length of buf that has valid data */
173}
174
175/* lzma_gets reads in at most one less than 'size' characters from
176 the the lzmafile pointer into the buffer pointed to by 's'.
177 Reading stops after an EOF or a newline. If a newline is read
178 it is stored into the buffer. A '\0' is stored after the
179 last character in the buffer. Returns 's' on success and NULL on
180 error. */
181char *lzma_gets(char *s, int size, struct lzmafile *file) {
182 int ret;
183 int i = 0;
184 lzma_stream *lstr;
185 if (file->mode != 'r')
186 return NULL;
187 if (s == NULL || size < 1)
188 return NULL;
189 s[0] = '\0';
190 lstr = &file->str;
191 lstr->next_out = (void *)s;
192
193 /* decompress until newline or EOF or output buffer is full */
194 while (1) {
195 if (lstr->avail_in == 0) {
196 /* refill input buffer */
197 ret = fread(file->rdbuf, 1, BUF_SIZE, file->fp);
198 if (ret == 0) {
199 break; /* EOF */
200 }
201 lstr->next_in = file->rdbuf; /* buffer containing lzma data just read */
202 lstr->avail_in = ret; /* number of bytes read */
203 }
204 if (i + 1 == size) {
205 s[i] = '\0';
206 break;
207 }
208 lstr->avail_out = 1;
209 ret = lzma_code(lstr, LZMA_RUN);
210 /* this fills up lstr->next_out and decreases lstr->avail_out */
211 /* it also emptys lstr->next_in and decreases lstr->avail_in */
212 if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
213 fprintf(stderr, "lzma_gets error: decoding failed: %d\n", ret);
214 return NULL;
215 }
216 if (ret == LZMA_STREAM_END) { /* EOF */
217 s[i + 1] = '\0';
218 break;
219 }
220 if (s[i] == 10) { /* 10 is the value for \n */
221 if (i > 0) { /* we sometimes get \10\10 */
222 if ((i == 1) && (s[0] == 32)) {
223 /* when uncompressing the lzma stream we some times end
224 up with \10\32\10 instead of a simple \10 */
225 } else {
226 s[i + 1] = '\0';
227 break;
228 }
229 }
230 }
231 i++;
232 }
233 return s;
234}
235
236/* lzma_write writes up to 'count' bytes from the buffer 'buf'
237 to the file referred to by the lzmafile pointer. On success,
238 the number of bytes written is returned. On error, -1 is returned. */
239long lzma_write(struct lzmafile *file, const void *buf, size_t count) {
240 int ret;
241 lzma_stream *lstr = &file->str;
242 unsigned char *bufout; /* compressed output buffer */
243 bufout = malloc(sizeof(char) * count);
244
245 if (file->mode != 'w') {
246 fprintf(stderr, "lzma_write error: file was not opened for writting\n");
247 free(bufout);
248 return -1;
249 }
250 lstr->next_in = buf;
251 lstr->avail_in = count;
252 while (lstr->avail_in) {
253 lstr->next_out = bufout;
254 lstr->avail_out = count;
255 ret = lzma_code(lstr, LZMA_RUN);
256 if (ret != LZMA_OK) {
257 fprintf(stderr, "lzma_write error: encoding failed: %d\n", ret);
258 free(bufout);
259 return -1;
260 }
261 ret = fwrite(bufout, 1, count - lstr->avail_out, file->fp);
262 if (ret != count - lstr->avail_out) {
263 fprintf(stderr, "lzma_write error\n");
264 free(bufout);
265 return -1;
266 }
267 }
268 free(bufout);
269 return count;
270}
271
272/* lzma_puts writes the string 's' to the lzmafile file pointer,
273 without its trailing '\0'. Returns a non-negative number on
274 success, or EOF on error. */
275int lzma_puts(const char *s, struct lzmafile *file) {
276 int ret;
277 lzma_stream *lstr = &file->str;
278 int count;
279 unsigned char *bufout; /* compressed output buffer */
280 char *buf;
281
282 if (file->mode != 'w') {
283 fprintf(stderr, "lzma_puts error: file was not opened for writting\n");
284 return EOF;
285 }
286 count = strlen(s);
287 bufout = malloc(sizeof(unsigned char) * count);
288 buf = malloc(sizeof(char) * count);
289 //strncpy(buf, s, count);
290 memcpy(buf, s, count);
291
292 lstr->next_in = (void *)buf;
293 lstr->avail_in = count;
294 while (lstr->avail_in) {
295 lstr->next_out = bufout;
296 lstr->avail_out = count;
297 ret = lzma_code(lstr, LZMA_RUN);
298 if (ret != LZMA_OK) {
299 fprintf(stderr, "lzma_puts error: encoding failed: %d\n", ret);
300 free(bufout);
301 free(buf);
302 return EOF;
303 }
304 ret = fwrite(bufout, 1, count - lstr->avail_out, file->fp);
305 if (ret != count - lstr->avail_out) {
306 fprintf(stderr, "lzma_puts error\n");
307 free(bufout);
308 free(buf);
309 return EOF;
310 }
311 }
312 free(bufout);
313 free(buf);
314 return count;
315}
316
317/* lzma_putc writes the character 'c', cast to an unsigned char,
318 to the lzmafile file pointer. Returns the character written as
319 an unsigned char cast to an int or EOF on error. */
320int lzma_putc(int c, struct lzmafile *file) {
321 int ret;
322 lzma_stream *lstr = &file->str;
323
324 unsigned char bufout[1]; /* compressed output buffer */
325 char buf[1];
326
327 if (file->mode != 'w') {
328 fprintf(stderr, "lzma_putc error: file was not opened for writting\n");
329 return EOF;
330 }
331 buf[0] = c;
332
333 lstr->next_in = (void *)buf;
334 lstr->avail_in = 1;
335 while (lstr->avail_in) {
336 lstr->next_out = bufout;
337 lstr->avail_out = 1;
338 ret = lzma_code(lstr, LZMA_RUN);
339 if (ret != LZMA_OK) {
340 fprintf(stderr, "lzma_putc error: encoding failed: %d\n", ret);
341 return EOF;
342 }
343 ret = fwrite(bufout, 1, 1 - lstr->avail_out, file->fp);
344 if (ret != 1 - lstr->avail_out) {
345 fprintf(stderr, "lzma_putc error\n");
346 return EOF;
347 }
348 }
349 return (unsigned char)c;
350}
351
/* lzma_printf formats its arguments like printf() into an internal
   32768-byte buffer and writes the result to the lzmafile through
   lzma_write.

   Returns the number of characters written (not including the trailing
   '\0'); this is 0 when the formatted text is empty.  Returns a negative
   value if formatting fails, if the formatted text does not fit in the
   buffer (nothing is written in that case), or if lzma_write fails. */
int lzma_printf(struct lzmafile *file, const char *format, ...) {
  char text[32768];
  va_list va;
  int len;

  va_start(va, format);
  len = vsnprintf(text, sizeof text, format, va);
  va_end(va);

  /* vsnprintf reports the length the full output needs; a value of at
     least the buffer size means the text was truncated, which must not
     be written out silently */
  if (len < 0 || (size_t)len >= sizeof text) {
    fprintf(stderr, "lzma_printf error: the printf results do not fit in the buffer\n");
    return -1;
  }

  return (int)lzma_write(file, text, (size_t)len);
}
378
379int lzma_eof(struct lzmafile *file) {
380 lzma_stream *lstr;
381 lstr = &file->str;
382 if (lstr->avail_in == 0) {
383 return feof(file->fp);
384 } else {
385 return 0;
386 }
387}
388
389/* lzma_tell and lzma_seek will probably have to be
390 changed if they are seriously going to be used.
391 As far as I know they will only be called for updating
392 an existing file or when using fixed_row_count.
393 Both of which are not allowed when using lzma compression */
394long lzma_tell(struct lzmafile *file) {
395 return ftell(file->fp);
396}
397
398int lzma_seek(struct lzmafile *file, long offset, int whence) {
399 return fseek(file->fp, offset, whence);
400}
401
/* UnpackLZMAOpen opens 'filename' for reading through lzma_open using
   mode "rb".  Returns the handle from lzma_open, or NULL when 'filename'
   is NULL or lzma_open fails. */
void *UnpackLZMAOpen(char *filename) {
  return filename ? lzma_open(filename, "rb") : NULL;
}