1 | // ================================================================ // |
---|
2 | // // |
---|
3 | // File : arb_zfile.cxx // |
---|
4 | // Purpose : Compressed file I/O // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in November 2015 // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // ================================================================ // |
---|
10 | |
---|
11 | #include "arb_zfile.h" |
---|
12 | #include "arb_file.h" |
---|
13 | #include "arb_msg.h" |
---|
14 | #include "arb_misc.h" |
---|
15 | #include "arb_string.h" |
---|
16 | |
---|
17 | #include <string> |
---|
18 | #include <map> |
---|
19 | |
---|
20 | using namespace std; |
---|
21 | |
---|
class zinfo {
    // Info stored for each successfully opened file
    // to support a proper error message on close.
    bool        writing;  // false -> reading
    std::string filename; // name of the file as passed when opening
    std::string pipe_cmd; // shell command of the pipe (empty if the file was opened without a pipe)
public:
    // A default-constructed zinfo describes "reading from an unnamed file".
    // 'writing' is initialized explicitly: map::operator[] default-constructs
    // entries, and isOutputPipe() must never read an indeterminate value.
    zinfo() : writing(false) {}
    zinfo(bool writing_, const char *filename_, const char *pipe_cmd_)
        : writing(writing_),
          filename(filename_),
          pipe_cmd(pipe_cmd_)
    {}

    bool isOutputPipe() const { return writing; }
    const char *get_filename() const { return filename.c_str(); }
    const char *get_pipecmd() const { return pipe_cmd.c_str(); }
};

// Registry of all streams currently opened via ARB_zfopen (keyed by stream pointer).
static std::map<FILE*, zinfo> zfile_info;
---|
41 | |
---|
42 | FILE *ARB_zfopen(const char *name, const char *mode, FileCompressionMode cmode, GB_ERROR& error, bool hideStderr) { |
---|
43 | arb_assert(!error); |
---|
44 | |
---|
45 | if (strchr(mode, 'a')) { |
---|
46 | error = "Cannot append to file using ARB_zfopen"; |
---|
47 | return NULp; |
---|
48 | } |
---|
49 | if (strchr(mode, 't')) { |
---|
50 | error = "Cannot use textmode for ARB_zfopen"; |
---|
51 | return NULp; |
---|
52 | } |
---|
53 | if (strchr(mode, '+')) { |
---|
54 | error = "Cannot open file in read and write mode with ARB_zfopen"; |
---|
55 | return NULp; |
---|
56 | } |
---|
57 | |
---|
58 | bool forOutput = strchr(mode, 'w'); |
---|
59 | FILE *fp = NULp; |
---|
60 | |
---|
61 | if (cmode == ZFILE_AUTODETECT) { |
---|
62 | if (forOutput) { |
---|
63 | error = "Autodetecting compression mode only works for input files"; |
---|
64 | } |
---|
65 | else { |
---|
66 | fp = fopen(name, "rb"); |
---|
67 | if (!fp) error = GB_IO_error("opening", name); |
---|
68 | else { |
---|
69 | // detect compression and set 'cmode' |
---|
70 | const size_t MAGICSIZE = 5; |
---|
71 | char buffer[MAGICSIZE]; |
---|
72 | |
---|
73 | size_t bytes_read = fread(buffer, 1, MAGICSIZE, fp); |
---|
74 | fclose(fp); |
---|
75 | fp = NULp; |
---|
76 | |
---|
77 | if (bytes_read>=2 && strncmp(buffer, "\x1f\x8b", 2) == 0) cmode = ZFILE_GZIP; |
---|
78 | else if (bytes_read>=2 && strncmp(buffer, "BZ", 2) == 0) cmode = ZFILE_BZIP2; |
---|
79 | else if (bytes_read>=5 && strncmp(buffer, "\xfd" "7zXZ", 5) == 0) cmode = ZFILE_XZ; |
---|
80 | else { |
---|
81 | cmode = ZFILE_UNCOMPRESSED; |
---|
82 | } |
---|
83 | } |
---|
84 | } |
---|
85 | } |
---|
86 | |
---|
87 | if (cmode == ZFILE_UNCOMPRESSED) { |
---|
88 | fp = fopen(name, mode); |
---|
89 | if (!fp) error = GB_IO_error("opening", name); |
---|
90 | else { |
---|
91 | zfile_info[fp] = zinfo(forOutput, name, ""); |
---|
92 | } |
---|
93 | } |
---|
94 | else { |
---|
95 | if (!error) { |
---|
96 | const char *compressor = NULp; // command used to compress (and decompress) |
---|
97 | const char *decompress_flag = "-d"; // flag needed to decompress (assumes none to compress) |
---|
98 | |
---|
99 | switch (cmode) { |
---|
100 | case ZFILE_GZIP: { |
---|
101 | static char *pigz = ARB_executable("pigz", ARB_getenv_ignore_empty("PATH")); |
---|
102 | compressor = pigz ? pigz : "gzip"; |
---|
103 | break; |
---|
104 | } |
---|
105 | case ZFILE_BZIP2: compressor = "bzip2"; break; |
---|
106 | case ZFILE_XZ: compressor = "xz"; break; |
---|
107 | |
---|
108 | default: |
---|
109 | error = GBS_global_string("Invalid compression mode (%i)", int(cmode)); |
---|
110 | break; |
---|
111 | |
---|
112 | #if defined(USE_BROKEN_COMPRESSION) |
---|
113 | case ZFILE_BROKEN: |
---|
114 | compressor = "arb_weirdo"; // a non-existing command! |
---|
115 | break; |
---|
116 | #endif |
---|
117 | } |
---|
118 | |
---|
119 | if (!error) { |
---|
120 | char *pipeCmd = forOutput |
---|
121 | ? GBS_global_string_copy("%s > %s", compressor, name) |
---|
122 | : GBS_global_string_copy("%s %s < %s", compressor, decompress_flag, name); |
---|
123 | |
---|
124 | if (hideStderr) { |
---|
125 | freeset(pipeCmd, GBS_global_string_copy("( %s 2>/dev/null )", pipeCmd)); |
---|
126 | } |
---|
127 | |
---|
128 | // remove 'b' from mode (pipes are binary by default) |
---|
129 | char *impl_b_mode = ARB_strdup(mode); |
---|
130 | while (1) { |
---|
131 | char *b = strchr(impl_b_mode, 'b'); |
---|
132 | if (!b) break; |
---|
133 | strcpy(b, b+1); |
---|
134 | } |
---|
135 | |
---|
136 | if (forOutput) { // write to pipe |
---|
137 | fp = popen(pipeCmd, impl_b_mode); |
---|
138 | if (!fp) error = GB_IO_error("writing to pipe", pipeCmd); |
---|
139 | } |
---|
140 | else { // read from pipe |
---|
141 | fp = popen(pipeCmd, impl_b_mode); |
---|
142 | if (!fp) error = GB_IO_error("reading from pipe", pipeCmd); |
---|
143 | } |
---|
144 | |
---|
145 | if (!error) { |
---|
146 | zfile_info[fp] = zinfo(forOutput, name, pipeCmd); |
---|
147 | } |
---|
148 | |
---|
149 | free(impl_b_mode); |
---|
150 | free(pipeCmd); |
---|
151 | } |
---|
152 | } |
---|
153 | } |
---|
154 | |
---|
155 | arb_assert(contradicted(fp, error)); |
---|
156 | arb_assert(implicated(error, error[0])); // deny empty error |
---|
157 | return fp; |
---|
158 | } |
---|
159 | |
---|
160 | GB_ERROR ARB_zfclose(FILE *fp) { |
---|
161 | bool fifo = GB_is_fifo(fp); |
---|
162 | |
---|
163 | arb_assert(zfile_info.find(fp) != zfile_info.end()); // file was not opened using ARB_zfopen! |
---|
164 | |
---|
165 | zinfo info = zfile_info[fp]; |
---|
166 | zfile_info.erase(fp); |
---|
167 | |
---|
168 | int res; |
---|
169 | if (fifo) { |
---|
170 | res = pclose(fp); |
---|
171 | } |
---|
172 | else { |
---|
173 | res = fclose(fp); |
---|
174 | } |
---|
175 | |
---|
176 | GB_ERROR error = NULp; |
---|
177 | if (res != 0) { |
---|
178 | int exited = WIFEXITED(res); |
---|
179 | int status = WEXITSTATUS(res); |
---|
180 | #if defined(DEBUG) |
---|
181 | int signaled = WIFSIGNALED(res); |
---|
182 | #endif |
---|
183 | |
---|
184 | if (exited) { |
---|
185 | if (status) { |
---|
186 | if (fifo) { |
---|
187 | error = GBS_global_string("pipe %s\n" |
---|
188 | " file='%s'\n" |
---|
189 | " using cmd='%s'\n" |
---|
190 | " failed with exitcode=%i (broken pipe? corrupted archive?)\n", |
---|
191 | info.isOutputPipe() ? "writing to" : "reading from", |
---|
192 | info.get_filename(), |
---|
193 | info.get_pipecmd(), |
---|
194 | status); |
---|
195 | } |
---|
196 | } |
---|
197 | } |
---|
198 | if (!error) error = GB_IO_error("closing", info.get_filename()); |
---|
199 | #if defined(DEBUG) |
---|
200 | error = GBS_global_string("%s (res=%i, exited=%i, signaled=%i, status=%i)", error, res, exited, signaled, status); |
---|
201 | #endif |
---|
202 | } |
---|
203 | return error; |
---|
204 | } |
---|
205 | |
---|
206 | // -------------------------------------------------------------------------------- |
---|
207 | |
---|
208 | #ifdef UNIT_TESTS |
---|
209 | #ifndef TEST_UNIT_H |
---|
210 | #include <test_unit.h> |
---|
211 | #endif |
---|
212 | |
---|
213 | static char *fileContent(FILE *in, size_t& bytes_read) { |
---|
214 | const size_t BUFFERSIZE = 1000; |
---|
215 | char *buffer = ARB_alloc<char>(BUFFERSIZE+1); |
---|
216 | bytes_read = fread(buffer, 1, BUFFERSIZE, in); |
---|
217 | arb_assert(bytes_read<BUFFERSIZE); |
---|
218 | buffer[bytes_read] = 0; |
---|
219 | return buffer; |
---|
220 | } |
---|
221 | |
---|
// Expects that opening 'name' via ARB_zfopen fails, or (if the open itself
// succeeds) that the following ARB_zfclose fails.
// The reported error has to contain 'errpart'.
#define TEST_EXPECT_ZFOPEN_FAILS(name,mode,cmode,errpart) do{           \
        GB_ERROR  error = NULp;                                         \
        FILE     *fp    = ARB_zfopen(name, mode, cmode, error, false);  \
                                                                        \
        if (!fp) {                                                      \
            TEST_EXPECT_NULL(fp);                                       \
        }                                                               \
        else {                                                          \
            TEST_EXPECT_NULL(error);                                    \
            error = ARB_zfclose(fp);                                    \
        }                                                               \
        TEST_REJECT_NULL(error);                                        \
        TEST_EXPECT_CONTAINS(error, errpart);                           \
    }while(0)
---|
236 | |
---|
237 | void TEST_compressed_io() { |
---|
238 | const char *inText = "general/text.input"; |
---|
239 | const char *outFile = "compressed.out"; |
---|
240 | |
---|
241 | TEST_EXPECT_ZFOPEN_FAILS("", "", ZFILE_UNCOMPRESSED, "Invalid argument"); |
---|
242 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "a", ZFILE_UNCOMPRESSED, "Cannot append to file using ARB_zfopen"); |
---|
243 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "r", ZFILE_UNDEFINED, "Invalid compression mode"); |
---|
244 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "w", ZFILE_AUTODETECT, "only works for input files"); |
---|
245 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "rt", ZFILE_AUTODETECT, "Cannot use textmode"); |
---|
246 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "r+", ZFILE_AUTODETECT, "Cannot open file in read and write mode"); |
---|
247 | |
---|
248 | #if defined(USE_BROKEN_COMPRESSION) |
---|
249 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "r", ZFILE_BROKEN, "broken pipe"); |
---|
250 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "w", ZFILE_BROKEN, "broken pipe"); |
---|
251 | #endif |
---|
252 | |
---|
253 | char *testText; |
---|
254 | const size_t TEST_TEXT_SIZE = 428; |
---|
255 | { |
---|
256 | GB_ERROR error = NULp; |
---|
257 | FILE *in = ARB_zfopen(inText, "r", ZFILE_UNCOMPRESSED, error, false); |
---|
258 | TEST_EXPECT_NULL(error); |
---|
259 | TEST_REJECT_NULL(in); |
---|
260 | |
---|
261 | size_t bytes_read; |
---|
262 | testText = fileContent(in, bytes_read); |
---|
263 | TEST_EXPECT_EQUAL(bytes_read, TEST_TEXT_SIZE); |
---|
264 | |
---|
265 | TEST_EXPECT_NO_ERROR(ARB_zfclose(in)); |
---|
266 | } |
---|
267 | |
---|
268 | int successful_compressions = 0; |
---|
269 | |
---|
270 | for (FileCompressionMode cmode = FileCompressionMode(ZFILE_AUTODETECT+1); |
---|
271 | cmode != ZFILE_UNDEFINED; |
---|
272 | cmode = FileCompressionMode(cmode+1)) |
---|
273 | { |
---|
274 | TEST_ANNOTATE(GBS_global_string("cmode=%i", int(cmode))); |
---|
275 | |
---|
276 | bool compressed_save_failed = false; |
---|
277 | { |
---|
278 | GB_ERROR error = NULp; |
---|
279 | FILE *out = ARB_zfopen(outFile, "w", cmode, error, false); |
---|
280 | |
---|
281 | TEST_EXPECT_NO_ERROR(error); |
---|
282 | TEST_REJECT_NULL(out); |
---|
283 | |
---|
284 | TEST_EXPECT_DIFFERENT(EOF, fputs(testText, out)); |
---|
285 | |
---|
286 | error = ARB_zfclose(out); |
---|
287 | if (error && strstr(error, "failed with exitcode=127") && cmode != ZFILE_UNCOMPRESSED) { |
---|
288 | // assume compression utility is not installed |
---|
289 | compressed_save_failed = true; |
---|
290 | } |
---|
291 | else { |
---|
292 | TEST_EXPECT_NO_ERROR(error); |
---|
293 | } |
---|
294 | } |
---|
295 | |
---|
296 | if (!compressed_save_failed) { |
---|
297 | for (int detect = 0; detect<=1; ++detect) { |
---|
298 | TEST_ANNOTATE(GBS_global_string("cmode=%i detect=%i", int(cmode), detect)); |
---|
299 | |
---|
300 | GB_ERROR error = NULp; |
---|
301 | FILE *in = ARB_zfopen(outFile, "r", detect ? ZFILE_AUTODETECT : cmode, error, false); |
---|
302 | |
---|
303 | TEST_REJECT(error); |
---|
304 | TEST_REJECT_NULL(in); |
---|
305 | |
---|
306 | size_t bytes_read; |
---|
307 | char *content = fileContent(in, bytes_read); |
---|
308 | TEST_EXPECT_NO_ERROR(ARB_zfclose(in)); |
---|
309 | TEST_EXPECT_EQUAL(content, testText); // if this fails for detect==1 -> detection does not work |
---|
310 | free(content); |
---|
311 | } |
---|
312 | successful_compressions++; |
---|
313 | } |
---|
314 | } |
---|
315 | |
---|
316 | TEST_EXPECT(successful_compressions>=3); // at least ZFILE_UNCOMPRESSED, ZFILE_GZIP and ZFILE_BZIP should succeed |
---|
317 | |
---|
318 | free(testText); |
---|
319 | TEST_EXPECT_DIFFERENT(GB_unlink(outFile), -1); |
---|
320 | } |
---|
321 | |
---|
322 | #endif // UNIT_TESTS |
---|
323 | |
---|
324 | // -------------------------------------------------------------------------------- |
---|
325 | |
---|