qLibc
qhash.c
Go to the documentation of this file.
1 /******************************************************************************
2  * qLibc
3  *
4  * Copyright (c) 2010-2015 Seungyoung Kim.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
28 
29 /**
30  * @file qhash.c Hash APIs.
31  */
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stdbool.h>
36 #include <stdint.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <errno.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include "md5/md5.h"
44 #include "qinternal.h"
45 #include "utilities/qhash.h"
46 
47 /**
48  * Calculate 128-bit(16-bytes) MD5 hash.
49  *
50  * @param data source object
51  * @param nbytes size of data
52  * @param retbuf user buffer. It must be at leat 16-bytes long.
53  *
54  * @return true if successful, otherwise false.
55  *
56  * @code
57  * // get MD5
58  * unsigned char md5hash[16];
59  * qhashmd5((void*)"hello", 5, md5hash);
60  *
61  * // hex encode
62  * char *md5ascii = qhex_encode(md5hash, 16);
63  * printf("Hex encoded MD5: %s\n", md5ascii);
64  * free(md5ascii);
65  * @endcode
66  */
67 bool qhashmd5(const void *data, size_t nbytes, void *retbuf) {
68  if (data == NULL || retbuf == NULL) {
69  errno = EINVAL;
70  return false;
71  }
72 
73  MD5_CTX context;
74  MD5Init(&context);
75  MD5Update(&context, (unsigned char *) data, (unsigned int) nbytes);
76  MD5Final(retbuf, &context);
77 
78  return true;
79 }
80 
81 /**
82  * Get 128-bit MD5 hash of a file contents.
83  *
84  * @param filepath file path
85  * @param offset start offset. Set to 0 to digest from beginning of file.
86  * @param nbytes number of bytes to digest. Set to 0 to digest until end
87  * of file.
88  * @param retbuf user buffer. It must be at leat 16-bytes long.
89  *
90  * @return true if successful, otherwise false.
91  *
92  * @code
93  * unsigned char md5hash[16];
94  * qhashmd5_file("/tmp/test.dat", 0, 0, md5hash);
95  * @endcode
96  */
97 bool qhashmd5_file(const char *filepath, off_t offset, ssize_t nbytes,
98  void *retbuf) {
99  if (filepath == NULL || offset < 0 || nbytes < 0 || retbuf == NULL) {
100  errno = EINVAL;
101  return false;
102  }
103 
104  int fd = open(filepath, O_RDONLY, 0);
105  if (fd < 0)
106  return false;
107 
108  struct stat st;
109  if (fstat(fd, &st) < 0)
110  return false;
111  size_t size = st.st_size;
112 
113  // check filesize
114  if (size < offset + nbytes) {
115  errno = EINVAL;
116  close(fd);
117  return false;
118  }
119 
120  // seek
121  if (offset > 0) {
122  if (lseek(fd, offset, SEEK_SET) != offset) {
123  close(fd);
124  return false;
125  }
126  }
127 
128  MD5_CTX context;
129  MD5Init(&context);
130  ssize_t toread, nread;
131  unsigned char buf[32 * 1024];
132  for (toread = nbytes; toread > 0; toread -= nread) {
133  if (toread > sizeof(buf))
134  nread = read(fd, buf, sizeof(buf));
135  else
136  nread = read(fd, buf, toread);
137  if (nread < 0)
138  break;
139  MD5Update(&context, buf, nread);
140  }
141  close(fd);
142  if (toread != 0)
143  return false;
144  MD5Final(retbuf, &context);
145 
146  return true;
147 }
148 
/**
 * Get 32-bit FNV1 hash.
 *
 * @param data      source data
 * @param nbytes    size of data
 *
 * @return 32-bit unsigned hash value. 0 is returned when data is NULL or
 *         nbytes is 0.
 *
 * @note Hashing stops at the first zero byte found within the first nbytes
 *       bytes, so data is effectively treated as a string of at most
 *       nbytes characters.
 *
 * @code
 *   uint32_t hashval = qhashfnv1_32((void*)"hello", 5);
 * @endcode
 *
 * @code
 *  Fowler/Noll/Vo hash
 *
 *  The basis of this hash algorithm was taken from an idea sent as reviewer
 *  comments to the IEEE POSIX P1003.2 committee by:
 *
 *      Phong Vo (http://www.research.att.com/info/kpv/)
 *      Glenn Fowler (http://www.research.att.com/~gsf/)
 *
 *  In a subsequent ballot round:
 *
 *      Landon Curt Noll (http://www.isthe.com/chongo/)
 *
 *  improved on their algorithm.  Some people tried this hash and found that
 *  it worked rather well.  In an EMail message to Landon, they named it the
 *  ``Fowler/Noll/Vo'' or FNV hash.
 *
 *  FNV hashes are designed to be fast while maintaining a low collision rate.
 *  The FNV speed allows one to quickly hash lots of data while maintaining
 *  a reasonable collision rate.  See:
 *
 *      http://www.isthe.com/chongo/tech/comp/fnv/index.html
 *
 *  for more details as well as other forms of the FNV hash.
 * @endcode
 */
uint32_t qhashfnv1_32(const void *data, size_t nbytes) {
    if (data == NULL || nbytes == 0)
        return 0;

    const unsigned char *dp = (const unsigned char *) data;
    uint32_t h = 0x811C9DC5;  // FNV-1 32-bit offset basis

    // The remaining length is tested BEFORE the byte is read so that the
    // loop never touches data[nbytes].
    for (; nbytes > 0 && *dp != 0; dp++, nbytes--) {
        // FNV-1: multiply by the 32-bit FNV prime, then xor in the byte.
        // (Same value as h += (h<<1)+(h<<4)+(h<<7)+(h<<8)+(h<<24).)
        h *= 0x01000193;
        h ^= *dp;
    }

    return h;
}
205 
/**
 * Get 64-bit FNV1 hash integer.
 *
 * @param data      source data
 * @param nbytes    size of data
 *
 * @return 64-bit unsigned hash value. 0 is returned when data is NULL or
 *         nbytes is 0.
 *
 * @note Hashing stops at the first zero byte found within the first nbytes
 *       bytes, so data is effectively treated as a string of at most
 *       nbytes characters.
 *
 * @code
 *   uint64_t fnv64 = qhashfnv1_64((void*)"hello", 5);
 * @endcode
 */
uint64_t qhashfnv1_64(const void *data, size_t nbytes) {
    if (data == NULL || nbytes == 0)
        return 0;

    const unsigned char *dp = (const unsigned char *) data;
    uint64_t h = 0xCBF29CE484222325ULL;  // FNV-1 64-bit offset basis

    // The remaining length is tested BEFORE the byte is read so that the
    // loop never touches data[nbytes].
    for (; nbytes > 0 && *dp != 0; dp++, nbytes--) {
        // FNV-1: multiply by the 64-bit FNV prime, then xor in the byte.
        // (Same value as
        //  h += (h<<1)+(h<<4)+(h<<5)+(h<<7)+(h<<8)+(h<<40).)
        h *= 0x100000001B3ULL;
        h ^= *dp;
    }

    return h;
}
237 
/**
 * Get 32-bit Murmur3 hash.
 *
 * @param data      source data
 * @param nbytes    size of data
 *
 * @return 32-bit unsigned hash value. 0 is returned when data is NULL or
 *         nbytes is 0.
 *
 * @note The seed is fixed to 0. Whole 4-byte blocks are read in the host's
 *       native byte order, so the value depends on host endianness. data
 *       does not need any particular alignment.
 *
 * @code
 *   uint32_t hashval = qhashmurmur3_32((void*)"hello", 5);
 * @endcode
 *
 * @code
 *  MurmurHash3 was created by Austin Appleby  in 2008. The initial
 *  implementation was published in C++ and placed in the public.
 *    https://sites.google.com/site/murmurhash/
 *  Seungyoung Kim has ported its implementation into C language
 *  in 2012 and published it as a part of qLibc component.
 * @endcode
 */
uint32_t qhashmurmur3_32(const void *data, size_t nbytes) {
    if (data == NULL || nbytes == 0)
        return 0;

    const uint32_t c1 = 0xcc9e2d51;
    const uint32_t c2 = 0x1b873593;

    // Byte pointer: arithmetic on void* is a compiler extension, and a
    // size_t block count cannot be truncated the way an int could.
    const uint8_t *bytes = (const uint8_t *) data;
    const size_t nblocks = nbytes / 4;
    const uint8_t *tail = bytes + nblocks * 4;

    uint32_t h = 0;
    uint32_t k;

    // body: mix in every whole 4-byte block
    size_t i;
    for (i = 0; i < nblocks; i++) {
        // memcpy instead of dereferencing a cast uint32_t*: the input may
        // be unaligned and is not necessarily a uint32_t object.
        memcpy(&k, bytes + i * 4, sizeof(k));

        k *= c1;
        k = (k << 15) | (k >> (32 - 15));
        k *= c2;

        h ^= k;
        h = (h << 13) | (h >> (32 - 13));
        h = (h * 5) + 0xe6546b64;
    }

    // tail: the 1..3 bytes left after the last whole block
    k = 0;
    switch (nbytes & 3) {
        case 3:
            k ^= (uint32_t) tail[2] << 16;
            /* fallthrough */
        case 2:
            k ^= (uint32_t) tail[1] << 8;
            /* fallthrough */
        case 1:
            k ^= tail[0];
            k *= c1;
            k = (k << 15) | (k >> (32 - 15));
            k *= c2;
            h ^= k;
            break;
        default:
            break;
    }

    // finalization: fold in the length, then avalanche the bits
    h ^= (uint32_t) nbytes;

    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}
309 
/**
 * Get 128-bit Murmur3 hash.
 *
 * @param data      source data
 * @param nbytes    size of data
 * @param retbuf    user buffer. It must be at least 16-bytes long.
 *
 * @return true if successful, otherwise false. false is returned, with
 *         errno set to EINVAL, when data or retbuf is NULL or nbytes is 0.
 *
 * @note The seed is fixed to 0. Whole 16-byte blocks are read, and the two
 *       64-bit result words are stored, in the host's native byte order.
 *       Neither data nor retbuf needs any particular alignment.
 *
 * @code
 *   // get 128-bit Murmur3 hash.
 *   unsigned char hash[16];
 *   qhashmurmur3_128((void*)"hello", 5, hash);
 *
 *   // hex encode
 *   char *ascii = qhex_encode(hash, 16);
 *   printf("Hex encoded Murmur3: %s\n", ascii);
 *   free(ascii);
 * @endcode
 */
bool qhashmurmur3_128(const void *data, size_t nbytes, void *retbuf) {
    if (data == NULL || nbytes == 0 || retbuf == NULL) {
        errno = EINVAL;
        return false;
    }

    const uint64_t c1 = 0x87c37b91114253d5ULL;
    const uint64_t c2 = 0x4cf5ad432745937fULL;

    // Byte pointer: arithmetic on void* is a compiler extension, and a
    // size_t block count cannot be truncated the way an int could.
    const uint8_t *bytes = (const uint8_t *) data;
    const size_t nblocks = nbytes / 16;
    const uint8_t *tail = bytes + nblocks * 16;

    uint64_t h1 = 0;
    uint64_t h2 = 0;
    uint64_t k1, k2;

    // body: mix in every whole 16-byte block as two 64-bit words
    size_t i;
    for (i = 0; i < nblocks; i++) {
        // memcpy instead of dereferencing a cast uint64_t*: the input may
        // be unaligned and is not necessarily made of uint64_t objects.
        memcpy(&k1, bytes + i * 16, sizeof(k1));
        memcpy(&k2, bytes + i * 16 + 8, sizeof(k2));

        k1 *= c1;
        k1 = (k1 << 31) | (k1 >> (64 - 31));
        k1 *= c2;
        h1 ^= k1;

        h1 = (h1 << 27) | (h1 >> (64 - 27));
        h1 += h2;
        h1 = h1 * 5 + 0x52dce729;

        k2 *= c2;
        k2 = (k2 << 33) | (k2 >> (64 - 33));
        k2 *= c1;
        h2 ^= k2;

        h2 = (h2 << 31) | (h2 >> (64 - 31));
        h2 += h1;
        h2 = h2 * 5 + 0x38495ab5;
    }

    // tail: the 1..15 bytes left after the last whole block. Bytes 8..14
    // build k2, bytes 0..7 build k1; every case deliberately falls through.
    k1 = k2 = 0;
    switch (nbytes & 15) {
        case 15:
            k2 ^= (uint64_t)(tail[14]) << 48;
            /* fallthrough */
        case 14:
            k2 ^= (uint64_t)(tail[13]) << 40;
            /* fallthrough */
        case 13:
            k2 ^= (uint64_t)(tail[12]) << 32;
            /* fallthrough */
        case 12:
            k2 ^= (uint64_t)(tail[11]) << 24;
            /* fallthrough */
        case 11:
            k2 ^= (uint64_t)(tail[10]) << 16;
            /* fallthrough */
        case 10:
            k2 ^= (uint64_t)(tail[9]) << 8;
            /* fallthrough */
        case 9:
            k2 ^= (uint64_t)(tail[8]) << 0;
            k2 *= c2;
            k2 = (k2 << 33) | (k2 >> (64 - 33));
            k2 *= c1;
            h2 ^= k2;
            /* fallthrough */
        case 8:
            k1 ^= (uint64_t)(tail[7]) << 56;
            /* fallthrough */
        case 7:
            k1 ^= (uint64_t)(tail[6]) << 48;
            /* fallthrough */
        case 6:
            k1 ^= (uint64_t)(tail[5]) << 40;
            /* fallthrough */
        case 5:
            k1 ^= (uint64_t)(tail[4]) << 32;
            /* fallthrough */
        case 4:
            k1 ^= (uint64_t)(tail[3]) << 24;
            /* fallthrough */
        case 3:
            k1 ^= (uint64_t)(tail[2]) << 16;
            /* fallthrough */
        case 2:
            k1 ^= (uint64_t)(tail[1]) << 8;
            /* fallthrough */
        case 1:
            k1 ^= (uint64_t)(tail[0]) << 0;
            k1 *= c1;
            k1 = (k1 << 31) | (k1 >> (64 - 31));
            k1 *= c2;
            h1 ^= k1;
            break;
        default:
            break;
    }

    //----------
    // finalization: fold in the length, cross-mix, avalanche each half

    h1 ^= nbytes;
    h2 ^= nbytes;

    h1 += h2;
    h2 += h1;

    h1 ^= h1 >> 33;
    h1 *= 0xff51afd7ed558ccdULL;
    h1 ^= h1 >> 33;
    h1 *= 0xc4ceb9fe1a85ec53ULL;
    h1 ^= h1 >> 33;

    h2 ^= h2 >> 33;
    h2 *= 0xff51afd7ed558ccdULL;
    h2 ^= h2 >> 33;
    h2 *= 0xc4ceb9fe1a85ec53ULL;
    h2 ^= h2 >> 33;

    h1 += h2;
    h2 += h1;

    // memcpy rather than a cast uint64_t* store: retbuf is typically an
    // unsigned char[16] with no 8-byte alignment guarantee.
    memcpy(retbuf, &h1, sizeof(h1));
    memcpy((uint8_t *) retbuf + sizeof(h1), &h2, sizeof(h2));

    return true;
}
bool qhashmurmur3_128(const void *data, size_t nbytes, void *retbuf)
Get 128-bit Murmur3 hash.
Definition: qhash.c:330
bool qhashmd5(const void *data, size_t nbytes, void *retbuf)
Calculate 128-bit(16-bytes) MD5 hash.
Definition: qhash.c:67
uint32_t qhashmurmur3_32(const void *data, size_t nbytes)
Get 32-bit Murmur3 hash.
Definition: qhash.c:258
bool qhashmd5_file(const char *filepath, off_t offset, ssize_t nbytes, void *retbuf)
Get 128-bit MD5 hash of a file contents.
Definition: qhash.c:97
uint32_t qhashfnv1_32(const void *data, size_t nbytes)
Get 32-bit FNV1 hash.
Definition: qhash.c:187
uint64_t qhashfnv1_64(const void *data, size_t nbytes)
Get 64-bit FNV1 hash integer.
Definition: qhash.c:218