1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 #ifndef INCLUDE_sys_hashsig_h__
8 #define INCLUDE_sys_hashsig_h__
9 
10 #include "git2/common.h"
11 
12 GIT_BEGIN_DECL
13 
/**
 * Similarity signature of arbitrary text content based on line hashes
 *
 * This is an opaque type: only the forward declaration is visible in this
 * header, so callers handle it exclusively through pointers. Signatures
 * are produced by `git_hashsig_create` or `git_hashsig_create_fromfile`
 * and released with `git_hashsig_free`.
 */
typedef struct git_hashsig git_hashsig;
18 
/**
 * Options for hashsig computation
 *
 * The options GIT_HASHSIG_NORMAL, GIT_HASHSIG_IGNORE_WHITESPACE, and
 * GIT_HASHSIG_SMART_WHITESPACE are mutually exclusive and should not be
 * combined with one another.
 *
 * NOTE(review): GIT_HASHSIG_ALLOW_SMALL_FILES is not listed among the
 * exclusive options and occupies its own bit, so it can presumably be
 * OR-ed with one of the options above -- confirm against the
 * implementation.
 */
typedef enum {
	/**
	 * Use all data (no whitespace handling; this is the zero value)
	 */
	GIT_HASHSIG_NORMAL = 0,

	/**
	 * Ignore whitespace
	 */
	GIT_HASHSIG_IGNORE_WHITESPACE = (1 << 0),

	/**
	 * Ignore \r and all space after \n
	 */
	GIT_HASHSIG_SMART_WHITESPACE = (1 << 1),

	/**
	 * Allow hashing of small files
	 *
	 * Per the documentation of the signature-creation functions, input
	 * that is too small otherwise results in GIT_EBUFS.
	 */
	GIT_HASHSIG_ALLOW_SMALL_FILES = (1 << 2)
} git_hashsig_option_t;
46 
/**
 * Compute a similarity signature for a text buffer
 *
 * If you have passed the option GIT_HASHSIG_IGNORE_WHITESPACE, then the
 * whitespace will be removed from the buffer while it is being processed,
 * modifying the buffer in place. Sorry about that!
 *
 * NOTE(review): `buf` is declared `const char *` in the prototype below,
 * which conflicts with the in-place modification described above; that
 * paragraph may be outdated -- confirm against the implementation.
 *
 * The signature returned through `out` should be released with
 * `git_hashsig_free` once it is no longer needed.
 *
 * @param out The computed similarity signature.
 * @param buf The input buffer.
 * @param buflen The input buffer size.
 * @param opts The signature computation options (see
 * `git_hashsig_option_t`).
 * @return 0 on success, GIT_EBUFS if the buffer doesn't contain enough data to
 * compute a valid signature (unless GIT_HASHSIG_ALLOW_SMALL_FILES is set), or
 * error code.
 */
GIT_EXTERN(int) git_hashsig_create(
	git_hashsig **out,
	const char *buf,
	size_t buflen,
	git_hashsig_option_t opts);
67 
/**
 * Compute a similarity signature for a text file
 *
 * This walks through the file, only loading a maximum of 4K of file data at
 * a time. Otherwise, it acts just like `git_hashsig_create`.
 *
 * The signature returned through `out` should be released with
 * `git_hashsig_free` once it is no longer needed.
 *
 * @param out The computed similarity signature.
 * @param path The path to the input file.
 * @param opts The signature computation options (see
 * `git_hashsig_option_t`).
 * @return 0 on success, GIT_EBUFS if the input doesn't contain enough data to
 * compute a valid signature (unless GIT_HASHSIG_ALLOW_SMALL_FILES is set), or
 * error code.
 */
GIT_EXTERN(int) git_hashsig_create_fromfile(
	git_hashsig **out,
	const char *path,
	git_hashsig_option_t opts);
85 
/**
 * Release memory for a content similarity signature
 *
 * Intended for signatures obtained from `git_hashsig_create` or
 * `git_hashsig_create_fromfile`.
 *
 * NOTE(review): whether passing NULL is accepted is not stated in this
 * header -- confirm against the implementation before relying on it.
 *
 * @param sig The similarity signature to free.
 */
GIT_EXTERN(void) git_hashsig_free(git_hashsig *sig);
92 
/**
 * Measure similarity score between two similarity signatures
 *
 * Neither signature is modified through this call (both parameters are
 * pointers to const).
 *
 * @param a The first similarity signature to compare.
 * @param b The second similarity signature to compare.
 * @return The similarity score, in the range 0 to 100, on success, or an
 * error code. NOTE(review): error codes are presumably negative so they
 * can be told apart from a valid score -- confirm.
 */
GIT_EXTERN(int) git_hashsig_compare(
	const git_hashsig *a,
	const git_hashsig *b);
103 
104 GIT_END_DECL
105 
106 #endif
107