Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
b70831d
Chromaprint decoding
lalinsky Dec 26, 2021
a35b41d
Create basic acoustid_fingerprint type
lalinsky Dec 27, 2021
b9d0f37
Fix serialization
lalinsky Dec 27, 2021
aa2db71
Make it more clear not to touch the varlena header directly
lalinsky Dec 27, 2021
c374e56
Test the extension
lalinsky Dec 27, 2021
4b47550
Use pq_sendint
lalinsky Dec 27, 2021
ccd4c98
Merge branch 'main' of github.com:acoustid/pg_acoustid into next2
lalinsky Dec 27, 2021
90d94c1
Use `char vl_len_[4]`
lalinsky Dec 27, 2021
e87ef28
Implement acoustid_fingerprint_decode
lalinsky Dec 28, 2021
9a90d0a
Add the base64 code
lalinsky Dec 28, 2021
563e964
Fix header type
lalinsky Dec 28, 2021
909ad68
Add debug logging
lalinsky Dec 28, 2021
20f8086
More logging
lalinsky Dec 28, 2021
5a317cd
Add postgresql 13 and 14
lalinsky Dec 28, 2021
9cc93b4
Print bits
lalinsky Dec 28, 2021
2ad899b
Only run on pg10
lalinsky Dec 28, 2021
afa8572
More debugging
lalinsky Dec 28, 2021
88e93a8
Use palloc0 for base64 destination
lalinsky Dec 28, 2021
955cd3e
Use base64 decoder from chromaprint
lalinsky Dec 28, 2021
e204b4f
Use __builtin_popcount
lalinsky Dec 28, 2021
a697eee
Enable all supported PG versions
lalinsky Dec 28, 2021
51db45f
Remove urlsafe_base64.{c,h}
lalinsky Dec 28, 2021
2f9ba6c
Add acoustid_fingerprint_decode for bytea
lalinsky Dec 28, 2021
3be5a2e
Reformat code
lalinsky Dec 28, 2021
fa37a4f
Remove debug output
lalinsky Dec 28, 2021
4e69d06
Add fingerprint encoding
lalinsky Dec 28, 2021
e2c18ab
More tests
lalinsky Dec 28, 2021
dc4f1d2
Move the PackInt* functions into c files
lalinsky Dec 28, 2021
6141f2f
Move all the inline code to c files
lalinsky Dec 28, 2021
3cf194d
Add base64.c
lalinsky Dec 28, 2021
1431256
More tests specifically for acoustid_fingerprint_decode
lalinsky Dec 28, 2021
902ee36
Move fingerprint encoding into standalone file
lalinsky Dec 29, 2021
65da22f
Move all the shared definitions to acoustid.h
lalinsky Dec 29, 2021
4d3f895
Add note about running tests
lalinsky Dec 29, 2021
21cd943
Store fingerprints in encoded form
lalinsky Dec 30, 2021
afa420c
Add missing files
lalinsky Dec 30, 2021
c28fdc4
Add support for expanded array
lalinsky Dec 30, 2021
5f8a461
WIP
lalinsky Nov 13, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Language: Cpp
BasedOnStyle: LLVM
AllowShortFunctionsOnASingleLine: None
AlwaysBreakAfterDefinitionReturnType: true
BreakBeforeBinaryOperators: false
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: true
ColumnLimit: 82
IndentCaseLabels: true
MaxEmptyLinesToKeep: 3
IndentWidth: 4
TabWidth: 4
UseTab: Always
BreakBeforeBraces: Allman
SpaceAfterCStyleCast: true
ForEachMacros: [ foreach ]
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ jobs:
test:
strategy:
matrix:
pg: [12, 11, 10]
pg: [14, 13, 12, 11, 10]
name: Test on PostgreSQL ${{ matrix.pg }}
runs-on: ubuntu-latest
container: pgxn/pgxn-tools
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
*.so
*.o
*.bc
/results
/regression.*
23 changes: 21 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
MODULE_big = acoustid
OBJS = acoustid_compare.o
OBJS = \
acoustid.o \
acoustid_fingerprint_type.o \
acoustid_fingerprint_encoding.o \
acoustid_compare.o \
pack_int3_array.o \
pack_int5_array.o \
unpack_int3_array.o \
unpack_int5_array.o \
base64.o \
uint8_vector.o \
fingerprint.o \
encode_fingerprint.o

EXTENSION = acoustid
DATA = acoustid--1.0.sql acoustid--unpackaged--1.0.sql
DATA = acoustid--1.0.sql acoustid--1.0--1.1.sql acoustid--unpackaged--1.0.sql
PGFILEDESC = "acoustid - AcoustID utility functions"

REGRESS = \
acoustid_fingerprint_type \
acoustid_fingerprint_encode \
acoustid_fingerprint_encode_binary \
acoustid_fingerprint_decode \
acoustid_fingerprint_decode_binary

PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ AcoustID utility functions for PostgreSQL
You need the PostgreSQL development package to build this:

```sh
sudo apt-get install postgresql-server-dev-9.1
sudo apt-get install postgresql-server-dev-12
```

If you have multiple PostgreSQL versions you need to specify which version to
build against by setting the `PG_CONFIG` variable:

```sh
export PG_CONFIG=/usr/lib/postgresql/9.1/bin/pg_config
export PG_CONFIG=/usr/lib/postgresql/12/bin/pg_config
```

Build and install the module:
Expand All @@ -21,6 +21,12 @@ make
sudo make install
```

Run tests:

```
sudo make installcheck PGUSER=postgres
```

Using in PostgreSQL:

```sql
Expand Down
43 changes: 43 additions & 0 deletions acoustid--1.0--1.1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- acoustid--1.0--1.1.sql
--
-- Update script from acoustid 1.0 to 1.1: adds the acoustid_fingerprint
-- type, its text I/O functions and the encode/decode helper functions.

-- Complain if the script is sourced in psql rather than run by the
-- extension machinery.
\echo Use "ALTER EXTENSION acoustid UPDATE TO '1.1'" to load this file. \quit

-- Declare a shell type first so that the I/O functions below can reference
-- it; the full definition follows once those functions exist.
CREATE TYPE acoustid_fingerprint;

-- Text input function of the type (cstring -> acoustid_fingerprint).
CREATE OR REPLACE FUNCTION acoustid_fingerprint_type_in(cstring)
RETURNS acoustid_fingerprint
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

-- Text output function of the type (acoustid_fingerprint -> cstring).
CREATE OR REPLACE FUNCTION acoustid_fingerprint_type_out(acoustid_fingerprint)
RETURNS cstring
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

-- Build a fingerprint from its text form.
CREATE OR REPLACE FUNCTION acoustid_fingerprint_decode(text)
RETURNS acoustid_fingerprint
AS 'MODULE_PATHNAME', 'acoustid_fingerprint_decode_from_text'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

-- Build a fingerprint from its bytea form.
CREATE OR REPLACE FUNCTION acoustid_fingerprint_decode_binary(bytea)
RETURNS acoustid_fingerprint
AS 'MODULE_PATHNAME', 'acoustid_fingerprint_decode_from_bytea'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

-- Convert a fingerprint to its text form.
CREATE OR REPLACE FUNCTION acoustid_fingerprint_encode(acoustid_fingerprint)
RETURNS text
AS 'MODULE_PATHNAME', 'acoustid_fingerprint_encode_to_text'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

-- Convert a fingerprint to its bytea form.
CREATE OR REPLACE FUNCTION acoustid_fingerprint_encode_binary(acoustid_fingerprint)
RETURNS bytea
AS 'MODULE_PATHNAME', 'acoustid_fingerprint_encode_to_bytea'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

-- Disabled: the matching C function in acoustid.c is currently commented out.
-- CREATE OR REPLACE FUNCTION acoustid_fingerprint(int4, int4[])
-- RETURNS acoustid_fingerprint
-- AS 'MODULE_PATHNAME', 'acoustid_fingerprint_from_int4array'
-- LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

-- Complete the type definition. It is variable-length (varlena) and stored
-- with EXTERNAL storage.
CREATE TYPE acoustid_fingerprint (
INPUT = acoustid_fingerprint_type_in,
OUTPUT = acoustid_fingerprint_type_out,
INTERNALLENGTH = VARIABLE,
STORAGE = EXTERNAL
);
41 changes: 41 additions & 0 deletions acoustid.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/* acoustid.c */

#include "acoustid.h"


#include "utils/array.h"
#include "catalog/pg_type.h"

PG_MODULE_MAGIC;

/*
PG_FUNCTION_INFO_V1(acoustid_fingerprint_from_int4array);

Datum acoustid_fingerprint_from_int4array(PG_FUNCTION_ARGS) {
int version = PG_GETARG_INT32(0);
ArrayType *input = PG_GETARG_ARRAYTYPE_P(1);

size_t num_terms;
uint32_t *terms;

if (ARR_ELEMTYPE(input) != INT4OID) {
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("array must contain int4 values")));
}

if (ARR_NDIM(input) != 1) {
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("array must be one-dimensional")));
}

if (ARR_HASNULL(input)) {
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("array must not contain nulls")));
}

num_terms = ARR_DIMS(input)[0];
terms = (uint32_t *)ARR_DATA_PTR(input);

PG_RETURN_FINGERPRINT_P(create_fingerprint(version, num_terms, terms));
}
*/
2 changes: 1 addition & 1 deletion acoustid.control
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# acoustid extension
comment = 'AcoustID utility functions'
default_version = '1.0'
default_version = '1.1'
module_pathname = '$libdir/acoustid'
relocatable = true
91 changes: 91 additions & 0 deletions acoustid.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#ifndef PG_ACOUSTID_H_
#define PG_ACOUSTID_H_

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <memory.h>

#ifndef NO_PG_EXTENSION
#include "postgres.h"
#include "fmgr.h"
#include "utils/expandeddatum.h"
#include "utils/memutils.h"
#include "libpq/pqformat.h"
#else
#include "mock_postgres.h"
#endif

#include "fingerprint.h"

/*
* Packed fingerprint, implemented as PostgreSQL varlena object.
*
* The first 4 bytes contain the total object size in bytes. Be sure to use
* VARSIZE() and SET_VARSIZE() to access it; never read or write vl_len_
* directly.
*
* The remaining bytes contain the fingerprint data. The struct ends in a
* flexible array member, so the data length is not part of the type.
* NOTE(review): the data length is presumably the varlena size minus the
* header size -- confirm against the code that allocates these objects.
*/
typedef struct {
/* varlena header (do not touch directly!) */
char vl_len_[4];

/* fingerprint data (variable length) */
uint8_t data[FLEXIBLE_ARRAY_MEMBER];
} FingerprintType;

/*
* An expanded fingerprint is contained within a private memory context (as
* all expanded objects must be) and has a control structure as below.
*
* The expanded fingerprint might contain a regular packed fingerprint if
* that was the original input. Otherwise, it contains the fingerprint data
* as an array of uint32_t values.
*/
#define EF_MAGIC 922322027

typedef struct ExpandedFingerprintHeader {
/* Standard header for expanded objects */
ExpandedObjectHeader hdr;

/* Magic number to detect corruption (presumably EF_MAGIC -- confirm) */
uint32_t ef_magic;

/*
* flat_size is the current space requirement for the flat equivalent of
* the expanded fingerprint, if known; otherwise it's 0. We store this to
* make consecutive calls of get_flat_size cheap.
*/
Size flat_size;

/*
* fdata points to the flat (packed) representation if it is valid, else
* it is NULL.
*/
FingerprintType *fdata;

/*
* data is the expanded fingerprint data.
*/
FingerprintData *data;
} ExpandedFingerprintHeader;

/*
* Union of the two fingerprint representations: the flat varlena form
* (FingerprintType) and the expanded-object form (ExpandedFingerprintHeader).
*
* NOTE(review): presumably used so that one pointer type can refer to either
* form, in the manner of PostgreSQL's AnyArrayType -- confirm against the
* code that uses this type.
*/
typedef union AnyFingerprintType {
FingerprintType fp;
ExpandedFingerprintHeader efp;
} AnyFingerprintType;

/*
* Fetch function argument number x as a FingerprintType pointer. The datum is
* passed through PG_DETOAST_DATUM before being cast.
*/
#define PG_GETARG_FINGERPRINT_P(x) ((FingerprintType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(x)))
/* Return a fingerprint pointer from a function; expands to PG_RETURN_POINTER. */
#define PG_RETURN_FINGERPRINT_P(x) PG_RETURN_POINTER(x)

Datum make_expanded_fingerprint(FingerprintData *data, MemoryContext parentcontext);

Datum acoustid_fingerprint_type_in(PG_FUNCTION_ARGS);
Datum acoustid_fingerprint_type_out(PG_FUNCTION_ARGS);

Datum acoustid_fingerprint_encode_to_text(PG_FUNCTION_ARGS);
Datum acoustid_fingerprint_encode_to_bytea(PG_FUNCTION_ARGS);
Datum acoustid_fingerprint_decode_from_text(PG_FUNCTION_ARGS);
Datum acoustid_fingerprint_decode_from_bytea(PG_FUNCTION_ARGS);

#endif // PG_ACOUSTID_H_
59 changes: 19 additions & 40 deletions acoustid_compare.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@
#include "catalog/pg_type.h"
#include "popcount.h"

PG_FUNCTION_INFO_V1(acoustid_compare);
Datum acoustid_compare(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(acoustid_compare2);
Datum acoustid_compare2(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(acoustid_compare3);
Datum acoustid_compare3(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(acoustid_extract_query);
Datum acoustid_extract_query(PG_FUNCTION_ARGS);

/* fingerprint matcher settings */
#define ACOUSTID_MAX_BIT_ERROR 2
#define ACOUSTID_MAX_ALIGN_OFFSET 120
Expand All @@ -24,20 +36,6 @@
#define UNIQ_MASK ((1 << MATCH_BITS) - 1)
#define UNIQ_STRIP(x) ((uint32_t)(x) >> (32 - MATCH_BITS))

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(acoustid_compare);
Datum acoustid_compare(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(acoustid_compare2);
Datum acoustid_compare2(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(acoustid_compare3);
Datum acoustid_compare3(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(acoustid_extract_query);
Datum acoustid_extract_query(PG_FUNCTION_ARGS);

/* dimension of array */
#define NDIM 1

Expand All @@ -62,29 +60,10 @@ Datum acoustid_extract_query(PG_FUNCTION_ARGS);

#define ARRISVOID(x) ((x) == NULL || ARRNELEMS(x) == 0)

/*
 * 64-bit population count (Hamming weight), after
 * http://en.wikipedia.org/wiki/Hamming_weight
 */

/* Bit masks used by the parallel bit-summing steps below. */
const uint64_t m1 = 0x5555555555555555ULL;  /* 0101...: low bit of each 2-bit group */
const uint64_t m2 = 0x3333333333333333ULL;  /* 00110011...: low 2 bits of each 4-bit group */
const uint64_t m4 = 0x0f0f0f0f0f0f0f0fULL;  /* 4 zeros, 4 ones, repeated */
const uint64_t m8 = 0x00ff00ff00ff00ffULL;  /* 8 zeros, 8 ones, repeated */
const uint64_t m16 = 0x0000ffff0000ffffULL; /* 16 zeros, 16 ones, repeated */
const uint64_t m32 = 0x00000000ffffffffULL; /* 32 zeros, 32 ones */
const uint64_t hff = 0xffffffffffffffffULL; /* all ones */
const uint64_t h01 = 0x0101010101010101ULL; /* sum of 256^k for k = 0..7 */

/*
 * Count the set bits of x.
 *
 * Partial counts are accumulated in progressively wider fields (2, 4, then
 * 8 bits); a final multiplication adds the eight per-byte counts into the
 * most significant byte, which is then shifted down.
 *
 * Returns the number of 1 bits in x (0..64).
 */
inline static int
popcount_3(uint64_t x)
{
	uint64_t per_pair;
	uint64_t per_nibble;
	uint64_t per_byte;

	/* each 2-bit field holds the number of bits set in that field */
	per_pair = x - ((x >> 1) & m1);

	/* each 4-bit field holds the sum of its two 2-bit counts */
	per_nibble = (per_pair & m2) + ((per_pair >> 2) & m2);

	/* each byte holds the sum of its two 4-bit counts */
	per_byte = (per_nibble + (per_nibble >> 4)) & m4;

	/* the multiply sums all bytes into the top byte */
	return (int) ((per_byte * h01) >> 56);
}

#define BITCOUNT(x) popcount_lookup8(x)
#define BITCOUNT64(x) popcount_3(x)

Datum acoustid_compare(PG_FUNCTION_ARGS);
Datum acoustid_compare2(PG_FUNCTION_ARGS);
Datum acoustid_compare3(PG_FUNCTION_ARGS);
Datum acoustid_extract_query(PG_FUNCTION_ARGS);

static float4
match_fingerprints(int32 *a, int asize, int32 *b, int bsize)
Expand All @@ -97,7 +76,7 @@ match_fingerprints(int32 *a, int asize, int32 *b, int bsize)
int jbegin = Max(0, i - ACOUSTID_MAX_ALIGN_OFFSET);
int jend = Min(bsize, i + ACOUSTID_MAX_ALIGN_OFFSET);
for (j = jbegin; j < jend; j++) {
int biterror = BITCOUNT(a[i] ^ b[j]);
int biterror = POPCOUNT(a[i] ^ b[j]);
/* ereport(DEBUG5, (errmsg("comparing %d and %d with error %d", i, j, biterror))); */
if (biterror <= ACOUSTID_MAX_BIT_ERROR) {
int offset = i - j + bsize;
Expand Down Expand Up @@ -208,7 +187,7 @@ match_fingerprints2(int32 *a, int asize, int32 *b, int bsize, int maxoffset)
bdata = (uint64_t *)b;
biterror = 0;
for (i = 0; i < size; i++, adata++, bdata++) {
biterror += BITCOUNT64(*adata ^ *bdata);
biterror += POPCOUNT64(*adata ^ *bdata);
}
score = (size * 2.0 / minsize) * (1.0 - 2.0 * (float4)biterror / (64 * size));
if (score < 0.0) {
Expand Down Expand Up @@ -241,7 +220,7 @@ match_fingerprints3(int32 *a, int asize, int32 *b, int bsize, int maxoffset)
}
for (j = jbegin; j < jend; j++) {
int offset = i - j + bsize;
int biterror = BITCOUNT(a[i] ^ b[j]);
int biterror = POPCOUNT(a[i] ^ b[j]);
// Randomly selected blocks share around half their bits, so only count
// errors less than 16 bits
if (biterror < 16) {
Expand Down
Loading