X-Git-Url: http://www.sigaev.ru/git/gitweb.cgi?p=hstore.git;a=blobdiff_plain;f=hstore_compat.c;fp=hstore_compat.c;h=563456b623750df5c8e5ebee5584b55a18738ff7;hp=0000000000000000000000000000000000000000;hb=2d3cb5062568eab105ed554350ac99bae76ee0ec;hpb=77af220c462dd61507d6cca9b9f54ad3e102e1b6 diff --git a/hstore_compat.c b/hstore_compat.c new file mode 100644 index 0000000..563456b --- /dev/null +++ b/hstore_compat.c @@ -0,0 +1,449 @@ +/* + * contrib/hstore/hstore_compat.c + +Current nested hstore layout (see historical notes below): + +Hstore consists of varlena header, 4 bits (special flags), 28 bits for +nelems/npairs, array of hentries and element array. Special flags are +HS_FLAG_NEWVERSION, HS_FLAG_ARRAY, HS_FLAG_HASH, HS_FLAG_SCALAR. Hentry +starts from 4 special bits: 1st bit (ISFIRST) is set if there is no previous +entry, next 3 bits are used to indicate type of hstore value (ISSTRING, +ISNUMERIC, ISNEST, ISNULL, ISBOOL). Other 28 bits are ending position of +hentry value relative to the beginning of element array. + +Hash key-value accessed as follow: +first key starts from zero and ends at Hentry[0],i-th key starts from +Hentry[i*2-1] and ends at Hentry[i*2], i-th value starts from +align(HEntry[i*2]) and ends at HEntry[i*2 + 1]. Key-Value pairs are +lexicographically ordered by keys. + +Array's first element starts from zero and ends at Hentry[0], and +i-th element starts from align(HEntry[i - 1]) and ends at HEntry[i]. Elements +are not ordered. + +Notes: +Scalar stored as a single-element array. + +Limitations: +Number of elements in array: 2^28 +Number of pairs in hash: 2^28 +Length of string: 2^28 bytes +Length of nested hash or array: 2^28 bytes + + * + * Notes on old/new hstore format disambiguation. + * + * There are three formats to consider: + * 1) old contrib/hstore (referred to as hstore-old) + * 2) prerelease pgfoundry hstore + * 3) new contrib/hstore (v2) + * 4) nested contrib/hstore (v3) + * + * (3) and (4) are upward binary compatible. + * (2) and (3) are identical except for the HS_FLAG_NEWVERSION + * bit, which is set in (3) but not (2). + * + * Values that are already in format (3), or which are + * unambiguously in format (2), are handled by the first + * "return immediately" test in hstoreUpgrade(). + * + * To stress a point: we ONLY get here with possibly-ambiguous + * values if we're doing some sort of in-place migration from an + * old prerelease pgfoundry hstore-new; and we explicitly don't + * support that without fixing up any potentially padded values + * first. Most of the code here is serious overkill, but the + * performance penalty isn't serious (especially compared to the + * palloc() that we have to do anyway) and the belt-and-braces + * validity checks provide some reassurance. (If for some reason + * we get a value that would have worked on the old code, but + * which would be botched by the conversion code, the validity + * checks will fail it first so we get an error rather than bad + * data.) + * + * Note also that empty hstores are the same in (2) and (3), so + * there are some special-case paths for them. + * + * We tell the difference between formats (2) and (3) as follows (but + * note that there are some edge cases where we can't tell; see + * comments in hstoreUpgrade): + * + * First, since there must be at least one entry, we look at + * how the bits line up. The new format looks like: + * + * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen) + * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen) + * + * The old format looks like one of these, depending on endianness + * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos, + * n = isnull) + * + * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv + * nppppppppppppppppppppppppppppppp + * + * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv + * pppppppppppppppppppppppppppppppn + * + * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk + * nppppppppppppppppppppppppppppppp + * + * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk + * pppppppppppppppppppppppppppppppn (usual i386 format) + * + * If the entry is in old format, for the first entry "pos" must be 0. + * We can obviously see that either keylen or vallen must be >32768 + * for there to be any ambiguity (which is why lengths less than that + * are fasttracked in hstore.h) Since "pos"==0, the "v" field in the + * new-format interpretation can only be 0 or 1, which constrains all + * but three bits of the old-format's k and v fields. But in addition + * to all of this, the data length implied by the keylen and vallen + * must fit in the varlena size. So the only ambiguous edge case for + * hstores with only one entry occurs between a new-format entry with + * an excess (~32k) of padding, and an old-format entry. But we know + * which format to use in that case based on how we were compiled, so + * no actual data corruption can occur. + * + * If there is more than one entry, the requirement that keys do not + * decrease in length, and that positions increase contiguously, and + * that the end of the data not be beyond the end of the varlena + * itself, disambiguates in almost all other cases. There is a small + * set of ambiguous cases which could occur if the old-format value + * has a large excess of padding and just the right pattern of key + * sizes, but these are also handled based on how we were compiled. + * + * The otherwise undocumented function hstore_version_diag is provided + * for testing purposes. + */ +#include "postgres.h" + + +#include "hstore.h" + +/* + * This is the structure used for entries in the old contrib/hstore + * implementation. Notice that this is the same size as the new entry + * (two 32-bit words per key/value pair) and that the header is the + * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE + * etc. are compatible. + * + * If the above statement isn't true on some bizarre platform, we're + * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat). + */ +typedef struct +{ + uint16 keylen; + uint16 vallen; + uint32 + valisnull:1, + pos:31; +} HOldEntry; + +/* + * New Old version (new not-nested version of hstore, v2 version) + * V2 and v3 (nested) are upward binary compatible. But + * framework was fully changed. Keep here old definitions (v2) + */ + + +typedef struct +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + uint32 size_; /* flags and number of items in hstore */ + /* array of HEntry follows */ +} HStoreV2; + +static int hstoreValidNewFormat(HStoreV2 *hs); +static int hstoreValidOldFormat(HStoreV2 *hs); + +#define HS_COUNT(hsp_) (HS_ISEMPTY(hsp_) ? 0 : ((hsp_)->size_ & HS_COUNT_MASK)) +#define HS_SETCOUNT(hsp_,c_) ((hsp_)->size_ = (c_) | HS_FLAG_NEWVERSION | ((hsp_)->size_ & ~HS_COUNT_MASK)) + +/* + * "x" comes from an existing HS_COUNT() (as discussed, <= INT_MAX/24) or a + * Pairs array length (due to MaxAllocSize, <= INT_MAX/40). "lenstr" is no + * more than INT_MAX, that extreme case arising in hstore_from_arrays(). + * Therefore, this calculation is limited to about INT_MAX / 5 + INT_MAX. + */ +#define HSHRDSIZE (sizeof(HStoreV2)) +#define CALCDATASIZE(x, lenstr) ( (x) * 2 * sizeof(HEntry) + HSHRDSIZE + (lenstr) ) +/* note multiple evaluations of x */ +#define ARRPTR(x) ( (HEntry*) ( (HStoreV2*)(x) + 1 ) ) +#define STRPTR(x) ( (char*)(ARRPTR(x) + HS_ROOT_COUNT((HStoreV2*)(x)) * 2) ) + +/* note multiple/non evaluations */ +#define HS_KEYLEN(arr_,i_) (HSE_LEN((arr_)[2*(i_)])) + +/* ensure the varlena size of an existing hstore is correct */ +#define HS_FIXSIZE(hsp_,count_) \ + do { \ + int bl = (count_) ? HSE_ENDPOS(ARRPTR(hsp_)[2*(count_)-1]) : 0; \ + SET_VARSIZE((hsp_), CALCDATASIZE((count_),bl)); \ + } while (0) + +/* + * Validity test for a new-format hstore. + * 0 = not valid + * 1 = valid but with "slop" in the length + * 2 = exactly valid + */ +static int +hstoreValidNewFormat(HStoreV2 *hs) +{ + int count = HS_COUNT(hs); + HEntry *entries = ARRPTR(hs); + int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0; + int vsize = CALCDATASIZE(count, buflen); + int i; + + if (hs->size_ & HS_FLAG_NEWVERSION) + return 2; + + if (count == 0) + return 2; + + if (!HSE_ISFIRST(entries[0])) + return 0; + + if (vsize > VARSIZE(hs)) + return 0; + + /* entry position must be nondecreasing */ + + for (i = 1; i < 2 * count; ++i) + { + if (HSE_ISFIRST(entries[i]) + || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1]))) + return 0; + } + + /* key length must be nondecreasing and keys must not be null */ + + for (i = 1; i < count; ++i) + { + if (HS_KEYLEN(entries, i) < HS_KEYLEN(entries, i - 1)) + return 0; + if (HSE_ISNULL(entries[2 * i])) + return 0; + } + + if (vsize != VARSIZE(hs)) + return 1; + + return 2; +} + +/* + * Validity test for an old-format hstore. + * 0 = not valid + * 1 = valid but with "slop" in the length + * 2 = exactly valid + */ +static int +hstoreValidOldFormat(HStoreV2 *hs) +{ + int count = hs->size_; + HOldEntry *entries = (HOldEntry *) ARRPTR(hs); + int vsize; + int lastpos = 0; + int i; + + if (hs->size_ & HS_FLAG_NEWVERSION) + return 0; + + /* New format uses an HEntry for key and another for value */ + StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry), + "old hstore format is not upward-compatible"); + + if (count == 0) + return 2; + + if (count > 0xFFFFFFF) + return 0; + + if (CALCDATASIZE(count, 0) > VARSIZE(hs)) + return 0; + + if (entries[0].pos != 0) + return 0; + + /* key length must be nondecreasing */ + + for (i = 1; i < count; ++i) + { + if (entries[i].keylen < entries[i - 1].keylen) + return 0; + } + + /* + * entry position must be strictly increasing, except for the first entry + * (which can be ""=>"" and thus zero-length); and all entries must be + * properly contiguous + */ + + for (i = 0; i < count; ++i) + { + if (entries[i].pos != lastpos) + return 0; + lastpos += (entries[i].keylen + + ((entries[i].valisnull) ? 0 : entries[i].vallen)); + } + + vsize = CALCDATASIZE(count, lastpos); + + if (vsize > VARSIZE(hs)) + return 0; + + if (vsize != VARSIZE(hs)) + return 1; + + return 2; +} + + +/* + * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores + */ +HStore * +hstoreUpgrade(Datum orig) +{ + HStoreV2 *hs = (HStoreV2 *) PG_DETOAST_DATUM(orig); + int valid_new; + int valid_old; + bool writable; + + /* Return immediately if no conversion needed */ + if (VARSIZE_ANY(hs) <= VARHDRSZ || + (hs->size_ & HS_FLAG_NEWVERSION) || + hs->size_ == 0 || + (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0])))) + { + if (VARSIZE_ANY_EXHDR(hs) == sizeof(hs->size_)) + { + /* 'new' format but not nested. And empty */ + hs = palloc(sizeof(VARHDRSZ)); + SET_VARSIZE(hs, VARHDRSZ); + } + + return (HStore*)hs; + } + + valid_new = hstoreValidNewFormat(hs); + valid_old = hstoreValidOldFormat(hs); + /* Do we have a writable copy? */ + writable = ((void *) hs != (void *) DatumGetPointer(orig)); + + if (!valid_old || hs->size_ == 0) + { + if (valid_new) + { + /* + * force the "new version" flag and the correct varlena length, + * but only if we have a writable copy already (which we almost + * always will, since short new-format values won't come through + * here) + */ + if (writable) + { + HS_SETCOUNT(hs, HS_COUNT(hs)); + HS_FIXSIZE(hs, HS_COUNT(hs)); + } + return (HStore*)hs; + } + else + { + elog(ERROR, "invalid hstore value found"); + } + } + + /* + * this is the tricky edge case. It is only possible in some quite extreme + * cases (the hstore must have had a lot of wasted padding space at the + * end). But the only way a "new" hstore value could get here is if we're + * upgrading in place from a pre-release version of hstore-new (NOT + * contrib/hstore), so we work off the following assumptions: 1. If you're + * moving from old contrib/hstore to hstore-new, you're required to fix up + * any potential conflicts first, e.g. by running ALTER TABLE ... USING + * col::text::hstore; on all hstore columns before upgrading. 2. If you're + * moving from old contrib/hstore to new contrib/hstore, then "new" values + * are impossible here 3. If you're moving from pre-release hstore-new to + * hstore-new, then "old" values are impossible here 4. If you're moving + * from pre-release hstore-new to new contrib/hstore, you're not doing so + * as an in-place upgrade, so there is no issue So the upshot of all this + * is that we can treat all the edge cases as "new" if we're being built + * as hstore-new, and "old" if we're being built as contrib/hstore. + * + * XXX the WARNING can probably be downgraded to DEBUG1 once this has been + * beta-tested. But for now, it would be very useful to know if anyone can + * actually reach this case in a non-contrived setting. + */ + + if (valid_new) + { +#if HSTORE_IS_HSTORE_NEW + elog(WARNING, "ambiguous hstore value resolved as hstore-new"); + + /* + * force the "new version" flag and the correct varlena length, but + * only if we have a writable copy already (which we almost always + * will, since short new-format values won't come through here) + */ + if (writable) + { + HS_SETCOUNT(hs, HS_COUNT(hs)); + HS_FIXSIZE(hs, HS_COUNT(hs)); + } + return hs; +#else + elog(WARNING, "ambiguous hstore value resolved as hstore-old"); +#endif + } + + /* + * must have an old-style value. Overwrite it in place as a new-style one, + * making sure we have a writable copy first. + */ + + if (!writable) + hs = (HStoreV2 *) PG_DETOAST_DATUM_COPY(orig); + + { + int count = hs->size_; + HEntry *new_entries = ARRPTR(hs); + HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs); + int i; + + for (i = 0; i < count; ++i) + { + uint32 pos = old_entries[i].pos; + uint32 keylen = old_entries[i].keylen; + uint32 vallen = old_entries[i].vallen; + bool isnull = old_entries[i].valisnull; + + if (isnull) + vallen = 0; + + new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK; + new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK) + | ((isnull) ? HENTRY_ISNULL : 0)); + } + + if (count) + new_entries[0].entry |= HENTRY_ISFIRST; + HS_SETCOUNT(hs, count); + HS_FIXSIZE(hs, count); + } + + return (HStore*)hs; +} + + +PG_FUNCTION_INFO_V1(hstore_version_diag); +Datum hstore_version_diag(PG_FUNCTION_ARGS); +Datum +hstore_version_diag(PG_FUNCTION_ARGS) +{ + HStoreV2 *hs = (HStoreV2 *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + int valid_new = hstoreValidNewFormat(hs); + int valid_old = hstoreValidOldFormat(hs); + + PG_RETURN_INT32(valid_old * 10 + valid_new); +}