2 * contrib/hstore/hstore_compat.c
4 Current nested hstore layout (see historical notes below):
6 Hstore consists of varlena header, 4 bits (special flags), 28 bits for
7 nelems/npairs, array of hentries and element array. Special flags are
8 HS_FLAG_NEWVERSION, HS_FLAG_ARRAY, HS_FLAG_HASH, HS_FLAG_SCALAR. Hentry
9 starts from 4 special bits: 1st bit (ISFIRST) is set if there is no previous
10 entry, next 3 bits are used to indicate type of hstore value (ISSTRING,
11 ISNUMERIC, ISNEST, ISNULL, ISBOOL). Other 28 bits are ending position of
12 hentry value relative to the beginning of element array.
14 Hash key-value accessed as follow:
15 first key starts from zero and ends at Hentry[0],i-th key starts from
16 Hentry[i*2-1] and ends at Hentry[i*2], i-th value starts from
17 align(HEntry[i*2]) and ends at HEntry[i*2 + 1]. Key-Value pairs are
18 lexicographically ordered by keys.
20 Array's first element starts from zero and ends at Hentry[0], and
21 i-th element starts from align(HEntry[i - 1]) and ends at HEntry[i]. Elements
25 Scalar stored as a single-element array.
28 Number of elements in array: 2^28
29 Number of pairs in hash: 2^28
30 Length of string: 2^28 bytes
31 Length of nested hash or array: 2^28 bytes
34 * Notes on old/new hstore format disambiguation.
36 * There are three formats to consider:
37 * 1) old contrib/hstore (referred to as hstore-old)
38 * 2) prerelease pgfoundry hstore
39 * 3) new contrib/hstore (v2)
40 * 4) nested contrib/hstore (v3)
42 * (3) and (4) are upward binary compatible.
43 * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
44 * bit, which is set in (3) but not (2).
46 * Values that are already in format (3), or which are
47 * unambiguously in format (2), are handled by the first
48 * "return immediately" test in hstoreUpgrade().
50 * To stress a point: we ONLY get here with possibly-ambiguous
51 * values if we're doing some sort of in-place migration from an
52 * old prerelease pgfoundry hstore-new; and we explicitly don't
53 * support that without fixing up any potentially padded values
54 * first. Most of the code here is serious overkill, but the
55 * performance penalty isn't serious (especially compared to the
56 * palloc() that we have to do anyway) and the belt-and-braces
57 * validity checks provide some reassurance. (If for some reason
58 * we get a value that would have worked on the old code, but
59 * which would be botched by the conversion code, the validity
60 * checks will fail it first so we get an error rather than bad
63 * Note also that empty hstores are the same in (2) and (3), so
64 * there are some special-case paths for them.
66 * We tell the difference between formats (2) and (3) as follows (but
67 * note that there are some edge cases where we can't tell; see
68 * comments in hstoreUpgrade):
70 * First, since there must be at least one entry, we look at
71 * how the bits line up. The new format looks like:
73 * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
74 * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
76 * The old format looks like one of these, depending on endianness
77 * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
80 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
81 * nppppppppppppppppppppppppppppppp
83 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
84 * pppppppppppppppppppppppppppppppn
86 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
87 * nppppppppppppppppppppppppppppppp
89 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
90 * pppppppppppppppppppppppppppppppn (usual i386 format)
92 * If the entry is in old format, for the first entry "pos" must be 0.
93 * We can obviously see that either keylen or vallen must be >32768
94 * for there to be any ambiguity (which is why lengths less than that
95 * are fasttracked in hstore.h) Since "pos"==0, the "v" field in the
96 * new-format interpretation can only be 0 or 1, which constrains all
97 * but three bits of the old-format's k and v fields. But in addition
98 * to all of this, the data length implied by the keylen and vallen
99 * must fit in the varlena size. So the only ambiguous edge case for
100 * hstores with only one entry occurs between a new-format entry with
101 * an excess (~32k) of padding, and an old-format entry. But we know
102 * which format to use in that case based on how we were compiled, so
103 * no actual data corruption can occur.
105 * If there is more than one entry, the requirement that keys do not
106 * decrease in length, and that positions increase contiguously, and
107 * that the end of the data not be beyond the end of the varlena
108 * itself, disambiguates in almost all other cases. There is a small
109 * set of ambiguous cases which could occur if the old-format value
110 * has a large excess of padding and just the right pattern of key
111 * sizes, but these are also handled based on how we were compiled.
113 * The otherwise undocumented function hstore_version_diag is provided
114 * for testing purposes.
116 #include "postgres.h"
122 * This is the structure used for entries in the old contrib/hstore
123 * implementation. Notice that this is the same size as the new entry
124 * (two 32-bit words per key/value pair) and that the header is the
125 * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
126 * etc. are compatible.
128 * If the above statement isn't true on some bizarre platform, we're
129 * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat).
141 * New Old version (new not-nested version of hstore, v2 version)
142 * V2 and v3 (nested) are upward binary compatible. But
143 * framework was fully changed. Keep here old definitions (v2)
149 int32 vl_len_; /* varlena header (do not touch directly!) */
150 uint32 size_; /* flags and number of items in hstore */
151 /* array of HEntry follows */
154 static int hstoreValidNewFormat(HStoreV2 *hs);
155 static int hstoreValidOldFormat(HStoreV2 *hs);
157 #define HS_COUNT(hsp_) (HS_ISEMPTY(hsp_) ? 0 : ((hsp_)->size_ & HS_COUNT_MASK))
158 #define HS_SETCOUNT(hsp_,c_) ((hsp_)->size_ = (c_) | HS_FLAG_NEWVERSION | ((hsp_)->size_ & ~HS_COUNT_MASK))
161 * "x" comes from an existing HS_COUNT() (as discussed, <= INT_MAX/24) or a
162 * Pairs array length (due to MaxAllocSize, <= INT_MAX/40). "lenstr" is no
163 * more than INT_MAX, that extreme case arising in hstore_from_arrays().
164 * Therefore, this calculation is limited to about INT_MAX / 5 + INT_MAX.
166 #define HSHRDSIZE (sizeof(HStoreV2))
167 #define CALCDATASIZE(x, lenstr) ( (x) * 2 * sizeof(HEntry) + HSHRDSIZE + (lenstr) )
168 /* note multiple evaluations of x */
169 #define ARRPTR(x) ( (HEntry*) ( (HStoreV2*)(x) + 1 ) )
170 #define STRPTR(x) ( (char*)(ARRPTR(x) + HS_ROOT_COUNT((HStoreV2*)(x)) * 2) )
172 /* note multiple/non evaluations */
173 #define HS_KEYLEN(arr_,i_) (HSE_LEN((arr_)[2*(i_)]))
175 /* ensure the varlena size of an existing hstore is correct */
176 #define HS_FIXSIZE(hsp_,count_) \
178 int bl = (count_) ? HSE_ENDPOS(ARRPTR(hsp_)[2*(count_)-1]) : 0; \
179 SET_VARSIZE((hsp_), CALCDATASIZE((count_),bl)); \
183 * Validity test for a new-format hstore.
185 * 1 = valid but with "slop" in the length
189 hstoreValidNewFormat(HStoreV2 *hs)
191 int count = HS_COUNT(hs);
192 HEntry *entries = ARRPTR(hs);
193 int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
194 int vsize = CALCDATASIZE(count, buflen);
197 if (hs->size_ & HS_FLAG_NEWVERSION)
203 if (!HSE_ISFIRST(entries[0]))
206 if (vsize > VARSIZE(hs))
209 /* entry position must be nondecreasing */
211 for (i = 1; i < 2 * count; ++i)
213 if (HSE_ISFIRST(entries[i])
214 || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
218 /* key length must be nondecreasing and keys must not be null */
220 for (i = 1; i < count; ++i)
222 if (HS_KEYLEN(entries, i) < HS_KEYLEN(entries, i - 1))
224 if (HSE_ISNULL(entries[2 * i]))
228 if (vsize != VARSIZE(hs))
235 * Validity test for an old-format hstore.
237 * 1 = valid but with "slop" in the length
241 hstoreValidOldFormat(HStoreV2 *hs)
243 int count = hs->size_;
244 HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
249 if (hs->size_ & HS_FLAG_NEWVERSION)
252 /* New format uses an HEntry for key and another for value */
253 StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
254 "old hstore format is not upward-compatible");
259 if (count > 0xFFFFFFF)
262 if (CALCDATASIZE(count, 0) > VARSIZE(hs))
265 if (entries[0].pos != 0)
268 /* key length must be nondecreasing */
270 for (i = 1; i < count; ++i)
272 if (entries[i].keylen < entries[i - 1].keylen)
277 * entry position must be strictly increasing, except for the first entry
278 * (which can be ""=>"" and thus zero-length); and all entries must be
279 * properly contiguous
282 for (i = 0; i < count; ++i)
284 if (entries[i].pos != lastpos)
286 lastpos += (entries[i].keylen
287 + ((entries[i].valisnull) ? 0 : entries[i].vallen));
290 vsize = CALCDATASIZE(count, lastpos);
292 if (vsize > VARSIZE(hs))
295 if (vsize != VARSIZE(hs))
303 * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
306 hstoreUpgrade(Datum orig)
308 HStoreV2 *hs = (HStoreV2 *) PG_DETOAST_DATUM(orig);
313 /* Return immediately if no conversion needed */
314 if (VARSIZE_ANY(hs) <= VARHDRSZ ||
315 (hs->size_ & HS_FLAG_NEWVERSION) ||
317 (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
319 if (VARSIZE_ANY_EXHDR(hs) == sizeof(hs->size_))
321 /* 'new' format but not nested. And empty */
322 hs = palloc(sizeof(VARHDRSZ));
323 SET_VARSIZE(hs, VARHDRSZ);
329 valid_new = hstoreValidNewFormat(hs);
330 valid_old = hstoreValidOldFormat(hs);
331 /* Do we have a writable copy? */
332 writable = ((void *) hs != (void *) DatumGetPointer(orig));
334 if (!valid_old || hs->size_ == 0)
339 * force the "new version" flag and the correct varlena length,
340 * but only if we have a writable copy already (which we almost
341 * always will, since short new-format values won't come through
346 HS_SETCOUNT(hs, HS_COUNT(hs));
347 HS_FIXSIZE(hs, HS_COUNT(hs));
353 elog(ERROR, "invalid hstore value found");
358 * this is the tricky edge case. It is only possible in some quite extreme
359 * cases (the hstore must have had a lot of wasted padding space at the
360 * end). But the only way a "new" hstore value could get here is if we're
361 * upgrading in place from a pre-release version of hstore-new (NOT
362 * contrib/hstore), so we work off the following assumptions: 1. If you're
363 * moving from old contrib/hstore to hstore-new, you're required to fix up
364 * any potential conflicts first, e.g. by running ALTER TABLE ... USING
365 * col::text::hstore; on all hstore columns before upgrading. 2. If you're
366 * moving from old contrib/hstore to new contrib/hstore, then "new" values
367 * are impossible here 3. If you're moving from pre-release hstore-new to
368 * hstore-new, then "old" values are impossible here 4. If you're moving
369 * from pre-release hstore-new to new contrib/hstore, you're not doing so
370 * as an in-place upgrade, so there is no issue So the upshot of all this
371 * is that we can treat all the edge cases as "new" if we're being built
372 * as hstore-new, and "old" if we're being built as contrib/hstore.
374 * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
375 * beta-tested. But for now, it would be very useful to know if anyone can
376 * actually reach this case in a non-contrived setting.
381 #if HSTORE_IS_HSTORE_NEW
382 elog(WARNING, "ambiguous hstore value resolved as hstore-new");
385 * force the "new version" flag and the correct varlena length, but
386 * only if we have a writable copy already (which we almost always
387 * will, since short new-format values won't come through here)
391 HS_SETCOUNT(hs, HS_COUNT(hs));
392 HS_FIXSIZE(hs, HS_COUNT(hs));
396 elog(WARNING, "ambiguous hstore value resolved as hstore-old");
401 * must have an old-style value. Overwrite it in place as a new-style one,
402 * making sure we have a writable copy first.
406 hs = (HStoreV2 *) PG_DETOAST_DATUM_COPY(orig);
409 int count = hs->size_;
410 HEntry *new_entries = ARRPTR(hs);
411 HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
414 for (i = 0; i < count; ++i)
416 uint32 pos = old_entries[i].pos;
417 uint32 keylen = old_entries[i].keylen;
418 uint32 vallen = old_entries[i].vallen;
419 bool isnull = old_entries[i].valisnull;
424 new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
425 new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
426 | ((isnull) ? HENTRY_ISNULL : 0));
430 new_entries[0].entry |= HENTRY_ISFIRST;
431 HS_SETCOUNT(hs, count);
432 HS_FIXSIZE(hs, count);
439 PG_FUNCTION_INFO_V1(hstore_version_diag);
440 Datum hstore_version_diag(PG_FUNCTION_ARGS);
442 hstore_version_diag(PG_FUNCTION_ARGS)
444 HStoreV2 *hs = (HStoreV2 *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
445 int valid_new = hstoreValidNewFormat(hs);
446 int valid_old = hstoreValidOldFormat(hs);
448 PG_RETURN_INT32(valid_old * 10 + valid_new);