| 1 | /** |
|---|
| 2 | * @param varchar _str The input string. |
|---|
| 3 | * @return varchar The unaccented string. |
|---|
| 4 | */ |
|---|
| 5 | CREATE OR REPLACE FUNCTION public.unaccent(varchar) |
|---|
| 6 | RETURNS varchar |
|---|
| 7 | AS $$ |
|---|
| 8 | DECLARE |
|---|
| 9 | _str varchar := $1; |
|---|
| 10 | _find constant varchar := 'ÀÁÂÃÄÅĀĄĂÆÈÉÊËĒĘĚĔĖÐÌÍÎÏĪĨĬĮİIJÒÓÔÕÖØŌŐŎŒÙÚÛÜŪŮŰŬŨŲÝŶŸ' || |
|---|
| 11 | 'àáâãäåāąăæèéêëēęěĕėðìíîïīĩĭįıijòóôõöøōőŏœùúûüūůűŭũųýÿŷ' || |
|---|
| 12 | 'ÇĆČĈĊĎĐƑĜĞĠĢĤĦĴĶŁĽĹĻĿÑŃŇŅŊŔŘŖŚŠŞŜȘſŤŢŦȚÞŴŹŽŻ' || |
|---|
| 13 | 'çćčĉċďđƒĝğġģĥħĵĸķłľĺļŀñńňņʼnŋŕřŗśšşŝșßťţŧțþŵžżź'; |
|---|
| 14 | _repl constant varchar := 'AAAAAAAAAAEEEEEEEEEEIIIIIIIIIIOOOOOOOOOOUUUUUUUUUUYYY' || |
|---|
| 15 | 'aaaaaaaaaaeeeeeeeeeeiiiiiiiiiioooooooooouuuuuuuuuuyyy' || |
|---|
| 16 | 'CCCCCDDFGGGGHHJKLLLLLNNNNNRRRSSSSSSTTTTTWZZZ' || |
|---|
| 17 | 'cccccddfgggghhjkklllllnnnnnnrrrsssssstttttwzzz'; |
|---|
| 18 | BEGIN |
|---|
| 19 | -- Bail if the string is empty |
|---|
| 20 | IF _str = '' |
|---|
| 21 | THEN |
|---|
| 22 | RETURN _str; |
|---|
| 23 | END IF; |
|---|
| 24 | |
|---|
| 25 | -- Common expansions |
|---|
| 26 | _str := replace(_str, 'ſ', 'SS'); |
|---|
| 27 | _str := replace(_str, 'ß', 'ss'); |
|---|
| 28 | _str := replace(_str, 'Å', 'AA'); |
|---|
| 29 | _str := replace(_str, 'å', 'aa'); |
|---|
| 30 | _str := replace(_str, 'IJ', 'IJ'); |
|---|
| 31 | _str := replace(_str, 'ij', 'ij'); |
|---|
| 32 | _str := regexp_replace(_str, '([ÄÆÖŒÜ])', E'\\1E', 'g'); |
|---|
| 33 | _str := regexp_replace(_str, '([äæöü])', E'\\1e', 'g'); |
|---|
| 34 | |
|---|
| 35 | -- Translate what's left |
|---|
| 36 | _str := translate(_str, _find, _repl); |
|---|
| 37 | |
|---|
| 38 | RETURN _str; |
|---|
| 39 | END; |
|---|
| 40 | $$ IMMUTABLE STRICT LANGUAGE plpgsql COST 1; |
|---|
| 41 | |
|---|
| 42 | COMMENT ON FUNCTION public.unaccent(varchar) IS |
|---|
| 43 | 'Strips accents from a string. Useful if the built-in unaccent() won''t |
|---|
| 44 | compile properly, as is the case for the one from macports...'; |
|---|
| 45 | |
|---|
| 46 | /** |
|---|
| 47 | * @param varchar _str The input string. |
|---|
| 48 | * @param char _sep The separator. |
|---|
| 49 | * @return varchar The output slug. |
|---|
| 50 | */ |
|---|
| 51 | CREATE OR REPLACE FUNCTION public.to_slug(varchar, varchar = '-') |
|---|
| 52 | RETURNS varchar |
|---|
| 53 | AS $$ |
|---|
| 54 | DECLARE |
|---|
| 55 | _str varchar := $1; |
|---|
| 56 | _sep constant varchar(1) := $2; |
|---|
| 57 | BEGIN |
|---|
| 58 | -- Bail if the string is empty |
|---|
| 59 | IF _str = '' |
|---|
| 60 | THEN |
|---|
| 61 | RETURN _str; |
|---|
| 62 | END IF; |
|---|
| 63 | |
|---|
| 64 | -- Strip accents and lower the case |
|---|
| 65 | _str := lower(public.unaccent(_str)); |
|---|
| 66 | |
|---|
| 67 | -- Replace nonsensical characters |
|---|
| 68 | _str := regexp_replace(_str, '[^a-z0-9]+', _sep, 'g'); |
|---|
| 69 | |
|---|
| 70 | -- Trim the result |
|---|
| 71 | _str := trim(both _sep from _str); |
|---|
| 72 | |
|---|
| 73 | RETURN _str; |
|---|
| 74 | END; |
|---|
| 75 | $$ IMMUTABLE STRICT LANGUAGE plpgsql COST 1; |
|---|
| 76 | |
|---|
| 77 | COMMENT ON FUNCTION public.to_slug(varchar, varchar) IS |
|---|
| 78 | '"Slugifies" a string, e.g. turns A1@bC! into a1-bc for use as slugs.'; |
|---|
| 79 | |
|---|
| 80 | /** |
|---|
| 81 | * @param varchar The input string. |
|---|
| 82 | * @return varchar The output string for consumption in natural sorting. |
|---|
| 83 | */ |
|---|
| 84 | CREATE OR REPLACE FUNCTION public.natsort(varchar) |
|---|
| 85 | RETURNS varchar |
|---|
| 86 | AS $$ |
|---|
| 87 | DECLARE |
|---|
| 88 | _str varchar := $1; |
|---|
| 89 | _pad int := 15; -- Maximum precision for PostgreSQL floats |
|---|
| 90 | BEGIN |
|---|
| 91 | -- Bail if the string is empty |
|---|
| 92 | IF trim(_str) = '' |
|---|
| 93 | THEN |
|---|
| 94 | RETURN ''; |
|---|
| 95 | END IF; |
|---|
| 96 | |
|---|
| 97 | -- Strip accents and lower the case |
|---|
| 98 | _str := lower(public.unaccent(_str)); |
|---|
| 99 | |
|---|
| 100 | -- Replace nonsensical characters |
|---|
| 101 | _str := regexp_replace(_str, E'[^a-z0-9$¢£¥₤€@&%\\(\\)\\[\\]\\{\\}_:;,\\.\\?!\\+\\-]+', ' ', 'g'); |
|---|
| 102 | |
|---|
| 103 | -- Trim the result |
|---|
| 104 | _str := trim(_str); |
|---|
| 105 | |
|---|
| 106 | -- We're done if the string contains no numbers |
|---|
| 107 | IF _str !~ '[0-9]' |
|---|
| 108 | THEN |
|---|
| 109 | RETURN _str; |
|---|
| 110 | END IF; |
|---|
| 111 | |
|---|
| 112 | -- Force spaces between numbers, so we can use regexp_split_to_table() |
|---|
| 113 | _str := regexp_replace(_str, E'((?:[0-9]+|[0-9]*\\.[0-9]+)(?:e[+-]?[0-9]+\\M)?)', E' \\1 ', 'g'); |
|---|
| 114 | |
|---|
| 115 | -- Pad zeros to obtain a reasonably natural looking sort order |
|---|
| 116 | RETURN array_to_string(ARRAY ( |
|---|
| 117 | SELECT CASE |
|---|
| 118 | WHEN val !~ E'^\\.?[0-9]' THEN |
|---|
| 119 | -- Not a number; return as is |
|---|
| 120 | val |
|---|
| 121 | ELSE |
|---|
| 122 | -- Do our best... |
|---|
| 123 | COALESCE(lpad(substring(val::numeric::text from '^[0-9]+'), _pad, '0'), '') || |
|---|
| 124 | COALESCE(rpad(substring(val::numeric::text from E'\\.[0-9]+'), _pad, '0'), '') |
|---|
| 125 | END |
|---|
| 126 | FROM regexp_split_to_table(_str, E'\\s+') as val |
|---|
| 127 | WHERE val <> '' |
|---|
| 128 | ), ' '); |
|---|
| 129 | END; |
|---|
| 130 | $$ IMMUTABLE STRICT LANGUAGE plpgsql COST 1; |
|---|
| 131 | |
|---|
| 132 | COMMENT ON FUNCTION public.natsort(varchar) IS |
|---|
| 133 | 'Rewrites a string so it can be used in natural sorting. |
|---|
| 134 | |
|---|
| 135 | It''s by no means bullet proof, but it works for reasonably small positive |
|---|
| 136 | numbers, and it''s fast enough to be used in a trigger or an index.'; |
|---|