SoundexESP
From PostgreSQL wiki
(Difference between revisions)
(Created page with "{{SnippetInfo|Compute string soundex|lang=PL/pgSQL|category=Library|version=Tested on 9.0, 9.1. Probably works in earlier versions}} '''Soundex Spanish''' is a function written …") |
|||
| Line 6: | Line 6: | ||
In my personal case, I needed a spanish version of the [http://en.wikipedia.org/wiki/Soundex soundex algorithm], so, based on [http://www.blogger.com/profile/00618392085702183279 lfer] work [http://oraclenotepad.blogspot.com/2008/03/soundex-en-espaol.html Soundex en Español], I made the PL/pgSQL version for the Spanish Soundex Algorithm. | In my personal case, I needed a spanish version of the [http://en.wikipedia.org/wiki/Soundex soundex algorithm], so, based on [http://www.blogger.com/profile/00618392085702183279 lfer] work [http://oraclenotepad.blogspot.com/2008/03/soundex-en-espaol.html Soundex en Español], I made the PL/pgSQL version for the Spanish Soundex Algorithm. | ||
| + | |||
| + | Thanks to [http://www.grupovesica.com/gv/ Grupo Vesica]. | ||
| Line 12: | Line 14: | ||
---- | ---- | ||
<source lang="sql"> | <source lang="sql"> | ||
| + | -- oliver mazariegos http:// | ||
CREATE OR REPLACE FUNCTION soundexesp(input text) RETURNS text | CREATE OR REPLACE FUNCTION soundexesp(input text) RETURNS text | ||
IMMUTABLE STRICT COST 500 LANGUAGE plpgsql | IMMUTABLE STRICT COST 500 LANGUAGE plpgsql | ||
Revision as of 19:30, 19 November 2012
Compute string soundex
Works with PostgreSQL
Tested on 9.0, 9.1. Probably works in earlier versions
Written in
PL/pgSQL
Depends on
Nothing
Soundex Spanish is a function written to calculate soundex codes for spanish languages.
The soundex algorithm is written for english language, so it is not advisable to use it in other languages.
In my personal case, I needed a spanish version of the soundex algorithm, so, based on lfer work Soundex en Español, I made the PL/pgSQL version for the Spanish Soundex Algorithm.
Thanks to Grupo Vesica.
Hope this helps someone else:
-- oliver mazariegos http:// CREATE OR REPLACE FUNCTION soundexesp(input text) RETURNS text IMMUTABLE STRICT COST 500 LANGUAGE plpgsql AS $$ DECLARE soundex text=''; -- para determinar la primera letra pri_letra text; resto text; sustituida text =''; -- para quitar adyacentes anterior text; actual text; corregido text; BEGIN -- 1: LIMPIEZA: -- pasar a mayuscula, eliminar la letra "H" inicial, los acentos y la enie -- 'holá coñó' => 'OLA CONO' input=translate(ltrim(trim(upper(input)),'H'),'ÑÁÉÍÓÚÀÈÌÒÙÜ','NAEIOUAEIOUU'); -- eliminar caracteres no alfabéticos (números, símbolos como &,%,",*,!,+, etc. input=regexp_replace(input, '[^a-zA-Z]', '', 'g'); -- 2: PRIMERA LETRA ES IMPORTANTE, DEBO ASOCIAR LAS SIMILARES -- 'vaca' se convierte en 'baca' y 'zapote' se convierte en 'sapote' -- un fenomeno importante es GE y GI se vuelven JE y JI; CA se vuelve KA, etc pri_letra =substr(input,1,1); resto =substr(input,2); CASE when pri_letra IN ('V') then sustituida='B'; when pri_letra IN ('Z','X') then sustituida='S'; when pri_letra IN ('G') AND substr(input,2,1) IN ('E','I') then sustituida='J'; when pri_letra IN('C') AND substr(input,2,1) NOT IN ('H','E','I') then sustituida='K'; else sustituida=pri_letra; end case; --corregir el parametro con las consonantes sustituidas: input=sustituida || resto; -- 3: corregir "letras compuestas" y volverlas una sola input=REPLACE(input,'CH','V'); input=REPLACE(input,'QU','K'); input=REPLACE(input,'LL','J'); input=REPLACE(input,'CE','S'); input=REPLACE(input,'CI','S'); input=REPLACE(input,'YA','J'); input=REPLACE(input,'YE','J'); input=REPLACE(input,'YI','J'); input=REPLACE(input,'YO','J'); input=REPLACE(input,'YU','J'); input=REPLACE(input,'GE','J'); input=REPLACE(input,'GI','J'); input=REPLACE(input,'NY','N'); -- para debug: --return input; -- EMPIEZA EL CALCULO DEL SOUNDEX -- 4: OBTENER PRIMERA letra pri_letra=substr(input,1,1); -- 5: retener el resto del string resto=substr(input,2); --6: en el resto del string, quitar vocales y vocales fonéticas resto=translate(resto,'@AEIOUHWY','@'); --7: convertir las letras foneticamente equivalentes a numeros (esto hace que B sea equivalente a V, C con S y Z, etc.) resto=translate(resto, 'BPFVCGKSXZDTLMNRQJ', '111122222233455677'); -- así va quedando la cosa soundex=pri_letra || resto; --8: eliminar números iguales adyacentes (A11233 se vuelve A123) anterior=substr(soundex,1,1); corregido=anterior; FOR i IN 2 .. length(soundex) LOOP actual = substr(soundex, i, 1); IF actual <> anterior THEN corregido=corregido || actual; anterior=actual; END IF; END LOOP; -- así va la cosa soundex=corregido; -- 9: siempre retornar un string de 4 posiciones soundex=rpad(soundex,4,'0'); soundex=substr(soundex,1,4); -- YA ESTUVO RETURN soundex; END; $$
Example
db=#select soundexesp('ordoñez'),
soundexesp('ordónez'),
soundexesp('ordondes'),
soundexesp('karla'),
soundexesp('CARLA');
O635 | O635 | O635 | K640 | K640