// Substitution rules used in a paper:
// Deorowicz S., Ciura M. G., "Correcting spelling errors by modelling their causes", 2005
//
// NOTE(review): judging from the section headings below, a rule reads
//   LHS>costRHS[,costRHS...]
// where '*' appears to stand for any letter, '.' for the empty string,
// '_' for a space, and '^' / '$' for the start / end of a word; the cost
// is presumably compared against the thresholds. Confirm against the
// rule parser before relying on this description.

// Thresholds
threshold=10
max_threshold=14

// Mistypings - basic
// Letters reordering
*a*b>2*b*a
*a*b*c>4*b*c*a
*a*b*c>4*c*a*b
*a*b*c>4*c*b*a
// Substitution of letters
*>7*
// Removing a letter
*>5.
// Inserting a letter
.>4*
$>4*
^>4*
// Splitting a word
.>9_

// Mistypings - close letters
a>6q,6w,5s,6z
b>5v,6g,6h,5n
c>5x,5d,5f,5v
d>5s,6e,6r,5f,6c,6x
e>5w,6s,6d,5r
f>5d,6r,6t,5g,6v,6c
g>5f,6t,6y,5h,6b,6v
h>5g,6y,6u,5j,6n,6b
i>5u,6j,6k,5o
j>5h,6u,6i,5k,6m,6n
k>5j,6i,6o,5l,6m
l>5k,6o,6p
m>5n,6j,6k
n>5b,6h,6j,5m
o>5i,5p,6l,6k
p>5o,6l
q>5w,6a
r>5e,5t,6f,6d
s>5a,6w,6e,5d,6x,6z
t>5r,5y,6g,6f
u>5y,5i,6j,6h
v>5c,6f,6g,5b
w>5q,5e,6s,6a
x>5z,6s,6d,5c
y>5t,5u,6h,6g
z>6a,6s,5x
A>6Q,6W,5S,6Z
B>5V,6G,6H,5N
C>5X,5D,5F,5V
D>5S,6E,6R,5F,6C,6X
E>5W,6S,6D,5R
F>5D,6R,6T,5G,6V,6C
G>5F,6T,6Y,5H,6B,6V
H>5G,6Y,6U,5J,6N,6B
I>5U,6J,6K,5O
J>5H,6U,6I,5K,6M,6N
K>5J,6I,6O,5L,6M
L>5K,6O,6P
M>5N,6J,6K
N>5B,6H,6J,5M
O>5I,5P,6L,6K
P>5O,6L
Q>5W,6A
R>5E,5T,6F,6D
S>5A,6W,6E,5D,6X,6Z
T>5R,5Y,6G,6F
U>5Y,5I,6J,6H
V>5C,6F,6G,5B
W>5Q,5E,6S,6A
X>5Z,6S,6D,5C
Y>5T,5U,6H,6G
Z>6A,6S,5X

// Mistypings - lower / upper cased letters
a>5A
b>5B
c>5C
d>5D
e>5E
f>5F
g>5G
h>5H
i>5I
j>5J
k>5K
l>5L
m>5M
n>5N
o>5O
p>5P
q>5Q
r>5R
s>5S
t>5T
u>5U
v>5V
w>5W
x>5X
y>5Y
z>5Z
A>5a
B>5b
C>5c
D>5d
E>5e
F>5f
G>5g
H>5h
I>5i
J>5j
K>5k
L>5l
M>5m
N>5n
O>5o
P>5p
Q>5q
R>5r
S>5s
T>5t
U>5u
V>5v
W>5w
X>5x
Y>5y
Z>5z

// Misspellings - vowels
// a, e, i, o, u
a>3e,3i,3o,5u
e>3a,3i,4o,5u
i>3a,3e,5o,5u
o>3a,4e,5i,3u
u>5a,5e,5i,3o
// ay, a, ai, ei, ey, ea
ay>5a,5ai,5ei,5ey,5ea
a>5ay,5ai,5ei,5ey,5ea
ai>5ay,5a,5ei,5ey,5ea
ei>5ay,5a,5ai,5ey,5ea
ey>5ay,5a,5ai,5ei,5ea
ea>5ay,5a,5ai,5ei,5ey
// e, ee, ea, ie, i, ei
e>5ee,5ea,5ie,5i,5ei
ee>5e,5ea,5ie,5i,5ei
ea>5e,5ee,5ie,5i,5ei
ie>5e,5ee,5ea,5i,5ei
i>5e,5ee,5ea,5ie,5ei
ei>5e,5ee,5ea,5ie,5i
// o, oa, oe, ow, ou, eau
o>5oa,5oe,5ow,5ou,5eau
oa>5o,5oe,5ow,5ou,5eau
oe>5o,5oa,5ow,5ou,5eau
ow>5o,5oa,5oe,5ou,5eau
ou>5o,5oa,5oe,5ow,5eau
eau>5o,5oa,5oe,5ow,5ou
// u, o, oo, ou
u>5o,5oo,5ou
o>5u,5oo,5ou
oo>5u,5o,5ou
ou>5u,5o,5oo
// y, i, ie, ye, ei
y>5i,5ie,5ye,5ei
i>5y,5ie,5ye,5ei
ie>5y,5i,5ye,5ei
ye>5y,5i,5ie,5ei
ei>5y,5i,5ie,5ye
// e, ea
e>5ea
ea>5e
// i, y
i>5y
y>5i
// u, o, ou
u>5o,5ou
o>5u,5ou
ou>5u,5o
// air, are, ear, aer, ere, eir, ay
air>5are,5ear,5aer,5ere,5eir,5ay
are>5air,5ear,5aer,5ere,5eir,5ay
ear>5air,5are,5aer,5ere,5eir,5ay
aer>5air,5are,5ear,5ere,5eir,5ay
ere>5air,5are,5ear,5aer,5eir,5ay
eir>5air,5are,5ear,5aer,5ere,5ay
ay>5air,5are,5ear,5aer,5ere,5eir
// oo, u, o, ou, oe
// (left-hand sides carry no cost digit; a stray leading "5" here
// previously made these five rules unmatchable)
oo>5u,5o,5ou,5oe
u>5oo,5o,5ou,5oe
o>5oo,5u,5ou,5oe
ou>5oo,5u,5o,5oe
oe>5oo,5u,5o,5ou
// y, i, ea, ee
y>5i,5ea,5ee
i>5y,5ea,5ee
ea>5y,5i,5ee
ee>5y,5i,5ea
// er, ir, ur, yr, ear, our, or
er>5ir,5ur,5yr,5ear,5our,5or
ir>5er,5ur,5yr,5ear,5our,5or
ur>5er,5ir,5yr,5ear,5our,5or
yr>5er,5ir,5ur,5ear,5our,5or
ear>5er,5ir,5ur,5yr,5our,5or
our>5er,5ir,5ur,5yr,5ear,5or
or>5er,5ir,5ur,5yr,5ear,5our
// aw, au, or, ore
aw>5au,5or,5ore
au>5aw,5or,5ore
or>5aw,5au,5ore
ore>5aw,5au,5or
// o, ou, a, au
o>5ou,5a,5au
ou>5o,5a,5au
a>5o,5ou,5au
au>5o,5ou,5a
// ar, al, a, au, er, ear
ar>5al,5a,6au,6er,6ear
al>5ar,5a,6au,6er,6ear
a>5ar,5al,6au,6er,6ear
au>6ar,6al,6a,6er,6ear
er>6ar,6al,6a,6au,6ear
ear>6ar,6al,6a,6au,6er
// ow, ou
ow>5ou
ou>5ow
// ear, eer, ea, eir, ere, ier
ear>5eer,5ea,5eir,5ere,5ier
eer>5ear,5ea,5eir,5ere,5ier
ea>5ear,5eer,5eir,5ere,5ier
eir>5ear,5eer,5ea,5ere,5ier
ere>5ear,5eer,5ea,5eir,5ier
ier>5ear,5eer,5ea,5eir,5ere
// oy, oi
oy>5oi
oi>5oy
// oor, ure, ur, ou
oor>5ure,5ur,5ou
ure>5oor,5ur,5ou
ur>5oor,5ure,5ou
ou>5oor,5ure,5ur

// Misspellings - consonants
// b, bb
b>4bb
bb>4b
// d, dd, ed
d>4dd,4ed
dd>4d,4ed
ed>4d,4dd
// p, pp
p>4pp
pp>4p
// t, tt, th
t>4tt,4th
tt>4t,4th
th>4t,4tt
// c, cc, ck, k, ch, qu
c>4cc,4ck,4k,4ch,4que
cc>4c,4ck,4k,4ch,4que
ck>4c,4cc,4k,4ch,4que
k>4c,4cc,4ck,4ch,4que
ch>4c,4cc,4ck,4k,4que
que>4c,4cc,4ck,4k,4ch
// l, ll
l>4ll
ll>4l
// m, mm
m>4mm
mm>4m
// f, ff, ph, gh
f>4ff,4ph,4gh
ff>4f,4ph,4gh
ph>4f,4ff,4gh
gh>4f,4ff,4ph
// h, wh
h>4wh
wh>4h
// n, nn, kn, en
n>4nn,4kn,4en
nn>4n,4kn,4en
kn>4n,4nn,4en
en>4n,4nn,4kn
// r, rr, wr
r>4rr,4wr
rr>4r,4wr
wr>4r,4rr
// s, ss, c, sc
s>4ss,4c,4sc
ss>4s,4c,4sc
c>4s,4ss,4sc
sc>4s,4ss,4c
// w, u
w>4u
u>4w
// z, s, zz, ss, tz, x
z>4s,4zz,4ss,4tz,4x
s>4z,4zz,4ss,4tz,4x
zz>4z,4s,4ss,4tz,4x
ss>4z,4s,4zz,4tz,4x
tz>4z,4s,4zz,4ss,4x
x>4z,4s,4zz,4ss,4tz
// g, gg, gu, gh
g>4gg,4gu,4gh
gg>4g,4gu,4gh
gu>4g,4gg,4gh
gh>4g,4gg,4gu
// ch, tch, t
ch>4tch,4t
tch>4ch,4t
t>4ch,4tch
// j, g, dg
j>4g,4dg
g>4j,4dg
dg>4j,4g
// ng, n
ng>4n
n>4ng
// s, z, ge
s>4z,4ge
z>4s,4ge
ge>4s,4z
// sh, s, ss, ci, sci, ce, si, ti, ch, t
sh>4s,4ss,4ci,4sci,4ce,4si,4ti,4ch,4t
s>4sh,4ss,4ci,4sci,4ce,4si,4ti,4ch,4t
ss>4sh,4s,4ci,4sci,4ce,4si,4ti,4ch,4t
ci>4sh,4s,4ss,4sci,4ce,4si,4ti,4ch,4t
sci>4sh,4s,4ss,4ci,4ce,4si,4ti,4ch,4t
ce>4sh,4s,4ss,4ci,4sci,4si,4ti,4ch,4t
si>4sh,4s,4ss,4ci,4sci,4ce,4ti,4ch,4t
ti>4sh,4s,4ss,4ci,4sci,4ce,4si,4ch,4t
ch>4sh,4s,4ss,4ci,4sci,4ce,4si,4ti,4t
t>4sh,4s,4ss,4ci,4sci,4ce,4si,4ti,4ch

// Misspellings - between phonemes
// b, bb <-> p, pp
b>5p,5pp
bb>5p,5pp
p>5b,5bb
pp>5b,5bb
// d, dd <-> t, tt, th
d>5t,5tt,5th
dd>5t,5tt,5th
t>5d,5dd
tt>5d,5dd
th>5d,5dd
// c, cc, ck, k, ch, qu
// g, gg, gu, gh
// k <-> g
c>5g,5gg,5gu,5gh
cc>5g,5gg,5gu,5gh
ck>5g,5gg,5gu,5gh
k>5g,5gg,5gu,5gh
ch>5g,5gg,5gu,5gh
qu>5g,5gg,5gu,5gh
g>5c,5cc,5ck,5k,5ch,5qu
gg>5c,5cc,5ck,5k,5ch,5qu
gu>5c,5cc,5ck,5k,5ch,5qu
gh>5c,5cc,5ck,5k,5ch,5qu
// v, f, ph
// f, ff, ph, gh
// f <-> v
v>5f,5ff,5ph,5gh
f>5v
ff>5v
ph>5v
gh>5v
// [k][s] - x
x>5ks,5kz
ks>5x,5kz
kz>5x,5ks
// [g][z] - x
x>5gz,5gs
gz>5x,5gs
gs>5x,5gz

// Misspellings - omitted, inserted phonemes
// h
.>3h
h>4.
// e
.>3e
$>2e
e>3.
e$>2.
// a
a>4.
.>3a
// o
.>3o

// Misspellings / mistypings - doubled letters
b>2bb
bb>2b
c>2cc
cc>2c
d>2dd
dd>2d
f>2ff
ff>2f
g>2gg
gg>2g
l>2ll
ll>2l
m>2mm
mm>2m
n>2nn
nn>2n
p>2pp
pp>2p
r>2rr
rr>2r
s>2ss
ss>2s
t>2tt
tt>2t
zz>2z
z>2zz
e>2ee
ee>2e
o>2oo
oo>2o

// Vocabulary incompetence - negative prefixes
// un, in, im, il, ir
^un>5in,5im,5il,5ir
^in>5un,5im,5il,5ir
^im>5un,5in,5il,5ir
^il>5un,5in,5im,5ir
^ir>5un,5in,5im,5il
// mis, mal
^mis>5mal
^mal>5mis
// a, an
^a>5an
^an>5a
// de, dis
^de>5dis
^dis>5de

// Vocabulary incompetence - special prefixes
// macro, mega
^macro>5mega
^mega>5macro
// com, con, col
^com>5con,5col
^con>5com,5col
^col>5com,5con
// en, em, in
^en>5em,5in
^em>5en,5in
^in>5en,5em
// poly, multi
^poly>5multi
^multi>5poly

// Vocabulary incompetence - nationality suffixes
// an, e, er, i, ian, ese, ean
an$>5e,5er,5i,5ian,5ese,5ean
e$>5an,5er,5i,5ian,5ese,5ean
er$>5an,5e,5i,5ian,5ese,5ean
i$>5an,5e,5er,5ian,5ese,5ean
ian$>5an,5e,5er,5i,5ese,5ean
ese$>5an,5e,5er,5i,5ian,5ean
ean$>5an,5e,5er,5i,5ian,5ese

// Vocabulary incompetence - verb suffixes
// ize, ise, ate, ify, en
ize$>5ate,5ify,5en
ise$>5ate,5ify,5en
ate$>5ize,5ise,5ify,5en
ify$>5ize,5ise,5ate,5en
en$>5ize,5ise,5ate,5ify
// t, ed
t$>4ed
ed$>4t

// Vocabulary incompetence - noun suffixes
// er, or, ist, ian
er$>5or,5ist,5ian
or$>5er,5ist,5ian
ist$>5er,5or,5ian
ian$>5er,5or,5ist
// tion, ation
tion$>5ation
ation$>5tion
// ness, ism, dom, ship
// NOTE(review): "ism" is named in this heading but no rule in the group
// has it on either side - confirm whether that is intentional.
ness$>5dom,5ship
dom$>5ness,5ship
ship$>5ness,5dom
// ion, ment
ion$>5ment
ment$>5ion
// ion, ness
ion$>5ness
ness$>5ion
// ism, ity
ism$>5ity
ity$>5ism
// our, or
our$>5or
or$>5our
// ist, alist
ist$>5alist
alist$>5ist
// ty, ness
ty$>5ness
ness$>5ty

// Vocabulary incompetence - adjective / adverb suffixes
// wise, ways, way, ward, wards
wise$>5ways,5way,5ward,5wards
ways$>5wise,5way,5ward,5wards
way$>5wise,5ways,5ward,5wards
ward$>5wise,5ways,5way,5wards
wards$>5wise,5ways,5way,5ward
// al, ar, ic, ical, ed, ive
al$>5ar,5ic,5ical,5ed,5ive
ar$>5al,5ic,5ical,5ed,5ive
ic$>5al,5ar,5ical,5ed,5ive
ical$>5al,5ar,5ic,5ed,5ive
ed$>5al,5ar,5ic,5ical,5ive
ive$>5al,5ar,5ic,5ical,5ed
// al$, ly$, ally$
al$>6ly,6ally
ly$>6al,6ally
ally$>6al,6ly

// Other
nt$>5n't
s$>5's