Find which character occurs the most times in a string

With an Oracle SQL query, can we do the following?

      Input       Output
    'aaaabcd' --->  'a'
    '0001001' --->  '0'

That is, find the character that occurs the most times in a string?

+4
source share
4 answers

Yes, it is possible with CONNECT BY. However a little harder:

-- Most frequent character(s) of a single literal string, with the occurrence count.
WITH input AS (
    -- The string to analyse, stated once.
    SELECT 'aaaabcd' AS str
      FROM dual
),
chars AS (
    -- One row per character position of the string.
    SELECT SUBSTR(str, LEVEL, 1) AS xchar
      FROM input
   CONNECT BY LEVEL <= LENGTH(str)
),
char_counts AS (
    -- Occurrences per character; rank 1 = highest count (ties share the rank).
    SELECT xchar,
           COUNT(*) AS xcount,
           RANK() OVER (ORDER BY COUNT(*) DESC) AS rn
      FROM chars
     GROUP BY xchar
)
SELECT xchar, xcount
  FROM char_counts
 WHERE rn = 1;

What we do in the innermost query is break the string into its individual characters. Then we just get COUNT(*), grouped by character, and use RANK() to find the maximum (note that this will return more than one row if there is a tie for the most common character).

, , , .

If you have a table of strings, you can do something like this:

-- Most frequent character(s) for every row of the strings table.
WITH positions AS (
    -- Candidate character positions 1..30; characters beyond position 30 are not examined.
    SELECT LEVEL AS strind
      FROM dual
   CONNECT BY LEVEL <= 30
),
chars AS (
    -- One row per (string, character position) that lies inside the string.
    SELECT s.id,
           SUBSTR(s.str, p.strind, 1) AS xchar
      FROM strings s
     INNER JOIN positions p
        ON p.strind <= LENGTH(s.str)
),
char_counts AS (
    -- Occurrences per character within each string; rank 1 = highest count (ties share the rank).
    SELECT id,
           xchar,
           COUNT(*) AS xcount,
           RANK() OVER (PARTITION BY id ORDER BY COUNT(*) DESC) AS rn
      FROM chars
     GROUP BY id, xchar
)
SELECT id, xchar, xcount
  FROM char_counts
 WHERE rn = 1;

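30 is just an arbitrary number here; it must be greater than or equal to the length of the longest string. See the SQL Fiddle demo. Alternatively, to avoid the hard-coded number, derive the bound from the data: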

-- Most frequent character(s) for every row of the strings table,
-- with the position range sized from the data instead of a constant.
WITH positions AS (
    -- Candidate character positions 1..(length of the longest string in the table).
    SELECT LEVEL AS strind
      FROM dual
   CONNECT BY LEVEL <= ( SELECT MAX(LENGTH(str)) FROM strings )
),
chars AS (
    -- One row per (string, character position) that lies inside the string.
    SELECT s.id,
           SUBSTR(s.str, p.strind, 1) AS xchar
      FROM strings s
     INNER JOIN positions p
        ON p.strind <= LENGTH(s.str)
),
char_counts AS (
    -- Occurrences per character within each string; rank 1 = highest count (ties share the rank).
    SELECT id,
           xchar,
           COUNT(*) AS xcount,
           RANK() OVER (PARTITION BY id ORDER BY COUNT(*) DESC) AS rn
      FROM chars
     GROUP BY id, xchar
)
SELECT id, xchar, xcount
  FROM char_counts
 WHERE rn = 1;

SQL.

+6

- :

-- Most frequent letter(s) per string, demonstrated on inline sample data.
WITH sample_data AS (
    SELECT 'aaaabcd' AS str FROM dual
    UNION ALL
    SELECT '0001001' AS str FROM dual
    UNION ALL
    SELECT '11002' AS str FROM dual
),
letters AS (
    -- Explode each string into one row per character.
    -- "PRIOR str = str" keeps the hierarchy inside a single source row, and the
    -- "PRIOR dbms_random.value IS NOT NULL" term stops Oracle from reporting a cycle.
    SELECT str,
           SUBSTR(str, LEVEL, 1) AS letter
      FROM sample_data
   CONNECT BY LEVEL <= LENGTH(str)
          AND PRIOR str = str
          AND PRIOR dbms_random.value IS NOT NULL
),
letter_ranks AS (
    -- Count each letter per string and rank by count; ties share dense rank 1.
    SELECT str,
           letter,
           DENSE_RANK() OVER (PARTITION BY str ORDER BY COUNT(*) DESC) AS dr
      FROM letters
     GROUP BY str, letter
)
SELECT str, letter
  FROM letter_ranks
 WHERE dr = 1;

STR     LETTER
------- ------
0001001 0     
11002   1     
11002   0     
aaaabcd a     

If you want only a single row per string when there is a tie, replace dense_rank() with row_number().

(, ), listagg , .

+2

- PL/SQL. - PLSQL?

PLSQL, , , , , . , , SQL- , , ... , ...

, .

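In a (quick, unscientific) test, PL/SQL outperformed CONNECT BY: over 10K 11-character strings, it took about 40 seconds with CONNECT BY versus about 2 seconds with PL/SQL.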

CREATE OR REPLACE
FUNCTION get_most_freq_char( p_input VARCHAR2 )
RETURN VARCHAR2
IS
  -- Returns the character that occurs most often in p_input.
  --
  -- p_input : the string to analyse.
  -- returns : the most frequent character; on a tie, the character whose
  --           running count reached the winning value first. NULL when
  --           p_input is NULL or empty.

  -- Occurrence count per character. The key is sized in bytes, so allow
  -- for a multibyte character (up to 4 bytes in AL32UTF8).
  TYPE t_charcount IS TABLE OF SIMPLE_INTEGER
                      INDEX BY VARCHAR2(4);
  l_map      t_charcount;
  -- CHAR semantics: a plain VARCHAR2(1) is one BYTE under the default length
  -- semantics and raises ORA-06502 on a multibyte character.
  l_value    VARCHAR2(1 CHAR);
  l_maxchar  VARCHAR2(1 CHAR);
BEGIN
  -- LENGTH(NULL) is NULL, and a NULL loop bound raises VALUE_ERROR;
  -- NVL turns NULL/empty input into a zero-iteration loop instead.
  FOR i IN 1 .. NVL( LENGTH( p_input ), 0 )
  LOOP
    l_value := SUBSTR( p_input, i ,1 );

    IF l_map.EXISTS( l_value )
    THEN
      l_map( l_value ) := l_map( l_value ) + 1;
    ELSE
      l_map( l_value ) := 1;
    END IF;

    -- Strict ">" keeps the earlier leader when counts are equal.
    IF l_maxchar IS NULL OR l_map( l_value ) > l_map( l_maxchar )
    THEN
      l_maxchar := l_value;
    END IF;

  END LOOP;

  RETURN l_maxchar;
END;
/

SELECT get_most_freq_char( 'abcdeffffffbbbaaaaaa' ) FROM DUAL;
+1
source

In addition to all the great answers. Suppose you have a table like this:

FULL_STRING
-----------
0001230
aaaabcd
bbbbcdef


-- For every distinct string in drop_tab, return the character that occurs
-- most often in it (rno is always 1 in the result).
SELECT full_str
     , max_char_in_string
     , rno
  FROM
  (
   SELECT full_str
        , str AS max_char_in_string
          -- Highest occurrence count first; ties are broken by the character
          -- itself so the chosen row is deterministic.
        , ROW_NUMBER() OVER (PARTITION BY full_str
                                 ORDER BY COUNT(*) DESC, str) AS rno
     FROM
     (
      -- One row per character position of each distinct string.
      -- "PRIOR full_str = full_str" keeps each hierarchy inside its own source
      -- row (without it the rows multiply across the whole table), and
      -- "PRIOR SYS_GUID() IS NOT NULL" prevents a false cycle error.
      SELECT full_str, SUBSTR(full_str, LEVEL, 1) AS str
        FROM (SELECT DISTINCT full_str FROM drop_tab)
     CONNECT BY LEVEL <= LENGTH(full_str)
            AND PRIOR full_str = full_str
            AND PRIOR SYS_GUID() IS NOT NULL
     )
    GROUP BY full_str, str
  )
 WHERE rno = 1
 ORDER BY full_str
/

FULL_STRING MAX_CHAR RNO
-------------------------
0001230      0       1
aaaabcd      a       1
bbbbcdef     b       1
0
source

All Articles