Generate rows from string of numbers

618 Views Asked by At

by I have an Oracle 18c table that has strings like this:

select
    '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' as multipart_lines
    --There are more rows in the actual table.
from
    dual

    MULTIPART_LINES                                              
    -------------------------------------------------------------
    ((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))
--      v1      v2           v3           v4           v5
--  |            part 1             | |          part 2         | 
  • Individual coordinates are separated by spaces.
  • Vertices (X Y Z coordinates) are separated by commas.
  • Line parts are wrapped in brackets and separated by commas.

In a query, I want to generate rows for each vertex:

PART_NUM   VERTEX_NUM          X          Y          Z
---------- ---------- ---------- ---------- ----------
         1          1          0          5          0
         1          2         10         10      11.18
         1          3         30          0      33.54
         2          1         50         10      33.54
         2          2         60         10      43.54
  • I want to do this in a query. I don't want to insert rows into a table.
  • Unfortunately, I don't have CREATE TYPE privileges in the database. But I can create functions (and of course, inline functions are an option too).

How can I generate rows from the numbers (vertices) in the string?

Related: Oracle Ideas - generate_series() function

6

There are 6 best solutions below

6
On BEST ANSWER

As an alternative - here is how you can process the input strings to convert them to proper JSON strings; then the task becomes trivial. Showing just the JSON-ization first, separately, as it really is the meaningful part of this solution; then after I show the query and result, I will complete the solution by adding the JSON manipulation.

with
  inputs (id, multipart_lines) as (
    select 2810,
      '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))'
                                  from dual union all
    select 7284, '((-2.3 0.2 3))' from dual
  )
, j (id, ml) as (
    select id,
           regexp_replace(
             regexp_replace(
               regexp_replace(
                 regexp_replace(
                   regexp_replace(multipart_lines
                   , '\(\s*\(\s*', '[[[')
                 , '\s*\)\s*\)', ']]]')
               , '\s*\)\s*,\s*\(\s*', '],[')
             , '\s*,\s*', '],[')
           , '\s+', ',')
    from   inputs
  )
select * from j;



   ID ML                                                                  
----- --------------------------------------------------------------------
 2810 [[[0,5,0],[10,10,11.18],[30,0,33.54]],[[50,10,33.54],[60,10,43.54]]]
 7284 [[[-2.3,0.2,3]]] 

Your inputs should really look like the strings in column ml in my subquery j - then you could process them like this:

with
  inputs (id, multipart_lines) as (
            ........
  )
, j (id, ml) as (
            ........
  )
select id, part_num, vertex_num, x, y, z
from   j,
       json_table(ml, '$[*]'
                  columns (
                    part_num for ordinality,
                    nested path '$[*]'
                    columns (
                      vertex_num for ordinality,
                      x number path '$[0]',
                      y number path '$[1]',
                      z number path '$[2]'
                    )
                  )
       )
order by id, part_num, vertex_num   --  if needed
;

The output is the same as in my other answer.

0
On

You can do it with only simple string functions (which are much faster than regular expressions) and recursive sub-queries:

WITH line_bounds (id, multipart_lines, line_no, spos, epos) AS (
    SELECT id,
           multipart_lines,
           1,
           2,
           INSTR(multipart_lines, ')', 2)
    FROM   table_name
UNION ALL
    SELECT id,
           multipart_lines,
           line_no + 1,
           epos + 2,
           INSTR(multipart_lines, ')', epos + 2)
    FROM   line_bounds
    WHERE  epos > 0
)
SEARCH DEPTH FIRST BY id SET line_order,
row_bounds (id, line, line_no, row_no, spos, epos) AS (
  SELECT id,
         SUBSTR(multipart_lines, spos + 1, epos - spos - 1),
         line_no,
         1,
         1,
         INSTR(
           SUBSTR(multipart_lines, spos + 1, epos - spos - 1),
           ',',
           1
         )
  FROM   line_bounds
  WHERE  epos > 0
UNION ALL
  SELECT id,
         line,
         line_no,
         row_no + 1,
         epos + 2,
         INSTR(line, ',', epos + 2)
  FROM   row_bounds
  WHERE  epos > 0
)
SEARCH DEPTH FIRST BY id, line_no SET row_order,
column_bounds (id, line, line_no, row_no, xpos, ypos, zpos, epos) AS (
  SELECT id,
         line,
         line_no,
         row_no,
         spos,
         INSTR(line, ' ', spos, 1),
         INSTR(line, ' ', spos, 2),
         epos
  FROM   row_bounds
)
SELECT id,
       line_no,
       row_no,
       SUBSTR(line, xpos + 0, ypos - xpos) AS x,
       SUBSTR(line, ypos + 1, zpos - ypos - 1) AS y,
       CASE epos
       WHEN 0
       THEN SUBSTR(line, zpos + 1)
       ELSE SUBSTR(line, zpos + 1, epos - zpos - 1)
       END AS z
FROM   column_bounds

Which, for the sample data:

CREATE TABLE table_name (id, multipart_lines) AS
  SELECT 1, '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' FROM DUAL UNION ALL
  SELECT 2, '((0 1 0, 0 2 0, 0 3 0),(0 4 0, 0 5 0))' FROM DUAL;

Outputs:

ID LINE_NO ROW_NO X Y Z
1 1 1 0 5 0
1 1 2 10 10 11.18
1 1 3 30 0 33.54
1 2 1 50 10 33.54
1 2 2 60 10 43.54
2 1 1 0 1 0
2 1 2 0 2 0
2 1 3 0 3 0
2 2 1 0 4 0
2 2 2 0 5 0

db<>fiddle here

2
On

Use SDO_GEOMETRY to parse the string and then use SDO_UTIL.EXTRACT to get each element and then SDO_UTIL.GETVERTICES to get the vertices:

SELECT t.id,
       e.elem_no,
       v.id AS coord_id,
       x,
       y,
       z
FROM   ( SELECT id,
                SDO_GEOMETRY('MULTILINESTRING '||multipart_lines) AS shape
         FROM   table_name
       ) t
       CROSS JOIN LATERAL (
         SELECT LEVEL AS elem_no,
                SDO_UTIL.EXTRACT(t.shape, LEVEL) AS elem
         FROM   DUAL
         CONNECT BY LEVEL <= SDO_UTIL.GETNUMELEM(t.shape)
       ) e
       CROSS APPLY TABLE(SDO_UTIL.GETVERTICES(e.elem)) v

Which, for the sample data:

CREATE TABLE table_name (id, multipart_lines) AS
  SELECT 1, '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' FROM DUAL UNION ALL
  SELECT 2, '((0 1 0, 0 2 0, 0 3 0),(0 4 0, 0 5 0))' FROM DUAL;

Outputs:

ID ELEM_NO COORD_ID X Y Z
1 1 1 0 5 0
1 1 2 10 10 11.18
1 1 3 30 0 33.54
1 2 1 50 10 33.54
1 2 2 60 10 43.54
2 1 1 0 1 0
2 1 2 0 2 0
2 1 3 0 3 0
2 2 1 0 4 0
2 2 2 0 5 0

db<>fiddle here

2
On

It would be so much simpler if the input was in some standard format - for example JSON. Then the task would be trivial. Do you have any power over that?

If not, you could either transform the input into proper JSON (or similar), or you could attack the problem directly. I illustrate the latter below, assuming Oracle version 12.1 or higher.

with
  inputs (id, multipart_lines) as (
    select 2810,
      '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))'
                                  from dual union all
    select 7284, '((-2.3 0.2 3))' from dual
  )
select id, part_num, vertex_num, x, y, z
from   inputs 
       cross join lateral
       ( select  level as part_num,
                 regexp_substr(multipart_lines,
                               '\(([^()]+)\)', 1, level, null, 1) as part
         from    dual
         connect by level <= regexp_count(multipart_lines, '\(') - 1
       )
       cross join lateral
       (
         select  level as vertex_num,
                 regexp_substr(part, '[^,]+', 1, level) as vertex
         from    dual
         connect by level <= regexp_count(part, ',') + 1
       )
       cross join lateral
       (
         select   to_number(regexp_substr(vertex, '[^ ]+', 1, 1)) as x,
                  to_number(regexp_substr(vertex, '[^ ]+', 1, 2)) as y,
                  to_number(regexp_substr(vertex, '[^ ]+', 1, 3)) as z
         from     dual
       )
order by id, part_num, vertex_num   --  if needed
;

Output (from the sample inputs I included in the query):

        ID   PART_NUM VERTEX_NUM          X          Y          Z
---------- ---------- ---------- ---------- ---------- ----------
      2810          1          1          0          5          0
      2810          1          2         10         10      11.18
      2810          1          3         30          0      33.54
      2810          2          1         50         10      33.54
      2810          2          2         60         10      43.54
      7284          1          1       -2.3         .2          3
1
On

I see that mathguy has got there. I was working with from json_table but I can't unnest 2 rows at a time otherwise I'd be basically there with a second use of row_number() over (partition by Paru_num).

create table sample(value varchar(100));
insert into sample values 
('((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))')

1 rows affected

with a as (
select '["'|| regexp_replace(value,'\(|\)','') ||'"]' a from sample
),
b as (
select regexp_replace(a,', ?','","') b from a
),
c as (
SELECT value c
FROM json_table( (select b from b) , '$[*]'
                COLUMNS (value PATH '$')
               )
),
d as (
SELECT
  c d,
  instr(c,' ') s1,
  instr(c,' ',instr(c,' ')+1) s2
from c)
select 
 substr(d,0,s1) x,
 substr(d,s1+1,s2-s1) y,
 substr(d,s2+1) z
from d
X   | Y   | Z    
:-- | :-- | :----
0   | 5   | 0    
10  | 10  | 11.18
30  | 0   | 33.54
50  | 10  | 33.54
60  | 10  | 43.54
with a as (
select '["'|| regexp_replace(value,'\(+|\)+','"') ||'"]' a from sample
),
b as(
select replace(a,'""','"')b from a
),
c as (
SELECT 
  row_number() over (order by 'zero') pn,
  value c
FROM json_table( (select b from b) , '$[*]'
                COLUMNS (value PATH '$')
)    ),
d as (
select '["'|| pn ||' '|| regexp_replace(c,', ?','","'||pn||' ')||'"]' d from c
)
select * from d
| D                                          |
| :----------------------------------------- |
| ["1 0 5 0","1 10 10 11.18","1 30 0 33.54"] |
| ["2 50 10 33.54","2 60 10 43.54"]          |
with a as (
select '["'|| regexp_replace(value,'\(+|\)+','"') ||'"]' a from sample
),
b as(
select replace(a,'""','"')b from a
),
c as (
SELECT 
  row_number() over (order by 'zero') pn,
  value c
FROM json_table( (select b from b) , '$[*]'
                COLUMNS (value PATH '$')
)    ),
d as (
select '["'|| pn ||' '|| regexp_replace(c,', ?','","'||pn||' ')||'"]' d from c
),
e as (
SELECT 
  row_number() over (order by 'zero') pn,
  value c
FROM json_table( (select d from d) , '$[*]'
                COLUMNS (value PATH '$')
)    )
select * from e
ORA-01427: single-row subquery returns more than one row

db<>fiddle here

0
On

@SolomonYakobson provided this answer in an Oracle Community post.

with sample as (
                select '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' as multipart_lines
                        --There are more rows in the actual table.
                  from  dual
               )
select  part_num,
        vertex_num,
        to_number(regexp_substr(vertex,'[^ ]+')) x,
        to_number(regexp_substr(vertex,'[^ ]+',1,2)) y,
        to_number(regexp_substr(vertex,'[^ ]+',1,3)) z
  from  sample,
        lateral(
                select  level part_num,
                        regexp_substr(multipart_lines,'\(([^()]+)',1,level,null,1) part
                  from  dual
                  connect by level < regexp_count(multipart_lines,'\(')
               ),
        lateral(
                select  level vertex_num,
                        regexp_substr(part,'[^,]+',1,level) vertex
                  from  dual
                  connect by level <= regexp_count(part,',') + 1
               )
/

  PART_NUM VERTEX_NUM          X          Y          Z
---------- ---------- ---------- ---------- ----------
         1          1          0          5          0
         1          2         10         10      11.18
         1          3         30          0      33.54
         2          1         50         10      33.54
         2          2         60         10      43.54

As well as an Oracle 19c solution:

create or replace
  function split_multipart_line(
                                p_line varchar2
                               )
    return varchar2
    sql_macro
    is
    begin
        return q'[
select  part_num,
        vertex_num,
        to_number(regexp_substr(vertex,'[^ ]+')) x,
        to_number(regexp_substr(vertex,'[^ ]+',1,2)) y,
        to_number(regexp_substr(vertex,'[^ ]+',1,3)) z
  from  dual,
        lateral(
                select  level part_num,
                        regexp_substr(p_line,'\(([^()]+)',1,level,null,1) part
                  from  dual
                  connect by level < regexp_count(p_line,'\(')
               ),
        lateral(
                select  level vertex_num,
                        regexp_substr(part,'[^,]+',1,level) vertex
                  from  dual
                  connect by level <= regexp_count(part,',') + 1
               )]';
end;
/

Function created.

SQL> with sample as (
  2                  select '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' multipart_lines from dual union all
  3                  select '((1 2 3, 4 5 6, 7 8 9, 10 11 12),(22 33 44, 55 66 77))' multipart_lines from dual
  4                 )
  5  select  l.*
  6    from  sample,
  7          lateral(
  8                  select  *
  9                    from  split_multipart_line(multipart_lines)
 10                 ) l
 11  /


  PART_NUM VERTEX_NUM          X          Y          Z
---------- ---------- ---------- ---------- ----------
         1          1          0          5          0
         1          2         10         10      11.18
         1          3         30          0      33.54
         2          1         50         10      33.54
         2          2         60         10      43.54
         1          1          1          2          3
         1          2          4          5          6
         1          3          7          8          9
         1          4         10         11         12
         2          1         22         33         44
         2          2         55         66         77


11 rows selected.

SQL>