_ | 覦覈襦 | 豕蠏手 | 殊螳 | 譯殊碁
FrontPage › LCS

https://stackoverflow.com/questions/66247155/longest-common-substring-bigquery-sql
select A, B, 
  (
    select string_agg(word, ' ' order by a_pos) phrase 
    from unnest(split(A, ' ')) word with offset a_pos
    join unnest(split(B, ' ')) word with offset b_pos
    using(word)
    group by b_pos - a_pos
    order by length(phrase) desc
    limit 1
  ) as C
from `project.dataset.table`  


る numbers朱 企 伎 .

--source: https://www.red-gate.com/simple-talk/blogs/string-comparisons-in-sql-the-longest-common-subsequence/
IF OBJECT_ID (N'LongestCommonSubsequence') IS NOT NULL
   DROP FUNCTION LongestCommonSubsequence
GO
 
Create FUNCTION LongestCommonSubsequence
/**
summary:   >
 The longest common subsequence (LCS) problem is the problem of finding the
 longest subsequence common to all sequences in two sequences. It differs
 from problems of finding common substrings: unlike substrings, subsequences
 are not required to occupy consecutive positions within the original
 sequences. For example, the sequences "1234" and "1224533324" have an LCS
 of "1234":
Author: Phil Factor
Revision: 1.0
date: 05 Dec 2014
example:
 code: |
     Select dbo.LongestCommonSubsequence ('1234', '1224533324')
     Select dbo.LongestCommonSubsequence ('thisisatest', 'testing123testing')
     Select dbo.LongestCommonSubsequence ( 'XMJYAUZ', 'MZJAWXU') 
     Select dbo.LongestCommonSubsequence ( 'beginning-middle-ending',
       'beginning-diddle-dum-ending')
returns:   >
  the longest common subsequence as a string
**/    
(
 @firstString Varchar(max),
 @SecondString Varchar(max)
)
RETURNS varchar(max)
as begin
 
Declare @Array Varchar(MAX)
Declare @ArrayMax int
Declare @west char(1)
Declare @Lines Varchar(max)
Declare @ii int, @jj int, @iiMax int, @jjMax int, @index int
 
Select @iiMax=len(@FirstString), --the length of the first string
       @jjMax=len(@SecondString), --the length of the second string
       @index=@jjMax+1 --where the first new item (matrix has an extra row & column)
Select @ArrayMax=(@iiMax)*(@jjMax)--total length of the array iterated 
Select @Array=replicate(char(0), @jjMax+1) --add in the first nought-filled row
 
Select 
  @Index=@Index+case when (number-1) % @jjMax = 0 then 2 else 1 end, --current index
  @west=case when (number-1) % @jjMax = 0 then Char(0) else substring(@Array,@index-1,1) end,
  --as we add an extra column in, this is a dangerous operation otherwise
  @Array=@Array --we add a character to the string array, Char(0)--char
        + case when (number-1) % @jjMax = 0 then Char(0) else '' end -- begin new row always zero
        + case --check first to see if there was a match. If so, take north west number incremented
            when substring(@firstString,((Number-1)/@jjMax)+1,1)
                             =substring(@SecondString, ((number-1)% @jjMax)+1 ,1)
            --remember to set the appropriate collation for this comparison!
           then  Char(Ascii(substring(@Array,@index-@jjmax-2,1))+1)--increment the NW value 
         when Ascii(substring(@Array,@index-@jjmax-1,1))>ascii(@west) --compare west to north
           then substring(@Array,@index-@jjmax-1,1) --and take the larger
         else @west-- number to west if larger
        end
from numbers where number<=@ArrayMax
 
--Now all we need to do is to backtrack through the matrix to find the best solution
Declare @commonString Varchar(max), @X_Y int
Select @CommonString ='' --this contains the string
Select @ii=@iimax+1,@jj=@jjmax+1
while (@ii>1 and @jj>1) 
  begin
  Select @X_Y = ((@ii-1)*(@jjMax+1))+@jj --the current matrix location
  if (substring(@firststring,@ii-1,1) = substring(@Secondstring,@jj-1,1))
       BEGIN --if there is a match, add the character (we'll reverse it later)
    Select @CommonString=@CommonString+substring(@firststring,@ii-1,1)
    select @jj=@jj-1, @ii=@ii-1 --go north-west
    end
  else if ascii(Substring(@Array,@X_Y,1)) = ascii(Substring(@Array, @X_Y-@jjMax-1, 1))
    select @ii=@ii-1
  ELSE if ascii(Substring(@Array,@X_Y,1)) = ascii(Substring(@Array, @X_Y-1, 1))
    select @jj=@jj-1
  else break  
  if @@error>0 break   
  end
 return  Reverse(@CommonString)
 end
go
 
Declare @timing datetime Select @Timing=GetDate()
 
if dbo.LongestCommonSubsequence ('1234', '1224533324')<>'1234' 
   raiserror('test 1 failed',16,1)
if dbo.LongestCommonSubsequence ('thisisatest', 'testing123testing')<>'tsitest' 
   raiserror('test 2 failed',16,1)
if dbo.LongestCommonSubsequence ('XMJYAUZ', 'MZJAWXU')<>'MJAU'
   raiserror('test 3 failed',16,1)
if dbo.LongestCommonSubsequence ('yab', 'xabyrbyab')<>'yab'
   raiserror('test 4 failed',16,1)
if dbo.LongestCommonSubsequence ('beginning-middle-ending','beginning-diddle-dum-ending')
   <>'beginning-iddle-ending' raiserror('test 5 failed',16,1)
 
select datediff(millisecond,@timing,GetDate()) as milliseconds


select dbo.LongestCommonSubsequence('ABCDHEF', 'BCDEF')
蠍 蠍郁鍵..
企: : るジ讓曙 襦螻豺 企Ν 譯殊語. 襦螻豺
EditText : Print : Mobile : FindPage : DeletePage : LikePages : Powered by MoniWiki : Last modified 2024-06-13 13:37:38

殊 伎企る 蟆 譬 豺伎螻 譬 襷 螳蟆 覃 襷 覯朱る 蟆.