How to calculate Monthly aggregates from daily data

milan i 81 Reputation points
2021-09-26T17:16:35.157+00:00

we have detail daily data in a table about members., trying to finding how many new members (by considering ID1, ID2, ID3 columns values) monthly totals.

so we want to find out when is the ID1, ID2, ID3 combination first exist and mark add count for monthly.

CREATE TABLE #MYSRCHISTORY_members

(

ID_1 VARCHAR(10),

ID_2 int,

ID_3 VARCHAR(10),

Pay1 varchar(6),

Pay2 varchar(6),

Status varchar(10)

FileDate

)

--- SOME MORE detail INFO about data

--- WE HAVE ABOUT 2 YEARS WORTH OF DAILY FEEDS DATA LOADED TO ONETABLE(MYSRCHISTORY_members) last column FileDate shows what day is the file,

--- USUALLY EACH DAY FILE HAVE ABOUT 30K RECORDS, IN WHICH IT INCLUDES OLD RECORDS AS IS, NEW ENTRIES ADDED (by considering id1,id2,id3 combination) , FOR SOME MEMBERS IT MAY CHANGE OTHER COLUMN VALUES.

--- WE FOUND MEMBER BASED ON THREE id FIELDS, IF ID_1,2,3 CHANGES THEN IT IS CONSIDER AS NEW MEMBER.

--- HERE WOULD LIKE TO KNOW HOW MANY NEWMEMBERS WERE ADDED PER EACH MONTH (MONTHLY AGGREGATION)

---Inserts 5 records in that file, 3 old 2 new (in 2 one is just id change one is

INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep

INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep

INSERT INTO #MYSRC VALUES ('7120', 250, '3241', '120.75','0.00','EN-50', '09/03/2021') --New RECORDS because of ID1 change,

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '09/03/2021') ---Old RECORDS/member

INSERT INTO #MYSRC VALUES ('1122', 250, '3243', '20.75','0.00','EN-25', '09/02/2021') --old record

INSERT INTO #MYSRC VALUES ('1123', 252, '3244', null,null,'EN-25', '09/02/2021') --old record/member

INSERT INTO #MYSRC VALUES ('1122', 27, '3243', '20.75','0.00','EN-25', '09/01/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for SEP

INSERT INTO #MYSRC VALUES ('1123', 252, '3244', null,null,'EN-25', '09/01/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for SEP

---- August Data below - FileDate (last column)

INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '08/31/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Aug

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for Aug

INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '08/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for Aug

INSERT INTO #MYSRC VALUES ('3120', 250, '777', '120.75','0.00','EN-25', '08/30/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for Aug

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/30/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '08/18/2021') ---same set (id1,2,3) not exist earlier, appeared first in Aug 2021. so count it

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/17/2021')--- exist same set (id1,2,3) in earlier data, so not a new member for Aug

INSERT INTO #MYSRC VALUES ('1121', 37, '3242', null,'3.00','FU-25', '08/11/2021') --- New Set of ID1,2,3 not exist in earlier data so count it as new member for AUG

INSERT INTO #MYSRC VALUES ('3122', 36, '100', '20.75','0.00','EN-25', '08/11/2021') ---New Set of ID1,2,3 not exist in earlier data so count it as new member for AUG

---- July Data below

INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '07/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '07/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('9122', 250, '3243', '20.75','0.00','EN-25', '07/31/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

INSERT INTO #MYSRC VALUES ('3120', 250, '777', '120.75','0.00','EN-25', '07/20/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '07/20/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','FU-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

INSERT INTO #MYSRC VALUES ('6121', 251, '3242', null,'3.00','FU-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '07/05/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('5121', 251, '3242', null,'3.00','FU-25', '07/05/2021')-- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '07/01/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

INSERT INTO #MYSRC VALUES ('4122', 250, '100', '20.75','0.00','EN-25', '07/01/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

---- Jun

INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '06/05/2021')

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '06/05/2021')

INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '06/01/2021')

INSERT INTO #MYSRC VALUES ('3122', 250, '100', '20.75','0.00','EN-25', '06/01/2021') --- assume jun 2021 is Starting population

Expected Output

Date, NewMembersCount

Jun-2021, 0 -- The oldest in the file table can make all 0's or simply just rows count

July-2021, 7 -- 7 new members were added on July 2021

Aug-2021,4 -- 4 new members were added on July 2021

Sep-2021,3 -- 3 new members were added on July 2021

Thank you

ASita

SQL Server
SQL Server
A family of Microsoft relational database management and analysis systems for e-commerce, line-of-business, and data warehousing solutions.
12,654 questions
Transact-SQL
Transact-SQL
A Microsoft extension to the ANSI SQL language that includes procedural programming, local variables, and various support functions.
4,550 questions
0 comments No comments
{count} votes

Accepted answer
  1. MelissaMa-MSFT 24,176 Reputation points
    2021-09-27T03:02:16.59+00:00

    Hi @milan i ,

    Based on Viorel's query, please refer below updated one and check whether it is helpful.

    ;with MY as  
     (  
         select distinct month(FileDate) as m, year(FileDate) as y  
         from #MYSRC  
     ),   
     F as  
     (  
         select top(1) m as fm, y as fy from MY order by y, m  
     )  
     select format(datefromparts(y, m, 1), 'MMM"-"yyyy') as [Date],  
         case when m = fm and y = fy then 0  
         else  
             (select count(*) from   
                 (  
                     select id_1, id_2, id_3 from #MYSRC where month(FileDate) = m and year(FileDate) = y  
                     except  
                     select id_1, id_2, id_3 from #MYSRC where year(FileDate) < y or (year(FileDate) = y and month(FileDate) < m)  
                 ) d  
             )  
         end as NewMembersCount  
    	 , case when m = fm and y = fy then 0  
         else  
             (select count(*) from   
                 (  
                     select id_1, id_2, id_3 from #MYSRC where month(FileDate) = m and year(FileDate) = y and left(Status,2)='EN'  
                     except  
                     select id_1, id_2, id_3 from #MYSRC where year(FileDate) < y or (year(FileDate) = y and month(FileDate) < m  )  
                 ) d  
             )  
         end as EUMembersCount  
    	  , case when m = fm and y = fy then 0  
         else  
             (select count(*) from   
                 (  
                     select id_1, id_2, id_3 from #MYSRC where month(FileDate) = m and year(FileDate) = y and left(Status,2)='FU'  
                     except  
                     select id_1, id_2, id_3 from #MYSRC where year(FileDate) < y or (year(FileDate) = y and month(FileDate) < m  )  
                 ) d  
             )  
         end as FUMembersCount  
     from MY, F  
     order by y, m  
    

    Output:

    Date	NewMembersCount	EUMembersCount	FUMembersCount  
    Jun-2021	0	0	0  
    Jul-2021	7	5	2  
    Aug-2021	2	1	1  
    Sep-2021	4	4	0  
    

    Best regards,
    Melissa


    If the answer is helpful, please click "Accept Answer" and upvote it.
    Note: Please follow the steps in our documentation to enable e-mail notifications if you want to receive the related email notification for this thread.

    0 comments No comments

8 additional answers

Sort by: Most helpful
  1. Viorel 111.8K Reputation points
    2021-09-26T18:04:48.337+00:00

    Try one of the queries:

    ;
    with MY as
    (
        select distinct month(FileDate) as m, year(FileDate) as y
        from #MYSRC
    ), 
    F as
    (
        select top(1) m as fm, y as fy from MY order by y, m
    )
    select format(datefromparts(y, m, 1), 'MMM"-"yyyy') as [Date],
        case when m = fm and y = fy then 0
        else
            (select count(*) from 
                (
                    select id_1, id_2, id_3 from #MYSRC where month(FileDate) = m and year(FileDate) = y
                    except
                    select id_1, id_2, id_3 from #MYSRC where year(FileDate) < y or (year(FileDate) = y and month(FileDate) < m)
                ) d
            )
        end as NewMembersCount
    from MY, F
    order by y, m
    

    However the results are different. (Maybe these results are correct).

    Date       NewMembersCount
    Jun-2021               0
    Jul-2021               7
    Aug-2021               2
    Sep-2021               4
    

  2. milan i 81 Reputation points
    2021-09-26T23:26:10.127+00:00

    Thank you very much @Viorel

    Actually small help here with the out put table.

    how can we add couple of new columns(ENCounts and FUCounts.) to the output, by using "status" in the source table (#mysrc).

    in the given source table #mySRC , it has a column name "Status" first two characters defines either EN , FU, if EN then add count to EN if FU then add count to FU

    for example..

    Create Table #Output
    ( DateCol Varchar(10),
    NewMemberscount int,
    EUMembersCount int,
    FUCMembersCount int
    )

    Date NewMembersCount EUMembersCount FUMmbersCount
    Jun-2021 0 0 0
    Jul-2021 7 4 3
    Aug-2021 2 1 1
    Sep-2021 4 4 0

    Thank you,
    Asita


  3. milan i 81 Reputation points
    2021-09-27T00:38:14.297+00:00

    if any problem with source code please use this. (earlier comma and a datatype missing)

    CREATE TABLE #MYSRC

    (

    ID_1 VARCHAR(10),

    ID_2 int,

    ID_3 VARCHAR(10),

    Pay1 varchar(6),

    Pay2 varchar(6),

    Status varchar(10),

    FileDate DATE

    )

    --- SOME MORE detail INFO about data

    --- WE HAVE ABOUT 2 YEARS WORTH OF DAILY FEEDS DATA LOADED TO ONETABLE(MYSRCHISTORY_members) last column FileDate shows what day is the file,

    --- USUALLY EACH DAY FILE HAVE ABOUT 30K RECORDS, IN WHICH IT INCLUDES OLD RECORDS AS IS, NEW ENTRIES ADDED (by considering id1,id2,id3 combination) , FOR SOME MEMBERS IT MAY CHANGE OTHER COLUMN VALUES.

    --- WE FOUND MEMBER BASED ON THREE id FIELDS, IF ID_1,2,3 CHANGES THEN IT IS CONSIDER AS NEW MEMBER.

    --- HERE WOULD LIKE TO KNOW HOW MANY NEWMEMBERS WERE ADDED PER EACH MONTH (MONTHLY AGGREGATION)

    ---Inserts 5 records in that file, 3 old 2 new (in 2 one is just id change one is

    INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep

    INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep

    INSERT INTO #MYSRC VALUES ('7120', 250, '3241', '120.75','0.00','EN-50', '09/03/2021') --New RECORDS because of ID1 change,

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '09/03/2021') ---Old RECORDS/member

    INSERT INTO #MYSRC VALUES ('1122', 250, '3243', '20.75','0.00','EN-25', '09/02/2021') --old record

    INSERT INTO #MYSRC VALUES ('1123', 252, '3244', null,null,'EN-25', '09/02/2021') --old record/member

    INSERT INTO #MYSRC VALUES ('1122', 27, '3243', '20.75','0.00','EN-25', '09/01/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for SEP

    INSERT INTO #MYSRC VALUES ('1123', 252, '3244', null,null,'EN-25', '09/01/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for SEP

    ---- August Data below - FileDate (last column)

    INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '08/31/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Aug

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for Aug

    INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '08/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for Aug

    INSERT INTO #MYSRC VALUES ('3120', 250, '777', '120.75','0.00','EN-25', '08/30/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for Aug

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/30/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '08/18/2021') ---same set (id1,2,3) not exist earlier, appeared first in Aug 2021. so count it

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/17/2021')--- exist same set (id1,2,3) in earlier data, so not a new member for Aug

    INSERT INTO #MYSRC VALUES ('1121', 37, '3242', null,'3.00','FU-25', '08/11/2021') --- New Set of ID1,2,3 not exist in earlier data so count it as new member for AUG

    INSERT INTO #MYSRC VALUES ('3122', 36, '100', '20.75','0.00','EN-25', '08/11/2021') ---New Set of ID1,2,3 not exist in earlier data so count it as new member for AUG

    ---- July Data below

    INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '07/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '07/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('9122', 250, '3243', '20.75','0.00','EN-25', '07/31/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

    INSERT INTO #MYSRC VALUES ('3120', 250, '777', '120.75','0.00','EN-25', '07/20/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '07/20/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

    INSERT INTO #MYSRC VALUES ('6121', 251, '3242', null,'3.00','FU-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

    INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','FU-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

    INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '07/05/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('5121', 251, '3242', null,'3.00','EN-25', '07/05/2021')-- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','FU-25', '07/01/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July

    INSERT INTO #MYSRC VALUES ('4122', 250, '100', '20.75','0.00','EN-25', '07/01/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

    ---- Jun

    INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '06/05/2021')

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '06/05/2021')

    INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '06/01/2021')

    INSERT INTO #MYSRC VALUES ('3122', 250, '100', '20.75','0.00','EN-25', '06/01/2021') --- assume jun 2021 is Starting population

    Thank you

    0 comments No comments

  4. milan i 81 Reputation points
    2021-09-28T02:12:25.793+00:00

    Thank you much @MelissaMa-MSFT

    I am working in my data set with your sample update you soon. Once again Thank you much

    0 comments No comments