by Pareto Analyst (1 Submission)
Category: Databases/Data Access/DAO/ADO
Compatability: Visual Basic 5.0
Difficulty: Unknown Difficulty
Originally Published: Fri 21st December 2001
Date Added: Mon 8th February 2021
Rating: (1 Votes)
Purpose: Stored Procedure used to determine the Linear Correlation between 2 variabes in a table of your database. This can also be used to
MAY 1, 2001
Dataminer is the name of my database. Replace it with yours.
TEST_DATA is the name of a table in dataminer which contains the data, that I am working with.
XDATA is the name of a column in TEST_DATA which contains the independent data values.
YDATA is the name of a column in TEST_DATA which contains the dependent data values.
*/
DECLARE @RC int
DECLARE @tablename varchar(255)
DECLARE @x_column varchar(255)
DECLARE @y_column varchar(255)
DECLARE @a float
DECLARE @b float
DECLARE @r float
DECLARE @failcode float
DECLARE @debug int
-- Set parameter values
SET @tablename = 'TEST_DATA'
SET @x_column = 'XDATA'
SET @y_column = 'YDATA'
EXEC @RC = [Dataminer].[dbo].[usp_Linear_Regression_2V] @tablename, @x_column, @y_column, @a OUTPUT , @b OUTPUT , @r OUTPUT , @failcode OUTPUT , @debug
SELECT @a 'A', @B 'B', @r 'R', @failcode 'Error Code'
GO
/* END OF THIS PROCEDURE */
CREATE Procedure usp_Linear_Regression_2V
/*'****************************************HEADER NOTICE - HTTP://WWW.PARETOANALYSIS.COM***************************************
//
'
'Copyright: The use or distribution of this algorithm is free provided that it is
' used / distributed along with this HEADER NOTICE always and withhout
' any changes to the HEADER NOTICE.
'
' No claim is made as to the accuracy or fitness of this algorithm. The use of this
' algorithm is at your own risk and choice and the author is not liable in anyway
' for its use or damages that may occur as a result of its use.
'
'
'Purpose: Algorithm that calculates the Linear Regression between two variables (columns of data).
'
'Source: http://www.paretoanalysts.com
'
'Author: Pareto Analysts
'
'Creation Date: May 1, 2001
'
'Description: Calculates the Linear Regression of a series of numbers
' for use in statistical analysis, data mining or data analysis.
'
'Inquiries: Inquiries about this algorithm, data mining, statistical analysis and data analysis
' can be directed to http://www.paretoanalysts.com
'
'******************************************************************************************************************************
'If a Linear Relationship between two variables is assumed, it may be described by a straight line: Y = A + BX
'where Y : dependent variable
' X : independent variable
' A : the point on the Y - axis where the line, Y = A + BX intercepts it
' B : the slope of the line Y = A + BX
' R : the linear correlation coefficient measures the goodness of fit between the line Y = A + BX and your data.
' If R is zero, there is no linear correlation between X and Y and X cannot be used to predict Y.
' If it's 1 there is a perfect linear correlation between X and Y. The closer R is to 1 the better for linear correlation.
' N : the number of elements in the sample e.g. the count of the independent variable X
'
' A = (SUM(Y)*SUM(X^2)) - (SUM(X)*SUM(X*Y)) / (N*SUM(X^2)) - (SUM(X)^2)
'
' B = N*SUM(X*Y) - (SUM(X)*SUM(Y)) / N*SUM(X^2)-(SUM(Y)^2)
'
' N = Count(X)
'
' R = N*SUM(X*Y) - (SUM(X)*SUM(Y)) / SQUARET(N*SUM(X^2) - (SUM(X))^2)*(N*SUM(Y^2) - (SUM(Y))^2)
*/
(
@tablename varchar(255), /* table containing data used to create the regression */
@x_column varchar(255), /* column containing the independent X variable / data */
@y_column varchar(255), /* column containing the dependent Y variables / data */
@a float Output, /* correlation coefficient of the curve Y = A + BX */
@b float Output, /* point on the Y - axis where the line, Y = A + BX intercepts it */
@r float Output, /* slope of the line Y = A + BX */
@failcode float Output, /* zero is returned for success and an error number for failures */
@debug int = 0 /* @debug <> 0 means that this procedure is in a debug mode */
)
--With Recompile
AS
DECLARE @X varchar(255) /* table and column name supplied as the independent(X) variable */
DECLARE @Y varchar(255) /* table and column name supplied as the independent(X) variable */
DECLARE @SUMX float /* SUM(X) */
DECLARE @SUMY float /* SUM(Y) */
DECLARE @SUMSQUAREX float /* SUM(SQUARE(X)) */
DECLARE @SUMSQUAREY float /* SUM(SQUARE(Y)) */
DECLARE @SUMXY float /* SUM(X*Y) */
DECLARE @N float /* number of records Count(*) or Count(X) in the data set. */
DECLARE @nSQL varchar(510) /* SQL string to be executed */
IF @tablename IS NULL
BEGIN
RAISERROR('@tablename input parameter cannot be null. Use a valid table name.',1,1)
RETURN
END
ELSE IF @x_column IS NULL
BEGIN
RAISERROR('@x_column input parameter cannot be null. Use a valid column name.',1,1)
RETURN
END
ELSE IF @y_column IS NULL
BEGIN
RAISERROR('@y_column input parameter cannot be null. Use a valid column name.',1,1)
RETURN
END
/* the full column name for the independent (X) variable. */
BEGIN
SELECT @X = (@tablename) + '.' + (@x_column)
END
/* the full column name for the independent (Y) variable. */
BEGIN
SELECT @Y = (@tablename) + '.' + (@y_column)
END
/* Print debug information when in debug mode. */
BEGIN
IF @debug <> 0
PRINT '@X : ' + @X + ', ' + SPACE(2) +'@Y : ' + @Y
END
/*
The point on the Y - axis where the line Y = A + BX intercepts it is given by the equation:
A = [(SUM(Y))*(SUM(SQUARE(X))) - (SUM(X))*(SUM(X*Y))] / [N*SUM(SQUARE(X)) - SQUARE((SUM(X)))].
The slope of the line Y = A + BX is given by the equation:
B = N*SUM(X*Y) - (SUM(X))*(SUM(Y)) / [N*SUM(SQUARE(X)) - SQUARE((SUM(X))]
The correlation coefficient of the line Y = A + BX where 0 means no correlation and 1 means perfect
correlation is given by the equation:
R = N*SUM(X*Y) - (SUM(X))*(SUM(Y)) / SQRT[N*SUM(SQUARE(X)) - SQUARE(SUM(X))]*SQRT[N*SUM(SQUARE(Y)) - SQUARE((SUM(Y)))]
*/
/* Analyzing the equations for A, B, R above will give 6 repeating expressions
SUM(X), SUM(Y), SUM(SQUARE(X)), SUM(SQUARE(Y)), SUM(X*Y), N
*/
/* Build a SQL string to return the values of the repeating expressions. */
SET @nSQL =
'SELECT SUM(' + @X + ') AS SUMX, SUM(' + @Y + ') AS SUMY, SUM(SQUARE(' + @X + '))AS SUMSQUAREX, SUM(SQUARE(' + @Y + ')) AS SUMSQUAREY, ' +
'SUM((' + @X + ')*(' + @Y + ')) AS SUMXY, COUNT (*) AS N INTO ##Linear FROM ' + @tablename +
' WHERE ' + @X + ' IS NOT NULL AND ' + @Y + ' IS NOT NULL '
/* Print debug information when in debug mode. */
BEGIN
IF @debug <> 0
PRINT '@nSQL : ' + @nSQL
END
EXEC (@nSQL)
BEGIN
IF @@ERROR <> 0
RETURN
END
/* Initialize the local variables with the result from the execution of the
SQL string.
*/
SELECT @SUMX = SUMX, @SUMY = SUMY, @SUMSQUAREX = SUMSQUAREX, @SUMSQUAREY = SUMSQUAREY, @SUMXY = SUMXY, @N = N FROM ##Linear
DROP TABLE ##LINEAR
/* Initialize the output parameters with the result from the regression calculations.
*/
SET @a = ((@SUMY)*(@SUMSQUAREX) - (@SUMX)*(@SUMXY)) / (@N*@SUMSQUAREX - SQUARE((@SUMX)))
SET @b = (@N*@SUMXY - (@SUMX)*(@SUMY)) / (@N*@SUMSQUAREX - SQUARE((@SUMX)))
SET @r = (@N*@SUMXY - (@SUMX)*(@SUMY)) / SQRT((@N*@SUMSQUAREX - SQUARE(@SUMX))*((@N*@SUMSQUAREY) - SQUARE(@SUMY)))
SET @failcode = @@error
/* Print more debug information in debug mode. */
WHILE (@debug <> 0)
BEGIN
PRINT 'A : '
PRINT @A
PRINT 'B : '
PRINT @B
PRINT 'R: '
PRINT @R
PRINT '@@ERROR: '
PRINT @@ERROR
BREAK
END
No comments have been posted about Purpose: Stored Procedure used to determine the Linear Correlation between 2 variabes in a table of. Why not be the first to post a comment about Purpose: Stored Procedure used to determine the Linear Correlation between 2 variabes in a table of.