postgres 源码分析 之 insert (1)

时间:2021-06-28 22:24:32

    题外话:今天查找postgres官方文档,才发现pg9.3 文档 已有中文文档,看来lazy了,只记得自己看过8.1中文文档,9.1英文文档,自问下:时间都去哪儿了?

今天主要讲执行以下insert操作出现错误对应的函数。

1. 操作

postgres=# create table test(id1 int,id2 int);
CREATE TABLE
postgres=# insert into test values(1,1);
INSERT 0 1
postgres=# insert into test(id1) values(2,2);
ERROR:  INSERT has more expressions than target columns
LINE 1: insert into test(id1) values(2,2);
                                       ^
postgres=# insert into test(id1,id2) values(3);
ERROR:  INSERT has more target columns than expressions
LINE 1: insert into test(id1,id2) values(3);
                             ^
postgres=# \d+ test
                         Table "public.test"
 Column |  Type   | Modifiers | Storage | Stats target | Description 
--------+---------+-----------+---------+--------------+-------------
 id1    | integer |           | plain   |              | 
 id2    | integer |           | plain   |              | 
Has OIDs: no

postgres=# insert into test  values(4);
INSERT 0 1<pre name="code" class="cpp">postgres=# \d+ test2
                                Table "public.test2"
 Column |         Type          | Modifiers | Storage  | Stats target | Description 
--------+-----------------------+-----------+----------+--------------+-------------
 id1    | character varying(12) |           | extended |              | 
 id2    | character varying(12) |           | extended |              | 
Has OIDs: no

postgres=# insert into test select * from test2;
ERROR:  column "id1" is of type integer but expression is of type character varying
LINE 1: insert into test select * from test2;
                                ^
HINT:  You will need to rewrite or cast the expression.

 

2. 问题描述

(1)当指定的表column数目小于插入的columns of values 时,报错:ERROR:  INSERT has more expressions than target columns

(2)当指定的表column数目大于插入的columns of values 时,报错:ERROR:  INSERT has more target columns than expressions

(3)当指定的表column类型与实际插入数据(子语句)类型不符,报错:

ERROR:  column "id1" is of type integer but expression is of type character varying

HINT:  You will need to rewrite or cast the expression.

3. 源码分析

3.1 数据库版本

9.3 beta

3.2 函数 transformInsertRow

位置:src/backend/parser/analyze.c

/*
 * Prepare an INSERT row for assignment to the target table.
 *
 * The row might be either a VALUES row, or variables referencing a
 * sub-SELECT output.
 */
static List *
transformInsertRow(ParseState *pstate, List *exprlist,
				   List *stmtcols, List *icolumns, List *attrnos)
{
	List	   *result;
	ListCell   *lc;
	ListCell   *icols;
	ListCell   *attnos;

	/*
	 * Check length of expr list.  It must not have more expressions than
	 * there are target columns.  We allow fewer, but only if no explicit
	 * columns list was given (the remaining columns are implicitly
	 * defaulted).	Note we must check this *after* transformation because
	 * that could expand '*' into multiple items.
	 */
	if (list_length(exprlist) > list_length(icolumns))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("INSERT has more expressions than target columns"),
				 parser_errposition(pstate,
									exprLocation(list_nth(exprlist,
												  list_length(icolumns))))));
	if (stmtcols != NIL &&
		list_length(exprlist) < list_length(icolumns))
	{
		/*
		 * We can get here for cases like INSERT ... SELECT (a,b,c) FROM ...
		 * where the user accidentally created a RowExpr instead of separate
		 * columns.  Add a suitable hint if that seems to be the problem,
		 * because the main error message is quite misleading for this case.
		 * (If there's no stmtcols, you'll get something about data type
		 * mismatch, which is less misleading so we don't worry about giving a
		 * hint in that case.)
		 */
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("INSERT has more target columns than expressions"),
				 ((list_length(exprlist) == 1 &&
				   count_rowexpr_columns(pstate, linitial(exprlist)) ==
				   list_length(icolumns)) ?
				  errhint("The insertion source is a row expression containing the same number of columns expected by the INSERT. Did you accidentally use extra parentheses?") : 0),
				 parser_errposition(pstate,
									exprLocation(list_nth(icolumns,
												  list_length(exprlist))))));
	}


3.3 源码分析

(1)结构体 ParseState

/*
 * State information used during parse analysis
 *
 * parentParseState: NULL in a top-level ParseState.  When parsing a subquery,
 * links to current parse state of outer query.
 *
 * p_sourcetext: source string that generated the raw parsetree being
 * analyzed, or NULL if not available.	(The string is used only to
 * generate cursor positions in error messages: we need it to convert
 * byte-wise locations in parse structures to character-wise cursor
 * positions.)
 *
 * p_rtable: list of RTEs that will become the rangetable of the query.
 * Note that neither relname nor refname of these entries are necessarily
 * unique; searching the rtable by name is a bad idea.
 *
 * p_joinexprs: list of JoinExpr nodes associated with p_rtable entries.
 * This is one-for-one with p_rtable, but contains NULLs for non-join
 * RTEs, and may be shorter than p_rtable if the last RTE(s) aren't joins.
 *
 * p_joinlist: list of join items (RangeTblRef and JoinExpr nodes) that
 * will become the fromlist of the query's top-level FromExpr node.
 *
 * p_namespace: list of ParseNamespaceItems that represents the current
 * namespace for table and column lookup.  (The RTEs listed here may be just
 * a subset of the whole rtable.  See ParseNamespaceItem comments below.)
 *
 * p_lateral_active: TRUE if we are currently parsing a LATERAL subexpression
 * of this parse level.  This makes p_lateral_only namespace items visible,
 * whereas they are not visible when p_lateral_active is FALSE.
 *
 * p_ctenamespace: list of CommonTableExprs (WITH items) that are visible
 * at the moment.  This is entirely different from p_namespace because a CTE
 * is not an RTE, rather "visibility" means you could make an RTE from it.
 *
 * p_future_ctes: list of CommonTableExprs (WITH items) that are not yet
 * visible due to scope rules.	This is used to help improve error messages.
 *
 * p_parent_cte: CommonTableExpr that immediately contains the current query,
 * if any.
 *
 * p_windowdefs: list of WindowDefs representing WINDOW and OVER clauses.
 * We collect these while transforming expressions and then transform them
 * afterwards (so that any resjunk tlist items needed for the sort/group
 * clauses end up at the end of the query tlist).  A WindowDef's location in
 * this list, counting from 1, is the winref number to use to reference it.
 */
struct ParseState
{
	struct ParseState *parentParseState;		/* stack link */
	const char *p_sourcetext;	/* source text, or NULL if not available */
	List	   *p_rtable;		/* range table so far */
	List	   *p_joinexprs;	/* JoinExprs for RTE_JOIN p_rtable entries */
	List	   *p_joinlist;		/* join items so far (will become FromExpr
								 * node's fromlist) */
	List	   *p_namespace;	/* currently-referenceable RTEs (List of
								 * ParseNamespaceItem) */
	bool		p_lateral_active;		/* p_lateral_only items visible? */
	List	   *p_ctenamespace; /* current namespace for common table exprs */
	List	   *p_future_ctes;	/* common table exprs not yet in namespace */
	CommonTableExpr *p_parent_cte;		/* this query's containing CTE */
	List	   *p_windowdefs;	/* raw representations of window clauses */
	ParseExprKind p_expr_kind;	/* what kind of expression we're parsing */
	int			p_next_resno;	/* next targetlist resno to assign */
	List	   *p_locking_clause;		/* raw FOR UPDATE/FOR SHARE info */
	Node	   *p_value_substitute;		/* what to replace VALUE with, if any */
	bool		p_hasAggs;
	bool		p_hasWindowFuncs;
	bool		p_hasSubLinks;
	bool		p_hasModifyingCTE;
	bool		p_is_insert;
	bool		p_is_update;
	bool		p_locked_from_parent;
	Relation	p_target_relation;
	RangeTblEntry *p_target_rangetblentry;

	/*
	 * Optional hook functions for parser callbacks.  These are null unless
	 * set up by the caller of make_parsestate.
	 */
	PreParseColumnRefHook p_pre_columnref_hook;
	PostParseColumnRefHook p_post_columnref_hook;
	ParseParamRefHook p_paramref_hook;
	CoerceParamHook p_coerce_param_hook;
	void	   *p_ref_hook_state;		/* common passthrough link for above */
};

(2)结构体List

typedef struct List
{
	NodeTag		type;			/* T_List, T_IntList, or T_OidList */
	int			length;
	ListCell   *head;
	ListCell   *tail;
} List;

(3)结构体ListCell

struct ListCell
{
	union
	{
		void	   *ptr_value;
		int			int_value;
		Oid			oid_value;
	}			data;
	ListCell   *next;
};

(4)函数list_length

STATIC_IF_INLINE int
list_length(const List *l)
{
	return l ? l->length : 0;
}

(5)宏定义 NIL

/*
 * The *only* valid representation of an empty list is NIL; in other
 * words, a non-NIL list is guaranteed to have length >= 1 and
 * head/tail != NULL
 */
#define NIL						((List *) NULL)

(6)函数list_head

STATIC_IF_INLINE ListCell *
list_head(const List *l)
{
	return l ? l->head : NULL;
}

(7)函数foreach

/*
 * foreach -
 *	  a convenience macro which loops through the list
 */
#define foreach(cell, l)	\
	for ((cell) = list_head(l); (cell) != NULL; (cell) = lnext(cell))

(8)宏定义lfirst

#define lfirst(lc) ((lc)->data.ptr_value)

(9)宏定义IsA

#define IsA(nodeptr,_type_) (nodeTag(nodeptr) == T_##_type_)

#define nodeTag(nodeptr) (((const Node*)(nodeptr))->type)

(10)函数transformAssignedExpr

/*
 * transformAssignedExpr()
 *	This is used in INSERT and UPDATE statements only.	It prepares an
 *	expression for assignment to a column of the target table.
 *	This includes coercing the given value to the target column's type
 *	(if necessary), and dealing with any subfield names or subscripts
 *	attached to the target column itself.  The input expression has
 *	already been through transformExpr().
 *
 * pstate		parse state
 * expr			expression to be modified
 * exprKind		indicates which type of statement we're dealing with
 * colname		target column name (ie, name of attribute to be assigned to)
 * attrno		target attribute number
 * indirection	subscripts/field names for target column, if any
 * location		error cursor position for the target column, or -1
 *
 * Returns the modified expression.
 *
 * Note: location points at the target column name (SET target or INSERT
 * column name list entry), and must therefore be -1 in an INSERT that
 * omits the column name list.	So we should usually prefer to use
 * exprLocation(expr) for errors that can happen in a default INSERT.
 */
Expr *
transformAssignedExpr(ParseState *pstate,
					  Expr *expr,
					  ParseExprKind exprKind,
					  char *colname,
					  int attrno,
					  List *indirection,
					  int location)
{
	Relation	rd = pstate->p_target_relation;
	Oid			type_id;		/* type of value provided */
	Oid			attrtype;		/* type of target column */
	int32		attrtypmod;
	Oid			attrcollation;	/* collation of target column */
	ParseExprKind sv_expr_kind;

	/*
	 * Save and restore identity of expression type we're parsing.  We must
	 * set p_expr_kind here because we can parse subscripts without going
	 * through transformExpr().
	 */
	Assert(exprKind != EXPR_KIND_NONE);
	sv_expr_kind = pstate->p_expr_kind;
	pstate->p_expr_kind = exprKind;

	Assert(rd != NULL);
	if (attrno <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot assign to system column \"%s\"",
						colname),
				 parser_errposition(pstate, location)));
	attrtype = attnumTypeId(rd, attrno);
	attrtypmod = rd->rd_att->attrs[attrno - 1]->atttypmod;
	attrcollation = rd->rd_att->attrs[attrno - 1]->attcollation;

	/*
	 * If the expression is a DEFAULT placeholder, insert the attribute's
	 * type/typmod/collation into it so that exprType etc will report the
	 * right things.  (We expect that the eventually substituted default
	 * expression will in fact have this type and typmod.  The collation
	 * likely doesn't matter, but let's set it correctly anyway.)  Also,
	 * reject trying to update a subfield or array element with DEFAULT, since
	 * there can't be any default for portions of a column.
	 */
	if (expr && IsA(expr, SetToDefault))
	{
		SetToDefault *def = (SetToDefault *) expr;

		def->typeId = attrtype;
		def->typeMod = attrtypmod;
		def->collation = attrcollation;
		if (indirection)
		{
			if (IsA(linitial(indirection), A_Indices))
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot set an array element to DEFAULT"),
						 parser_errposition(pstate, location)));
			else
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot set a subfield to DEFAULT"),
						 parser_errposition(pstate, location)));
		}
	}

	/* Now we can use exprType() safely. */
	type_id = exprType((Node *) expr);

	/*
	 * If there is indirection on the target column, prepare an array or
	 * subfield assignment expression.	This will generate a new column value
	 * that the source value has been inserted into, which can then be placed
	 * in the new tuple constructed by INSERT or UPDATE.
	 */
	if (indirection)
	{
		Node	   *colVar;

		if (pstate->p_is_insert)
		{
			/*
			 * The command is INSERT INTO table (col.something) ... so there
			 * is not really a source value to work with. Insert a NULL
			 * constant as the source value.
			 */
			colVar = (Node *) makeNullConst(attrtype, attrtypmod,
											attrcollation);
		}
		else
		{
			/*
			 * Build a Var for the column to be updated.
			 */
			colVar = (Node *) make_var(pstate,
									   pstate->p_target_rangetblentry,
									   attrno,
									   location);
		}

		expr = (Expr *)
			transformAssignmentIndirection(pstate,
										   colVar,
										   colname,
										   false,
										   attrtype,
										   attrtypmod,
										   attrcollation,
										   list_head(indirection),
										   (Node *) expr,
										   location);
	}
	else
	{
		/*
		 * For normal non-qualified target column, do type checking and
		 * coercion.
		 */
		Node	   *orig_expr = (Node *) expr;

		expr = (Expr *)
			coerce_to_target_type(pstate,
								  orig_expr, type_id,
								  attrtype, attrtypmod,
								  COERCION_ASSIGNMENT,
								  COERCE_IMPLICIT_CAST,
								  -1);
		if (expr == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("column \"%s\" is of type %s"
							" but expression is of type %s",
							colname,
							format_type_be(attrtype),
							format_type_be(type_id)),
				 errhint("You will need to rewrite or cast the expression."),
					 parser_errposition(pstate, exprLocation(orig_expr))));
	}

	pstate->p_expr_kind = sv_expr_kind;

	return expr;
}

(11)函数lappend 

看到这个函数的定义,想起那年依稀相似的情景。

/*
 * Append a pointer to the list. A pointer to the modified list is
 * returned. Note that this function may or may not destructively
 * modify the list; callers should always use this function's return
 * value, rather than continuing to use the pointer passed as the
 * first argument.
 */
List *
lappend(List *list, void *datum)
{
	Assert(IsPointerList(list));

	if (list == NIL)
		list = new_list(T_List);
	else
		new_tail_cell(list);

	lfirst(list->tail) = datum;
	check_list_invariants(list);
	return list;
}

(112)宏定义IsPointerList

#define IsPointerList(l) ((l) == NIL || IsA((l), List))

(13)函数new_list

/*
 * Return a freshly allocated List. Since empty non-NIL lists are
 * invalid, new_list() also allocates the head cell of the new list:
 * the caller should be sure to fill in that cell's data.
 */
static List *
new_list(NodeTag type)
{
	List	   *new_list;
	ListCell   *new_head;

	new_head = (ListCell *) palloc(sizeof(*new_head));
	new_head->next = NULL;
	/* new_head->data is left undefined! */

	new_list = (List *) palloc(sizeof(*new_list));
	new_list->type = type;
	new_list->length = 1;
	new_list->head = new_head;
	new_list->tail = new_head;

	return new_list;
}

(14)函数new_tail_cell

/*
 * Allocate a new cell and make it the tail of the specified
 * list. Assumes the list it is passed is non-NIL.
 *
 * The data in the new tail cell is undefined; the caller should be
 * sure to fill it in
 */
static void
new_tail_cell(List *list)
{
	ListCell   *new_tail;

	new_tail = (ListCell *) palloc(sizeof(*new_tail));
	new_tail->next = NULL;

	list->tail->next = new_tail;
	list->tail = new_tail;
	list->length++;
}

(15)函数check_list_invariants

/*
 * Check that the specified List is valid (so far as we can tell).
 */
static void
check_list_invariants(const List *list)
{
	if (list == NIL)
		return;

	Assert(list->length > 0);
	Assert(list->head != NULL);
	Assert(list->tail != NULL);

	Assert(list->type == T_List ||
		   list->type == T_IntList ||
		   list->type == T_OidList);

	if (list->length == 1)
		Assert(list->head == list->tail);
	if (list->length == 2)
		Assert(list->head->next == list->tail);
	Assert(list->tail->next == NULL);
}

4. record

上述内容中函数transformAssignedExpr 还没能完全理解。

date:  Sat Jul  5 13:42:42 CST 2014