<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD with MathML3 v1.2 20190208//EN" "JATS-journalpublishing1-mathml3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en">
<front>
<journal-meta><journal-id journal-id-type="publisher-id">JNC</journal-id><journal-id journal-id-type="nlm-ta">J Numer Cogn</journal-id>
<journal-title-group>
<journal-title>Journal of Numerical Cognition</journal-title><abbrev-journal-title abbrev-type="pubmed">J. Numer. Cogn.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2363-8761</issn>
<publisher><publisher-name>PsychOpen</publisher-name></publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">jnc.14249</article-id>
<article-id pub-id-type="doi">10.5964/jnc.14249</article-id>
<article-categories>
<subj-group subj-group-type="heading"><subject>Empirical Research</subject></subj-group>
<subj-group subj-group-type="badge">
<subject>Data</subject>
<subject>Code</subject>
<subject>Materials</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Assessing Early Math Skills in Preschoolers by Using Digital Games</article-title>
<alt-title alt-title-type="right-running">Using Games to Assess Early Math</alt-title>
<alt-title specific-use="APA-reference-style" xml:lang="en">Assessing early math skills in preschoolers by using digital games</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-2785-2751</contrib-id><name name-style="western"><surname>Navarrete-Ulloa</surname><given-names>Jairo A.</given-names></name><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0001-9509-6436</contrib-id><name name-style="western"><surname>Gómez</surname><given-names>David M.</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref><xref ref-type="aff" rid="aff2"><sup>2</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0009-0005-7834-8554</contrib-id><name name-style="western"><surname>Ponce</surname><given-names>Llery</given-names></name><xref ref-type="aff" rid="aff3"><sup>3</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-4466-6440</contrib-id><name name-style="western"><surname>Munoz-Rubke</surname><given-names>Felipe</given-names></name><xref ref-type="aff" rid="aff4"><sup>4</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0003-1025-7021</contrib-id><name name-style="western"><surname>Dartnell</surname><given-names>Pablo R.</given-names></name><xref ref-type="aff" rid="aff3"><sup>3</sup></xref><xref ref-type="aff" rid="aff5"><sup>5</sup></xref></contrib>
<contrib contrib-type="editor">
<name>
<surname>Ramani</surname>
<given-names>Geetha</given-names>
</name>
<xref ref-type="aff" rid="aff6"/>
</contrib>
<aff id="aff1"><label>1</label><institution>Institute of Education Sciences, Universidad de O'Higgins, Rancagua</institution>, <country country="CL">Chile</country></aff>
<aff id="aff2"><label>2</label><institution>Millennium Nucleus for the Study of the Development of Early Mathematics Skills (MEMAT)</institution>, <addr-line><city>Santiago</city></addr-line>, <country country="CL">Chile</country></aff>
<aff id="aff3"><label>3</label><institution content-type="dept">Institute for Advanced Studies in Education</institution>, <institution>Universidad de Chile</institution>, <addr-line><city>Santiago</city></addr-line>, <country country="CL">Chile</country></aff>
<aff id="aff4"><label>4</label><institution>Instituto de Psicología, Universidad Austral de Chile, Puerto Montt</institution>, <country country="CL">Chile</country></aff>
<aff id="aff5"><label>5</label><institution content-type="dept">Center for Mathematical Modeling</institution>, <institution>Universidad de Chile</institution>, <addr-line><city>Santiago</city></addr-line>, <country country="CL">Chile</country></aff>
<aff id="aff6">University of Maryland, College Park, MD, <country country="US">United States</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>*</label>Av. Libertador Bernardo O’Higgins 611, of. 717, Rancagua, Chile. <email xlink:href="jairo.navarrete@uoh.cl">jairo.navarrete@uoh.cl</email></corresp>
</author-notes>
<pub-date pub-type="epub"><day>08</day><month>04</month><year>2025</year></pub-date>
<pub-date pub-type="collection" publication-format="electronic"><year>2025</year></pub-date>
<volume>11</volume><elocation-id>e14249</elocation-id>
<history>
<date date-type="received">
<day>10</day>
<month>04</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>01</month>
<year>2025</year>
</date>
</history>
<permissions><copyright-year>2025</copyright-year><copyright-holder>Navarrete-Ulloa, Gómez, Ponce et al.</copyright-holder><license license-type="open-access" specific-use="CC BY 4.0" xlink:href="https://creativecommons.org/licenses/by/4.0/"><ali:license_ref>https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution (CC BY) 4.0 License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p></license></permissions>
<abstract>
<p>Improving early mathematical competence is a major priority worldwide; thus, assessing early math abilities is critical. Although various international standardized instruments serve this purpose, their usage in underdeveloped countries is prohibitive due to their resource-intensive requirements. In this report, we explore the development of the “Test de Pensamiento Matemático” (TPM, Test of Preschool Mathematics), which is an automated, game-based, digital instrument for assessing early math abilities in 4-to-6-year-old children in accordance with international curricular standards. A confirmatory factor analysis shows an optimal fit for two dimensions: numerical thinking and visuospatial reasoning. By drawing on technology, the TPM can be applied to large groups of children, so it becomes an efficient tool for assessing performance, monitoring learning improvements, and screening children who need additional support to develop their math abilities at the same pace as their peers.</p>
</abstract>
<abstract xml:lang="en" abstract-type="non-technical">
<sec><title>Background</title>
<p>Teaching young children math skills is important all over the world, but many countries, especially those with fewer resources, struggle to find good ways to test these skills. Although various international standardized instruments serve this purpose, their usage in underdeveloped countries is prohibitive due to their resource-intensive requirements.</p></sec>
<sec><title>Why was this study done?</title>
<p>This study introduces and validates a new tool called the Test de Pensamiento Matemático (TPM), or Test of Preschool Mathematics. It’s a digital, game-like test designed to measure math skills in kids aged 4 to 6. </p></sec>
<sec><title>What did the researchers do and find?</title>
<p>The test focuses on two main areas: number skills and visual-spatial reasoning (like understanding space and patterns). The TPM is easy to use and can test many children at once, making it a practical option for schools and communities with limited resources. </p></sec>
<sec><title>What do these findings mean?</title>
<p>The assessment helps teachers see how well kids are learning math, track their progress, and identify children who might need extra help to keep up with their classmates. By using technology, the TPM makes math assessment more accessible and efficient for everyone.</p></sec>
</abstract>
<abstract abstract-type="highlights">
<title>Highlights</title>
<p><list list-type="bullet">
<list-item>
<p>The TPM (Test of Preschool Mathematics) is an innovative assessment tool designed to evaluate early math skills in children at pre-K and kindergarten levels.</p></list-item>
<list-item>
<p>It focuses on two key dimensions of learning: numerical thinking and spatial reasoning, providing a comprehensive understanding of a child's foundational math abilities.</p></list-item>
<list-item>
<p>As a game-based assessment, the TPM is engaging and user-friendly, allowing large groups of children to participate simultaneously using tablets or smartphones paired with headphones.</p></list-item>
<list-item>
<p>The entire assessment process is efficient, taking approximately 90 minutes to evaluate large groups of children, making it a practical solution for educators and researchers working with young learners.</p></list-item>
</list></p>
</abstract>
<kwd-group kwd-group-type="author"><kwd>educational assessment</kwd><kwd>mathematics education</kwd><kwd>mathematics tests</kwd><kwd>early childhood education</kwd><kwd>educational games</kwd></kwd-group>
</article-meta>
</front>
<body>
<sec sec-type="intro"><title></title>
<p>Developing early math skills is crucial for students' learning and development throughout their school and post-school trajectory. Recent studies reveal that students who begin elementary school with underdeveloped math skills demonstrate low academic performance throughout their school trajectory. More specifically, early math skill levels at age five predict the student’s likelihood to pursue a college degree and directly relate to the curriculum the student develops during high school (<xref ref-type="bibr" rid="r12">Davis‐Kean et al., 2022</xref>; <xref ref-type="bibr" rid="r56">Watts et al., 2018</xref>). Early math skills’ progress between 4 and 6 years old is the strongest predictor of math performance afterward (<xref ref-type="bibr" rid="r51">Siegler et al., 2012</xref>; <xref ref-type="bibr" rid="r57">Watts et al., 2014</xref>, <xref ref-type="bibr" rid="r56">2018</xref>). Poor development of math skills is associated with early dropout from educational opportunities, lack of productive skills, sporadic employment, long periods of unemployment, low pay, and few opportunities for career advancement (<xref ref-type="bibr" rid="r6">Bynner &amp; Parsons, 1997</xref>; <xref ref-type="bibr" rid="r42">Parsons &amp; Bynner, 2005</xref>). Consequently, the importance of monitoring the development of early math skills cannot be overstated.</p>
<p>The literature on early math development has proposed a variety of assessment instruments: For example, the Clinical Interview Method and the Birthday Party (CIM; BP; <xref ref-type="bibr" rid="r17">Ginsburg et al., 2016</xref>; <xref ref-type="bibr" rid="r18">Ginsburg &amp; Pappas, 2016</xref>), the Early Grade Mathematics Assessment (EGMA; <xref ref-type="bibr" rid="r44">Platas, Ketterlin-Geller, &amp; Sitabkhan, 2016</xref>), the Test of Early Mathematics Ability (TEMA-3; <xref ref-type="bibr" rid="r22">Hoffman &amp; Grialou, 2005</xref>), and the Research-based Early Math Assessment (REMA; <xref ref-type="bibr" rid="r11">Clements et al., 2008</xref>; <xref ref-type="bibr" rid="r13">Dong et al., 2021</xref>). These instruments generally consider assessing numbers, their operations, and spatial topics such as shapes, space problems, and visual patterns (see <xref ref-type="table" rid="t1">Table 1</xref>). A recent study has systematically reviewed 59 tools for measuring math competence (<xref ref-type="bibr" rid="r41">Outhwaite et al., 2024</xref>). The authors identified 37 math assessments (1-14 years) and 22 screeners (3-14 years). Of these tools, 52 are child-direct measures that require individual presentation, whereas only seven of them can be applied to small groups of children. Moreover, 49 require a trained assessor, and 41 are paper-based. These features suggest that children’s lack of autonomy makes these assessment tools costly in terms of the amount of labor needed to evaluate large groups of children. For example, the full version of the REMA assessment requires around sixty minutes to assess one student, meaning that evaluating a classroom with 30 children would take around thirty hours of professional work (see <xref ref-type="table" rid="t1">Table 1</xref>). Note that, from the 59 measurement tools reviewed by Outhwaite et al., 55 target number skills and 47 target arithmetic skills, whereas only 22 measure shape, space, and measure skills. 
More importantly, only nine assessments and two screeners were evaluated in countries, cultures, or language groups different from WEIRD societies and English-speakers (<xref ref-type="bibr" rid="r41">Outhwaite et al., 2024</xref>).</p>
<p>The aforementioned instruments are frequently used in research institutions and educational systems of well-resourced educational communities. In contrast, they are barely used in less affluent backgrounds due to their high demands on time and resources. For example, among the instruments listed in <xref ref-type="table" rid="t1">Table 1</xref>, the shortest application requires 20 minutes per child, so evaluating a classroom with 30 participants would need around ten hours of continuous professional labor. Under-resourced communities often lack resources and devote little time to performing math activities (e.g., <xref ref-type="bibr" rid="r54">Strasser et al., 2009</xref>). Consequently, applying these assessments in such communities becomes prohibitive due to the high amount of labor that compensates for children’s lack of autonomy that is associated with their early age.</p>
<table-wrap id="t1" position="anchor" orientation="portrait">
<label>Table 1</label><caption><title>Comparison of Different Instruments for Assessing Early Math Skills</title></caption>
<table frame="hsides" rules="groups">
<col width="30%" align="left"/>
<col width="10%"/>
<col width="10%"/>
<col width="10%"/>
<col width="10%"/>
<col width="10%"/>
<col width="10%"/>
<col width="10%"/>
<thead>
<tr>
<th>Features</th>
<th>CIM</th>
<th>BP</th>
<th>EGMA</th>
<th>TEMA-3</th>
<th>REMA-Full</th>
<th>REMA-SF</th>
<th>TPM</th>
</tr>
</thead>
<tbody>
<tr>
<td>Number and Operations</td>
<td>Yes</td>
<td>Yes</td>
<td>Yes</td>
<td>Yes</td>
<td>Yes</td>
<td>Yes</td>
<td>Yes</td>
</tr>
<tr>
<td>Shapes</td>
<td>No</td>
<td>Yes</td>
<td>No</td>
<td>No</td>
<td>Yes</td>
<td>Yes</td>
<td>No</td>
</tr>
<tr>
<td>Space</td>
<td>No</td>
<td>Yes</td>
<td>No</td>
<td>No</td>
<td>Yes</td>
<td>Yes</td>
<td>Yes</td>
</tr>
<tr>
<td>Patterns</td>
<td>No</td>
<td>Yes</td>
<td>No</td>
<td>No</td>
<td>Yes</td>
<td>Yes</td>
<td>Yes</td>
</tr>
<tr>
<td>Individual application time (one child)</td>
<td>40m</td>
<td>20m</td>
<td>20m</td>
<td>40m</td>
<td>60m</td>
<td>25m</td>
<td>50m</td>
</tr>
<tr>
<td>Group application time (30 children)</td>
<td>20h</td>
<td>10h</td>
<td>10h</td>
<td>20h</td>
<td>30h</td>
<td>12.5h</td>
<td>1.5h</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>For example, in Chile, prior studies have estimated that the average time devoted to performing math activities at kindergarten is around 30 minutes per week (<xref ref-type="bibr" rid="r54">Strasser et al., 2009</xref>), suggesting that time available specifically for math assessment is even lower. Consequently, in under-resourced backgrounds, the best alternative available is the application of systematic observation methods, documentation processes, and performance rubrics for assessment (<xref ref-type="bibr" rid="r2">Alsina, 2021</xref>). These assessment methods rely on observing children's behavior, taking notes, and registering children's performance in math activities during an extended period; they are also widely used because they are more affordable and flexible, and they provide practical assessment results. However, one of their drawbacks is that they tend to be subjective—they rely on the expert gaze of the teacher. In principle, this might not be an issue, but these results may not be appropriate for providing performance comparisons between classrooms or schools. Furthermore, the results of these observational methods are accessible only after the observation period, so the collected information might not be helpful in providing valuable immediate feedback.</p>
<p>Additionally, despite the importance of developing early math skills, there is little information regarding their developmental trajectory in early education and the potential differences between boys and girls that might appear at these levels. For example, a study in Chile used data from older students and growth models to predict the existence of a gap between boys and girls in early math performance with a size around 0.15 <italic>SD</italic> in classrooms of 4-6 year-old children (<xref ref-type="bibr" rid="r43">Perez Mejias et al., 2021</xref>). It has been argued that these differences in math performance may arise due to math-gender stereotypes (<xref ref-type="bibr" rid="r59">Zhu &amp; Chiu, 2019</xref>) or sex differences in preferred strategies to approach mathematical problems (<xref ref-type="bibr" rid="r53">Spelke, 2005</xref>), among other factors. In consequence, the understanding of these sex gaps should be approached from a multicausal perspective (<xref ref-type="bibr" rid="r7">Casey &amp; Ganley, 2021</xref>) and the development of an assessment instrument should be aware of these concerns. A relevant question is thus whether instruments’ factor structure is similar for boys and girls, a psychometric property known as measurement invariance.</p>
<sec sec-type="other1"><title>A Technological Solution to Math Assessment in Early Education</title>
<p>The solution proposed here uses technology to automate some components of the assessment process. The previous section highlighted that the critical challenge is children’s lack of autonomy, making assessment expensive due to the required mediation or subjective due to the teacher-dependent observation process. Hence, an assessment solution should be designed that can overcome children's lack of autonomy and remain affordable and objective. The core idea presented here is to automate the individual mediation processes through mobile devices and gamification to allow group applications of the assessment (all children in a classroom) in a simple, automated, and affordable way. In this regard, tactile mobile devices such as tablets and smartphones can facilitate children's autonomy (<xref ref-type="bibr" rid="r23">Holloway et al., 2013</xref>). These devices can provide verbal instructions, represent abstract ideas in interactive visual models, or allow manipulation of virtual objects through touch screens, thus automating specific evaluation processes.</p>
<p>Gamification could also increase the attention span of early education students (as early ages are associated with short attention spans). In fact, educational interest in gamification has grown exponentially due to its ability to engage students innovatively and increase their motivation and creativity (<xref ref-type="bibr" rid="r58">Zainuddin et al., 2020</xref>). Although using electronic devices is not recommended at early ages due to the increase of emotional problems such as anxiety and social isolation, these adverse effects are associated with the intensive use of these devices during prolonged periods (<xref ref-type="bibr" rid="r29">Lin et al., 2020</xref>). The solution proposed here considers an adequate use of these devices by applying them only occasionally.</p>
<p>The present work aims to develop a proof of concept for an automated assessment tool that facilitates the quick and straightforward acquisition of high-quality information about children's early math abilities by allowing group assessments. This report presents a proof of concept of the "Test of Preschool Mathematics" (TPM), which enables group assessments by automating the mediation process through mobile devices and gamification. Its design aims to assess mathematical skills in prekindergarten and kindergarten levels in alignment with international curricular standards, and it uses digital games to present assessment tasks that evaluate numerical or visuospatial reasoning. We hypothesize that the data collected through this group assessment strategy would be highly informative about children's abilities related to early math skills. In this regard, we will present preliminary evidence about the validity of the TPM by collecting and analyzing data from more than 700 children in prekindergarten, kindergarten, and first grade levels.</p>
<p>The organization of this work is as follows. The next section provides an overview of international curricular standards and their relationship with Chile's curriculum—the background of the present work. Additionally, it briefly describes the expected mathematical learning for children in prekindergarten and kindergarten classrooms. The third section presents a brief description of the TPM development process. The methods section describes the data collection, the sample, and the participants, while the results section presents the data analysis supporting the TPM's criterion and construct validity. Finally, the last section discusses the results and argues that the TPM is an assessment tool that combines efficient resource use and good information quality regarding math performance in early education. The discussion highlights that this tool could present opportunities to assess early childhood math skills and develop timely and appropriate strategies to strengthen under-resourced educational communities.</p></sec>
<sec sec-type="other2"><title>International Standards and the Chilean Curricular Framework</title>
<p>Developing early numeracy is a powerful predictor of future math performance (<xref ref-type="bibr" rid="r57">Watts et al., 2014</xref>, <xref ref-type="bibr" rid="r55">2017</xref>, <xref ref-type="bibr" rid="r56">2018</xref>), mainly because it lies at the foundation of mathematical structure, allowing us to describe generalizations of predictable sequences. Consequently, many countries have emphasized opportunities to strengthen students' mathematical learning in general and also in early childhood (<xref ref-type="bibr" rid="r10">Clements &amp; Sarama, 2011</xref>; <xref ref-type="bibr" rid="r30">Melhuish &amp; Petrogiannis, 2006</xref>). Indeed, progress has been made in consolidating robust curriculum standards for mathematics teaching at the international level. One of the curricular frameworks of reference is the Curriculum Focal Points (<xref ref-type="bibr" rid="r39">NCTM, 2006</xref>), which drove the emergence of collaborative environments promoting the creation and development of high-quality tools such as assessment instruments, curriculum frameworks, and instructional materials (e.g., books and software). For example, by drawing on the Focal Points curriculum, the Common Core State Standards (<xref ref-type="bibr" rid="r8">CCSS, 2010</xref>) have been massively adopted in different U.S. territories, creating a common framework that facilitates and promotes collaboration and curriculum development for a coherent educational system.</p>
<p>A comparative analysis between the Curriculum Focal Points and the Common Core State Standards concludes that, although the two documents differ in terms of their levels of specificity, both standards are highly comparable in their coherence, focus, and content (<xref ref-type="bibr" rid="r1">Achieve, 2010</xref>). According to the NCTM website, comparative analyses show that the Canadian mathematics curriculum and the Focal Points are highly similar. Regarding math education in prekindergarten and kindergarten classrooms, these documents state that the majority of instructional time should focus on (1) representing, relating, and operating with whole numbers and (2) describing shapes and spatial relationships (<xref ref-type="bibr" rid="r8">CCSS, 2010</xref>).</p>
<p>In coherence with these standards, Chile's curriculum (<xref ref-type="bibr" rid="r33">MINEDUC, 2018</xref>) strongly emphasizes the development of number knowledge and aims to develop reasoning skills about spatial relationships. A detailed description of these learning goals is provided as supplementary material (see <xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>, Table S2). Chile’s curriculum aims to teach students numerical skills, using numbers to represent quantities and solve quantitative problems. The learning objectives include comparison relationships; the notion of number, quantifier function, and numerical sequence; concrete, pictorial, and symbolic (COPISI) numerical representations of quantities; and solving problems through addition and subtraction operations using concrete and pictorial representations (up to 10). Regarding spatial relationships, the learning objectives include the creation of two- and three-element patterns, identification/classification of shapes and objects by their attributes, seriation of objects by various attributes (such as height, length, and capacity), and spatial/temporal orientation and representation of objects from different perspectives (top, bottom, sides).</p></sec>
<sec sec-type="other3"><title>Design of the Test of Preschool Mathematics (TPM)</title>
<p>We reviewed the literature for tasks to assess early mathematical skills. Experts analyzed each task to verify its alignment with the Chilean early education’s learning objectives (<xref ref-type="bibr" rid="r33">MINEDUC, 2018</xref>). Not all target learning objectives could be covered by tasks from the literature, so we designed specific tasks for the uncovered objectives by drawing on theoretical frameworks of analogical reasoning and analogical representations (<xref ref-type="bibr" rid="r26">Kalra &amp; Richland, 2022</xref>; <xref ref-type="bibr" rid="r35">Navarrete &amp; Dartnell, 2017</xref>; <xref ref-type="bibr" rid="r36">Navarrete et al., 2018</xref>; <xref ref-type="bibr" rid="r37">Navarrete-Ulloa &amp; Munoz-Rubke, 2022</xref>; <xref ref-type="bibr" rid="r47">Ramani et al., 2012</xref>, <xref ref-type="bibr" rid="r45">2020</xref>; <xref ref-type="bibr" rid="r48">Richland et al., 2004</xref>). Analogical reasoning consists in comparing the structure of two entities and is particularly useful for processing abstract domains through concrete representations (e.g., love is a journey) by identifying a structural correspondence between a known concrete domain and the abstract concept (<xref ref-type="bibr" rid="r3">Andrews et al., 2006</xref>; <xref ref-type="bibr" rid="r20">Halford et al., 2010</xref>; <xref ref-type="bibr" rid="r25">Holyoak, 2012</xref>; <xref ref-type="bibr" rid="r28">Lakoff, 2006</xref>). For example, a balanced scale can be a friendly representation of some first-degree equations (<xref ref-type="bibr" rid="r4">Araya et al., 2010</xref>). 
This property of analogical reasoning helps to design concrete representations of abstract and complex concepts (<xref ref-type="bibr" rid="r15">Gentner, 2010</xref>; <xref ref-type="bibr" rid="r16">Gentner &amp; Colhoun, 2010</xref>; <xref ref-type="bibr" rid="r35">Navarrete &amp; Dartnell, 2017</xref>; <xref ref-type="bibr" rid="r37">Navarrete-Ulloa &amp; Munoz-Rubke, 2022</xref>; <xref ref-type="bibr" rid="r49">Richland &amp; McDonough, 2010</xref>; <xref ref-type="bibr" rid="r50">Richland et al., 2012</xref>).</p>
<p>The methods described above led to sixteen proposals for digital tasks to measure math abilities in children. These proposals consisted of a conceptual model of the task with a focus on assessing the target learning goal along with a basic design of the user interface and its functionality. Each proposal was presented by a member of the team and evaluated by five experts who used a scoring rubric built on the following criteria: (a) feasibility of development, (b) face validity based on literature support, and (c) intuitiveness of the user interface. From these proposals, twelve were selected for further development. The five experts conducted three cycles of review, evaluation, and feedback to develop these tasks further and improve them. Three of these experts had a doctoral degree and research experience, whereas the two other experts had a master's degree and preschool classroom experience. These experts then selected the best nine tasks based on the same criteria described above. Afterward, the development team implemented these nine tasks as digital games. To explore the usability and validity of the assessment, we carried out a piloting process where a group of children performed all nine digital games. One task was eliminated because its average duration was deemed too long for children to complete in actual testing. Another task was eliminated because its user interface was not intuitive enough and was difficult to use, and thus, its validity was called into question.</p>
<p>The seven remaining tasks compose the TPM and cover nine out of the twelve learning goals of the Chilean curriculum in early education (see <xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>, Table S2). Furthermore, these seven tasks are strongly aligned with international curriculums such as the Focal Points (NCTM) and the Common Core State Standards (CCSS), as these tasks focus on (1) representing, relating, and operating with whole numbers and (2) describing shapes and spatial relationships (<xref ref-type="bibr" rid="r8">CCSS, 2010</xref>).</p>
<p><xref ref-type="table" rid="t2">Table 2</xref> briefly describes the seven TPM tasks, which, through narrative, illustrations, and animations, emulate digital games. The graphic and interaction design of each task had an edutainment focus implemented by an illustrator and a programmer, both experts in designing and implementing games for children. To lower children's anxiety, there was a progress indicator on the screen; the character's voice was calm and soft, and a ludic story surrounded each game, where the characters had to achieve a critical mission.<?pagebreak-after?></p>
<table-wrap id="t2" position="anchor" orientation="portrait">
<label>Table 2</label><caption><title>Description of the Seven TPM’s Tasks, Grouped Into Numerical Thinking and Visuospatial Reasoning</title></caption>
<table frame="hsides" rules="groups" style="compact-1">
<col width="14%" align="left"/>
<col width="15%"/>
<col width="27%" align="left"/>
<col width="22%" align="left"/>
<col width="22%" align="left"/>
<thead>
<tr>
<th>Game Name</th>
<th>Code</th>
<th>Description</th>
<th>Illustration</th>
<th>Scoring</th>
</tr>
</thead>
<tbody>
<tr>
<th colspan="5" align="center">Numerical Thinking<hr/></th>
</tr>
<tr style="transparent-border-top">
<td>Numerical Comparison<break/>(10 items)</td>
<td>NumComp</td>
<td>Evaluates the use of numbers to compare quantities up to 20 in an everyday situation. Two barrels are shown, each labeled with a numeral from 1 to 20. The instructions ask to choose the barrel with the larger (or smaller) quantity associated with it.</td>
<td valign="middle"><p><fig id="ft1" position="anchor" fig-type="simple" orientation="portrait">
<graphic xlink:href="jnc.14249-ft1" position="anchor" orientation="portrait"/></fig></p></td>
<td>Score from 0 to 100 points.<break/>Two possible alternatives: correct or incorrect.<break/>Scores in {0, 100}.</td>
</tr>
<tr style="transparent-border-top">
<td>Concrete Pictorial Symbolic Mapping<break/>(10 items)</td>
<td>CoPiSi</td>
<td>Evaluates skills to represent numbers and quantities up to 10 in a concrete, pictorial, and symbolic way. Three areas are shown on the main screen. The first presents quantities using the fingers of the hand (concrete), the second presents a collection of spaceships (pictorial), and the third presents numerals (symbolic). The instructions for this game indicate a specific quantity and ask to represent it in the three areas of the screen.</td>
<td valign="middle"><p><fig id="ft2" position="anchor" fig-type="simple" orientation="portrait"><graphic xlink:href="jnc.14249-ft2" position="anchor" orientation="portrait"/></fig></p></td>
<td>Score from 0 to 100 points.<break/>Proportional to the number of correct chosen representations.<break/>Scores in {0, 33, 50, 66, 100}</td>
</tr>
<tr style="transparent-border-top">
<td>Additive Problem Solving<break/>(10 items)</td>
<td>AddProb</td>
<td>Evaluates the resolution of simple problems in a concrete and pictorial way by adding or removing up to 10 items. A character requiring an "a" amount of fruit is shown next to a basket with a "b" amount of fruit. The instructions ask to add or remove fruits from the basket to satisfy the requirement. For this, the player must choose an alternative (e.g., +2, -1, +3, etc.) from those shown in the bar presented on the screen.</td>
<td valign="middle"><p><fig id="ft3" position="anchor" fig-type="simple" orientation="portrait"><graphic xlink:href="jnc.14249-ft3" position="anchor" orientation="portrait"/></fig></p></td>
<td>Score from 0 to 100 points.<break/>Two possible alternatives: correct or incorrect.<break/>Scores in {0, 100}.</td>
</tr>
<tr style="transparent-border-top">
<td>Number Line Estimation<break/>(10 items)</td>
<td>NumLine</td>
<td>Evaluates the ability to use numbers to indicate the position of elements in everyday situations. A horizontal line is shown crossing the screen joining two islands, the left one with the number 0 and the right one labeled with the number 10. At the center of the screen a numeral is shown (between 0 and 10) and the instructions ask to estimate the position of a ship at the position on the number line corresponding to that number.<break/>&nbsp;</td>
<td valign="middle"><p><fig id="ft4" position="anchor" fig-type="simple" orientation="portrait"><graphic xlink:href="jnc.14249-ft4" position="anchor" orientation="portrait"/></fig></p></td>
<td>PAE = 10*|Estimate - Position|<break/>Score = 100 – PAE<break/>Scores in [10, 100]</td>
</tr>
<tr style="grey-border-top">
<th colspan="5" align="center">Visuospatial Reasoning<hr/></th>
</tr>
<tr style="transparent-border-top transparent-border-bottom">
<td>Pattern Creation<break/>(10 items)</td>
<td>PattCrea</td>
<td>Evaluates the ability to copy, extend and create visual patterns of two or three elements. The left side of the screen shows a building constructed with floors whose colors follow a pattern, and the right side of the screen shows an incomplete building. The instructions ask you to complete the construction of the building on the right following the same pattern as the building on the left—although the colors may vary.</td>
<td valign="middle"><p><fig id="ft5" position="anchor" fig-type="simple" orientation="portrait"><graphic xlink:href="jnc.14249-ft5" position="anchor" orientation="portrait"/></fig></p></td>
<td>Score from 0 to 100 points.<break/>Proportional to the number of correctly repeated patterns.<break/>Scores in {0, 33, 50, 66, 100}</td>
</tr>
<tr>
<td>Attribute Seriation<break/>(10 items)</td>
<td>AttribSer</td>
<td>Evaluates the ability to order different elements using attributes of height, width, length or capacity to contain. A train is shown that has six cars to place objects. The instructions ask to take objects from the bar presented on the screen and order them from left to right according to one of the attributes mentioned above.</td>
<td valign="middle"><p><fig id="ft6" position="anchor" fig-type="simple" orientation="portrait"><graphic xlink:href="jnc.14249-ft6" position="anchor" orientation="portrait"/></fig></p></td>
<td>Score from 0 to 100 points.<break/>Proportional to the number of correctly ordered items.<break/>Scores in {0, 33, 50, 66, 100}</td>
</tr>
<tr style="transparent-border-top">
<td>Perspective Visualization<break/>(10 items)</td>
<td>PerspVis</td>
<td>Evaluates skills to represent objects from above, from the side or from below. The game shows a character in the center of the screen while taking a picture of an object from a specific perspective (front, side, top, bottom). The game asks the player to identify the photograph thus obtained from among other alternatives that show the same object, but seen from other perspectives (front, side, top, bottom).</td>
<td valign="middle"><p><fig id="ft7" position="anchor" fig-type="simple" orientation="portrait"><graphic xlink:href="jnc.14249-ft7" position="anchor" orientation="portrait"/></fig></p></td>
<td>Score from 0 to 100 points.<break/>Two possible alternatives: correct or incorrect.<break/>Scores in {0, 100}.</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From now on, we will refer to each task of the TPM as a game. Each “game” (task) consists of “ten levels” (ten items) scored from 0 to 100. In other words, the TPM presents ten levels for each game, but each level differs due to slight modifications to modulate the difficulty level (see <xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>, Table S2). The TPM has seven games, each one with ten levels, meaning that the TPM presents 70 items to children. The TPM does not give feedback to children about the correctness of the answered item, but congratulates them each time they complete a game. The structure of every game has initial directions (given by a character), an example (accompanied by an animation showing the interaction with the screen), and the ten game levels (the task items). The Supplementary Materials contain the audio transcript (translated from Spanish) of the “Numerical Comparison” game to illustrate the types of questions posed to children (see <xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>, Table S3). Additionally, the ten game levels had a progression of difficulty: The first was the easiest, and the last was the most difficult. Nevertheless, every child had to answer all assessment items, since there was no algorithm for selecting the items presented to children according to their previous performance.</p></sec>
<sec sec-type="other4"><title>The Present Study</title>
<p>This paper argues for the TPM’s validity based on the analysis of the data collected from a sample of Chilean children aged 4 to 6 years old in April 2022. This data collection was conducted just after the COVID-19 quarantine. It raised concerns that the data could reflect atypical learning progress, as participants had been isolated in their homes for almost two full years. For this reason, we decided to conduct a second round of data collection after roughly three months of regular school attendance. The data collected in this second round was thus expected to better reflect typical learning progress for children of these ages.</p></sec></sec>
<sec sec-type="methods"><title>Method</title>
<sec><title>Participants and Procedure</title>
<p>Measurements of children's mathematical learning were conducted using the TPM in a sample of 824 participants in Prekindergarten (PK), Kindergarten (K), and Grade 1 (G1) classrooms, recruited from five educational centers located in O’Higgins Region, Chile. In the first and second rounds of data collection, 718 and 653 children participated, respectively. In what follows, we report the results of the second round of data collection and provide analog results for the first round in the Supplementary Materials (see <xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>). Participants’ mean ages and percentage of girls for each educational level are presented in Table S9 (first round) and <xref ref-type="table" rid="t3">Table 3</xref> (second round). The protocols of this research were approved by the Scientific Ethics Committee of Universidad de O’Higgins, certificate 05/2019, and the guardian of each participant signed an informed consent. Each participant also gave informed assent before the start of the activities.</p>
<table-wrap id="t3" position="anchor" orientation="portrait">
<label>Table 3</label><caption><title>Descriptive Statistics of the Sample</title></caption>
<table frame="hsides" rules="groups" width="75%">
<col width="18%" align="left"/>
<col width="20%"/>
<col width="20%"/>
<col width="22%"/>
<col width="20%"/>
<thead>
<tr>
<th valign="bottom">Level</th>
<th valign="bottom"><italic>N</italic></th>
<th valign="bottom">Age in years, <italic>M</italic> (<italic>SD</italic>)</th>
<th valign="bottom">Number of children with missing age</th>
<th valign="bottom">% girls</th>
</tr>
</thead>
<tbody>
<tr>
<td>PK</td>
<td>203</td>
<td>4.7 (0.3)</td>
<td>7</td>
<td>60%</td>
</tr>
<tr>
<td>K</td>
<td>237</td>
<td>5.7 (0.3)</td>
<td>5</td>
<td>57%</td>
</tr>
<tr>
<td>G1</td>
<td>213</td>
<td>6.7 (0.4)</td>
<td>2</td>
<td>48%</td>
</tr>
<tr>
<td>Total</td>
<td>653</td>
<td>5.8 (0.9)</td>
<td>14</td>
<td>55%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The design of the TPM’s implementation in classrooms considered the acquisition of students’ lists beforehand. Hence, typing a test identifier in the TPM showed the students’ list, which facilitated a fast start of the assessment process and diminished the application time in classrooms. When children finished a game, their performance data was uploaded to an internet cloud for subsequent analysis. For the interested reader, a prior work details the TPM architecture (<xref ref-type="bibr" rid="r38">Navarrete-Ulloa et al., 2023</xref>).</p>
<p>Data was collected in the classrooms by assessing complete groups during each visit. For data collection, two research assistants visited each classroom carrying the necessary technological equipment (tablets, mobile internet, etc.). When entering the classroom, the assistants connected a router to provide wireless internet to the classroom. After the assistants introduced themselves and gave group instructions, they provided each child a tablet with the app ready for use and headphones. Afterward, they approached each child to type the test identifier and selected the child’s name to start the first game. The assistants followed a protocol including the following guidelines: a) Provide instructions where the TPM is presented as a fun game that they brought for them to play; b) In case they notice a child does not understand the instructions, they should approach the child and explain the task at hand personally. However, they should try their best for these explanations not to influence the child’s performance; c) For each child, there should be a five-minute break in the middle of the assessment process (at the end of one of the games).</p>
<p>Children carried out the TPM games autonomously using a tablet, with few exceptions where an adult was required to mediate the test application. In PK classrooms, the assessment process lasted less than 80 minutes. In K and G1 classrooms, the evaluation process lasted less than 60 minutes. The application of the games in each classroom was divided into two sessions, separated by a five-minute break and monitored by the leading researcher. Ninety-four percent of the participants completed both sessions on the same day, while 6% completed the sessions on different days (<italic>M</italic> = 14 days; <italic>SD</italic> = 6.4; Range = 1-21 days). Regarding class size, classrooms of the PK, K, and G1 levels had an average of 17.6 (range 6-29), 24.4 (range 15-33), and 26.8 (range 22-33) children, respectively.</p></sec>
<sec><title>Data Analysis</title>
<p>To verify the TPM’s criterion validity, the analyses presented below seek to confirm that the TPM can detect significant differences between the three educational levels considered in the present study (PK, K, G1). Concerning construct validity, confirmatory factor analyses were performed to show that the TPM captures two dimensions of children's mathematical thinking: numerical thinking and visuospatial reasoning (according to international curricular standards). The test has been designed to facilitate diagnostic and formative assessment procedures two years before school; therefore, it is valuable to interpret the data collected in terms of this objective. The three cohorts in our sample represent different assessment points in the learning trajectory of the educational period of interest. The first assessment point is seen as a diagnosis for entry to early education (Pre-Kinder cohort; PK), the second assessment point is a measurement of progress at the end of the first year of early education (Kinder cohort; K), and the third assessment point evaluates the competencies acquired at the end of early education (first grade cohort; G1). Thus, our selection of cohorts allows us to conceptualize three different levels of student progress regarding their early math skills.</p></sec></sec>
<sec sec-type="results"><title>Results</title>
<p>We included only children with complete data in the seven TPM tasks in the analysis. Here, we present results for the second round (<italic>n</italic> = 653), which was less affected by the COVID-19 quarantine. For completeness, we provide the results of the first round (<italic>n</italic> = 718) as Supplementary Materials (see <xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>).</p>
<sec><title>Descriptive Statistics</title>
<p><xref ref-type="table" rid="t3">Table 3</xref> shows descriptive statistics of the sample of children in the second data collection round. Table S4 (<xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>) provides Pearson correlations between each game’s scores and age.</p></sec>
<sec><title>Reliability Analysis</title>
<p>We first looked at item discrimination levels per task. We computed discrimination indices for each item and task by subtracting each item’s average scores for the 27% of participants with top and bottom overall task scores (<xref ref-type="bibr" rid="r14">Ebel &amp; Frisbie, 1991</xref>). In what follows, we considered an item’s discrimination as good or not based on a threshold of 30% (<xref ref-type="bibr" rid="r14">Ebel &amp; Frisbie, 1991</xref>). Number Line Estimation was the task with the fewest good items, while all the items in Numerical Comparison and Perspective Visualization were above the threshold. <xref ref-type="table" rid="t4">Table 4</xref> presents the number of items with above-threshold discrimination and Cronbach’s alpha coefficients for each task. The Numerical Comparison and the Perspective Visualization tasks obtained the lowest alphas (about .60). In contrast, the Pattern Creation task obtained the highest value (about .80). From this point onwards, we discarded the nine items that did not reach a good level of discrimination.</p>
<table-wrap id="t4" position="anchor" orientation="portrait">
<label>Table 4</label><caption><title>Discrimination and Reliability Data for Each Task</title></caption>
<table frame="hsides" rules="groups" width="75%">
<col width="40%" align="left"/>
<col width="20%"/>
<col width="20%"/>
<col width="20%"/>
<thead>
<tr>
<th valign="bottom">Game</th>
<th valign="bottom"># good items</th>
<th valign="bottom">Cronbach's alpha</th>
<th valign="bottom">Cronbach's alpha after item removal</th>
</tr>
</thead>
<tbody>
<tr>
<td>Numerical Comparison</td>
<td>10</td>
<td align="char" char=".">0.66</td>
<td align="char" char=".">0.66</td>
</tr>
<tr>
<td>Concrete Pictorial Symbolic Mapping</td>
<td>7</td>
<td align="char" char=".">0.71</td>
<td align="char" char=".">0.76</td>
</tr>
<tr>
<td>Additive Problem Solving</td>
<td>10</td>
<td align="char" char=".">0.83</td>
<td align="char" char=".">0.83</td>
</tr>
<tr>
<td>Number Line Estimation</td>
<td>6</td>
<td align="char" char=".">0.77</td>
<td align="char" char=".">0.84</td>
</tr>
<tr>
<td>Pattern Creation</td>
<td>10</td>
<td align="char" char=".">0.86</td>
<td align="char" char=".">0.86</td>
</tr>
<tr>
<td>Attribute Seriation</td>
<td>8</td>
<td align="char" char=".">0.75</td>
<td align="char" char=".">0.76</td>
</tr>
<tr>
<td>Perspective Visualization</td>
<td>10</td>
<td align="char" char=".">0.75</td>
<td align="char" char=".">0.75</td>
</tr>
</tbody>
</table>
</table-wrap></sec>
<sec><title>Correlations Between First and Second Data Collection</title>
<p>As already mentioned, the first collection of data (T1) was performed in April 2022, and the second one after roughly three months (T2). <xref ref-type="table" rid="t5">Table 5</xref> presents the correlations between scores of T1 and T2 for each of the tasks, considering only the subset of children who provided complete data at both times of testing. <xref ref-type="table" rid="t5">Table 5</xref> shows moderate to strong correlations between the scores of each task at both times (ranging from .37 to .68). However, these correlations should not be taken as estimates of test-retest reliability, due to the long time elapsed between the two measurements: <xref ref-type="bibr" rid="r40">Nunnally and Bernstein (1994)</xref> have suggested a 2-week interval for estimating test-retest reliability regarding achievement-type tests. Hence, correlations in <xref ref-type="table" rid="t5">Table 5</xref> more likely suggest a change in the sample. For example, a comparison between <xref ref-type="fig" rid="f1">Figure&nbsp;1</xref> and Figure S1 (<xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>) shows that children improved their knowledge during the 3-month period. Recent research on analysis of pretest-posttest data indicates that the acquisition of knowledge between T1 and T2 is associated with lower correlation values between the scores obtained in T1 and T2 (<xref ref-type="bibr" rid="r34">Navarrete-Ulloa, 2024</xref>). Hence, these relatively low correlation values more likely reflect a change in the sample due to learning, a better adaptation to the school environment, children’s developmental changes, among other factors that are confounded with the time elapsed. Nevertheless, <xref ref-type="table" rid="t5">Table 5</xref> shows significant T1–T2 correlations for all the TPM tasks, suggesting that all of them measure somewhat stable constructs.</p>
<table-wrap id="t5" position="anchor" orientation="portrait">
<label>Table 5</label><caption><title>Pearson’s Correlations Between Children’s Scores for Each TPM Game Between the First Data Collection (T1) and the Second Data Collection (T2) (<italic>n</italic> = 547)</title></caption>
<table frame="hsides" rules="groups" width="75%">
<col width="50%" align="left"/>
<col width="50%"/>
<thead>
<tr>
<th>Game</th>
<th>T1-T2 Correlation</th>
</tr>
</thead>
<tbody>
<tr>
<td>Numerical Comparison</td>
<td align="char" char=".">.60***</td>
</tr>
<tr>
<td>Concrete Pictorial Symbolic Mapping</td>
<td align="char" char=".">.45***</td>
</tr>
<tr>
<td>Additive Problem Solving</td>
<td align="char" char=".">.53***</td>
</tr>
<tr>
<td>Number Line Estimation</td>
<td align="char" char=".">.37***</td>
</tr>
<tr>
<td>Pattern Creation</td>
<td align="char" char=".">.68***</td>
</tr>
<tr>
<td>Attribute Seriation</td>
<td align="char" char=".">.65***</td>
</tr>
<tr>
<td>Perspective Visualization</td>
<td align="char" char=".">.61***</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>***<italic>p</italic> &lt; .0001.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec><title>Criterion Validity</title>
<p><xref ref-type="fig" rid="f1">Figure 1</xref> presents the average scores obtained by children in each educational level and game. As expected, average scores on all the TPM games increased along with educational level. We used <italic>t</italic>-tests to separately compare children’s scores across consecutive educational levels (PK vs. K, K vs. G1) for each game. Since this amounts to 14 contrasts, we corrected for multiple comparisons using the Holm-Bonferroni procedure (<xref ref-type="bibr" rid="r24">Holm, 1979</xref>). This process showed statistically significant differences between all three educational levels in all but one case: only the Number Line Estimation game failed to show a significant difference (between PK and K, <italic>p</italic> = .08; all other <italic>p</italic>s &lt; .0003).</p>
<fig id="f1" position="anchor" fig-type="figure" orientation="portrait">
<label>Figure 1</label>
<caption>
<title>Children’s Scores for All Games and Levels</title>
<p><italic>Note.</italic> Vertical bars depict 95% confidence intervals.</p>
</caption>
<graphic xlink:href="jnc.14249-f1" position="anchor" orientation="portrait"/></fig>
</sec>
<sec><title>Construct Validity</title>
<p>We then looked at the seven-game set and asked about its factorial structure. According to our theoretical framework, we hypothesized that these tasks would be grouped into two subsets: numerical tasks (Numerical Comparison, Concrete Pictorial Symbolic Mapping, Additive Problem Solving, and Number Line Estimation) and visuospatial tasks (Pattern Creation, Attribute Seriation, and Perspective Visualization). A confirmatory factor analysis considering these two factors in predicting total scores per task showed a good degree of fit: RMSEA = .015, SRMR = .017, CFI = .999, TLI = .998, χ<sup>2</sup>(13) = 14.90, <italic>p</italic> = .31. <xref ref-type="table" rid="t6">Table 6</xref> shows the factor loadings for this model, revealing that Number Line Estimation was the only task with a loading smaller than .60 in its respective factor.</p>
<table-wrap id="t6" position="anchor" orientation="portrait">
<label>Table 6</label><caption><title>Factor Loadings From the CFA</title></caption>
<table frame="hsides" rules="groups">
<col width="40%" align="left"/>
<col width="30%"/>
<col width="30%"/>
<thead>
<tr>
<th>Game</th>
<th>Numerical Thinking factor</th>
<th>Visuospatial Reasoning factor</th>
</tr>
</thead>
<tbody>
<tr>
<td>Numerical Comparison</td>
<td align="char" char=".">.63</td>
<td/>
</tr>
<tr>
<td>Concrete Pictorial Symbolic Mapping</td>
<td align="char" char=".">.71</td>
<td/>
</tr>
<tr>
<td>Additive Problem Solving</td>
<td align="char" char=".">.81</td>
<td/>
</tr>
<tr>
<td>Number Line Estimation</td>
<td align="char" char=".">.35</td>
<td/>
</tr>
<tr>
<td>Pattern Creation</td>
<td/>
<td align="char" char=".">.74</td>
</tr>
<tr>
<td>Attribute Seriation</td>
<td/>
<td align="char" char=".">.74</td>
</tr>
<tr>
<td>Perspective Visualization</td>
<td/>
<td align="char" char=".">.64</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To understand how the TPM behaves at the different individual tasks and grade levels, we computed Cronbach’s alpha values separately for each task, dimension, and level. <xref ref-type="table" rid="t7">Table 7</xref> shows that tasks improve their internal consistency with grade level, that most tasks show acceptable values at the target level (K), and that two tasks show very low alpha values in the youngest age group (PK): Numerical Comparison and Perspective Visualization. It should be noted that both dimensions (Numerical Thinking and Visuospatial Reasoning) have good internal consistency for all levels.</p>
<table-wrap id="t7" position="anchor" orientation="portrait">
<label>Table 7</label><caption><title>Cronbach’s Alpha for Each Task, Grade Level, and Dimension</title></caption>
<table frame="hsides" rules="groups" width="75%">
<col width="52%" align="left"/>
<col width="16%"/>
<col width="16%"/>
<col width="16%"/>
<thead>
<tr>
<th>Task / Dimension</th>
<th>PK</th>
<th>K</th>
<th>G1</th>
</tr>
</thead>
<tbody>
<tr>
<td>Numerical Comparison</td>
<td align="char" char=".">.23</td>
<td align="char" char=".">.56</td>
<td align="char" char=".">.72</td>
</tr>
<tr>
<td>Concrete Pictorial Symbolic Mapping</td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.72</td>
<td align="char" char=".">.70</td>
</tr>
<tr>
<td>Additive Problem Solving</td>
<td align="char" char=".">.62</td>
<td align="char" char=".">.74</td>
<td align="char" char=".">.86</td>
</tr>
<tr>
<td>Number Line Estimation</td>
<td align="char" char=".">.79</td>
<td align="char" char=".">.84</td>
<td align="char" char=".">.87</td>
</tr>
<tr>
<td>Pattern Creation</td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.81</td>
<td align="char" char=".">.87</td>
</tr>
<tr>
<td>Attribute Seriation</td>
<td align="char" char=".">.65</td>
<td align="char" char=".">.68</td>
<td align="char" char=".">.64</td>
</tr>
<tr>
<td>Perspective Visualization</td>
<td align="char" char=".">.42</td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.76</td>
</tr>
<tr style="grey-border-top">
<th colspan="4">Dimension</th>
</tr>
<tr>
<td>Numerical Thinking</td>
<td align="char" char=".">.65</td>
<td align="char" char=".">.82</td>
<td align="char" char=".">.87</td>
</tr>
<tr>
<td>Visuospatial Reasoning</td>
<td align="char" char=".">.69</td>
<td align="char" char=".">.82</td>
<td align="char" char=".">.87</td>
</tr>
</tbody>
</table>
</table-wrap></sec>
<sec><title>Measurement Invariance</title>
<p>Finally, we evaluated whether the two factors underlying the TPM show measurement invariance between boys and girls, that is to say, if the factor structure is comparable between these two groups (<xref ref-type="bibr" rid="r27">Kline, 2023</xref>). There are different measurement invariance levels: configural, metric, scalar, and strict (<xref ref-type="bibr" rid="r5">Bialosiewicz et al., 2013</xref>). Each of these introduces additional requirements to the previous ones. In configural invariance, both groups exhibit the same factor structure. In metric invariance, factor loadings are equal between groups. In scalar invariance, factor loadings and intercepts are equal between groups. Finally, in strict invariance, factor loadings, intercepts, and residuals are equal between groups. The data should display at least scalar invariance to meaningfully compare factor means for boys and girls.</p>
<p>We computed four multigroup (boys/girls) CFAs, one per invariance level. A chi-square difference test revealed no significant difference between the configural invariance model and the metric invariance model (χ<sup>2</sup><sub>diff</sub>(5) = 6.3, <italic>p</italic> = .27), and a significant difference between the metric invariance model and the scalar invariance one (χ<sup>2</sup><sub>diff</sub>(5) = 20.6, <italic>p</italic>&nbsp;= .001). In addition, the metric invariance model presented a good degree of fit: RMSEA = .033, SRMR = .033, CFI = .993, TLI = .990, χ<sup>2</sup>(31) = 41.8, <italic>p</italic> = .09. Altogether, this suggested that the two-factor model for the TPM exhibits metric invariance.</p>
<p>We analyzed each factor's measurement invariance separately to shed further light on the TPM’s properties. For the Numerical Thinking factor, the chi-square difference test revealed no significant differences between models (χ<sup>2</sup><sub>diff</sub>(3) = 3.3, <italic>p</italic> = .34; χ<sup>2</sup><sub>diff</sub>(3) = 3.5, <italic>p</italic> = .32; and χ<sup>2</sup><sub>diff</sub>(4) = 7.8, <italic>p</italic> = .10, respectively). Since the strict invariance model displayed a good fit (RMSEA = .029, SRMR = .040, CFI = .993, TLI = .994, χ<sup>2</sup>(14) = 17.9, <italic>p</italic> = .21), we concluded that this factor exhibits strict invariance. Instead, for the Visuospatial Reasoning factor, the chi-square difference test indicated no significant difference between the configural invariance and the metric invariance models (χ<sup>2</sup><sub>diff</sub>(2) = 2.2, <italic>p</italic> = .34), and a significant difference between the metric invariance and the scalar invariance models (χ<sup>2</sup><sub>diff</sub>(2) = 17.8, <italic>p</italic> = .0001). Together with the good fit indices of the metric invariance model (RMSEA = .016, SRMR = .021, CFI = .999, TLI = .999, χ<sup>2</sup>(2) = 2.2, <italic>p</italic> = .34), we concluded that this factor shows metric invariance.</p>
<p>In summary, the measurement invariance analysis suggests that the TPM’s Numerical Thinking factor is measured with the same degree of precision for both boys and girls (<xref ref-type="bibr" rid="r27">Kline, 2023</xref>). In contrast, the Visuospatial Reasoning factor only exhibited metric invariance, indicating that means between boys and girls should not be directly compared.</p></sec>
<sec><title>Percentile Scales</title>
<p>In the Supplementary Materials, we provide percentile scales for the Numerical Thinking and Visuospatial Reasoning dimension scores and total TPM scores for the entire sample (<xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>, Table S5). We also provide percentile scales for the PK (Table S6), K (Table S7), and G1 (Table S8) subsamples.</p></sec></sec>
<sec sec-type="discussion"><title>Discussion</title>
<p>The assessment of mathematical skills in childhood is challenging. Most affordable strategies rely on recording observations during long periods of the learning process. Although there are standardized assessments, they tend to be slow and costly due to the need for individual mediation of early-aged learners. These barriers limit the ability of different educational stakeholders to make effective decisions and implement corrective measures promptly. This work introduced the TPM as an instrument to facilitate assessment in the mathematical area of early childhood education. International standards suggest that math learning at this stage should focus on representing, relating, and operating with whole numbers and describing shapes and spatial relationships (<xref ref-type="bibr" rid="r8">CCSS, 2010</xref>). Consistent with these standards, our confirmatory factor analysis shows that the TPM has two dimensions: The first one associated with numerical thinking and the second one with visuospatial reasoning. Primary consistency analyses indicate that TPM scores reflect at least three degrees of mathematical ability and that the difficulty of each game is well-calibrated for the age and ability of the target population. Consequently, the TPM has criterion and construct validity. Furthermore, the reliability analysis (see <xref ref-type="table" rid="t7">Table 7</xref>) indicates that each TPM dimension can be used for individual assessment (at least) for K and G1 levels—though it is less clear whether they can be used in PK for individual evaluations. Regarding the subgroups of boys and girls, the measurement invariance analysis suggests that the TPM measures Numerical Thinking robustly. Nevertheless, the Visuospatial Reasoning measurement may not be comparable between boys and girls. Finally, although the TPM’s application requires mobile devices and headphones, its application in large groups is simple and fast (around 80 minutes), and its results are immediate. 
Consequently, this first study provides evidence that a technology-based assessment instrument such as the TPM may be an efficient way to offer high-quality inputs for the quick assessment of math learning in early childhood.</p>
<p>A key challenge for young children’s classroom assessment is that these children’s abilities are underdeveloped and insufficient to answer traditional tests. Most solutions to this problem require individual mediation to apply the assessment, which implies a significant consumption of resources. In Chile, for example, the evaluation process in early childhood education establishments is based on systematic observation and recording observations. Because this process is idiosyncratic to the classroom, its results cannot be used for comparative analysis or the monitoring of learning and remedial interventions. Although there are standardized instruments in Chile such as the Precalculus Test (<xref ref-type="bibr" rid="r32">Milicic &amp; Schmidt, 2011</xref>), the adaptation of the Utrecht Early Mathematics Assessment Test (<xref ref-type="bibr" rid="r9">Cerda Etchepare et al., 2012</xref>), and the Kinder Test (Educa UC), the application of these instruments is traditional, with an adult mediating the interaction; thus, their application is expensive for large groups.</p>
<p>Similarly, at the international level, there are standardized instruments such as the CIM, BP, EGMA, REMA, and TEMA (<xref ref-type="bibr" rid="r13">Dong et al., 2021</xref>; <xref ref-type="bibr" rid="r17">Ginsburg et al., 2016</xref>; <xref ref-type="bibr" rid="r18">Ginsburg &amp; Pappas, 2016</xref>; <xref ref-type="bibr" rid="r22">Hoffman &amp; Grialou, 2005</xref>; <xref ref-type="bibr" rid="r44">Platas et al., 2016</xref>), whose expenses are out of reach for under-resourced communities. The assessment instrument presented in this paper provides a solution that combines efficiency in the assessment process with adequate information delivery. Hence, the TPM could help to empower these communities by providing an assessment alternative that aligns with international standards and requires fewer resources in terms of time and labor. For example, in Chile, only affluent schools can access standardized methods to assess early math skills, as they can afford them. On the other hand, less affluent schools commonly have classrooms with thirty children under the care of a teacher and a teaching assistant, where assessment relies on systematic observation methods. This feature makes it difficult to ensure similar quality standards across public schools for early math education. Although the TPM requires access to the enabling technologies (internet, tablets, etc.), it requires small amounts of teacher labor. Consequently, the TPM would be a valuable tool for educational environments with access to technology where extra professional labor is difficult to obtain.</p>
<p>Additionally, we verified that the play experience guides the assessment, as mandated by pedagogical principles (<xref ref-type="bibr" rid="r21">Hirsh-Pasek, 2009</xref>) and the Chilean early education’s curricular guidelines. As mentioned earlier, the research assistants introduced the TPM as a fun game to play, along with the graphic design that uses animated characters in everyday fantasy contexts and missions to create a play environment during the evaluation process. Throughout the data collection process, several children expressed that they wanted to "play more games" after finishing all the tasks, while several others asked research assistants, "When do you come back to play again?" These memories constitute anecdotal evidence consistent with the TPM having a game-based design that provides an enjoyable play experience during assessment.</p>
<sec><title>Limitations and Future Directions</title>
<p>The participants in this study have demographic characteristics associated with the geographic region where it was carried out, so the results should not be generalized to the population of Chile in general. This point is relevant because although the scores obtained through this assessment instrument can be used to make specific comparisons between students (or schools), the lack of further information at the national level that identifies the levels of competence obtained makes it challenging to interpret the test results objectively. More details about this would enable the interpretation of the results to facilitate educational decision-making relevant to the student's (school's) reality and appropriate to their regional or national environment.</p>
<p>Considering that the children took the first TPM during the first days of the 2022 school year, which coincided with the return to face-to-face classes interrupted by the COVID-19 quarantines, the scores of this first data collection could have reflected atypical mathematical learning trajectories in contrast to an everyday schooling context. We thus decided to present the analysis of the second data collection, expecting it to reflect a more typical mathematical learning trajectory. While overall scores were indeed higher in the second data collection (compare <xref ref-type="fig" rid="f1">Figure 1</xref> and Figure S1, <xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>), this difference might not be only due to math learning and development but also due to children being more familiar with the teaching methodology and school environment, reflected in an improved relation with the teacher and better behavior in the classroom. Children feeling more at ease in the classroom might have provided better conditions to conduct the second TPM assessment and might partly explain the improved performance. Future research should establish whether there is any significant difference in the score distributions relative to cohorts whose educational process is typical. Researchers aiming to generalize the results presented here would need to evaluate a population at this academic level with a stratified sample according to key educational variables such as geographic region, administrative unit, gender, and educational level of participants. Better yet, in the future, a more rigorous validation study could consider better standards of criterion validity and add discriminant validity criteria.</p>
<p>This work presented evidence of reliability and construct validity resulting from data analysis associated with two different data collections. This redundancy arises because the first data collection was conducted at the end of a lockdown period of almost two years (due to the COVID-19 health contingency). Consequently, participants lacked school experience, their levels of autonomy were compromised, and their learning trajectories were likely atypical. These factors suggest that the evidence of construct validity and reliability from the first data collection might be underestimated (see <xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>). There is a similar effect for the second data collection, but in the opposite direction, as most students had prior experience with the instrument. This suggests that the evidence associated with the second data collection might be overestimated. By analyzing both data collections, we can ensure that the evidence of construct validity and reliability for the TPM is bounded by the levels of evidence associated with these two data collections.</p>
<p>The TPM’s design strongly aligned with worldwide curricular guidelines, which point out that prekindergarten and kindergarten levels should spend the majority of time learning numbers, their relations and representations (numerical reasoning), along with spatial concepts, their relations and logic (spatial reasoning) (<xref ref-type="bibr" rid="r1">Achieve, 2010</xref>; <xref ref-type="bibr" rid="r33">MINEDUC, 2018</xref>; <xref ref-type="bibr" rid="r39">NCTM, 2006</xref>). Nevertheless, those researchers and practitioners focused on assessing only numerical skills might want to drop the dimension of spatial reasoning to obtain a shorter version of the TPM, which might be more efficient for this aim. Using different frameworks would have suggested measuring other important precursors for arithmetic learning, such as verbal counting and ordinality (e.g., <xref ref-type="bibr" rid="r31">Merkley &amp; Ansari, 2016</xref>). Still, our theoretical framework and the technology constraints prioritized measuring the seven tasks reported in this study. In this context, number line estimation tasks have been widely used in the early childhood mathematical learning literature (<xref ref-type="bibr" rid="r46">Ramani &amp; Siegler, 2008</xref>; <xref ref-type="bibr" rid="r52">Siegler &amp; Ramani, 2009</xref>; <xref ref-type="bibr" rid="r51">Siegler et al., 2012</xref>). Our results indicate that the associated task (NumLine) has a standardized factor loading λ = 0.35, which is much lower than the loadings associated with the other three number sets. Hence, there is room to improve the TPM by modifying this estimation task. Along the same lines, the measurement invariance analysis indicates that the Visuospatial Reasoning factor shows only metric invariance, implying that at least a subset of these items may be approached differently by boys and girls, so the comparisons of group means may be contaminated (<xref ref-type="bibr" rid="r19">Gregorich, 2006</xref>). Further research is required to understand the origin of such invariance results for this TPM dimension.</p>
<p>Although the TPM technology assesses large groups of children more efficiently, each child experiences an activity of around sixty minutes. This long period of activity might cause exhaustion, and such a factor might have impacted the results presented here. Still, in our experience, the tablet and game-based design succeeded in maintaining children's attention, and we did not see signs of exhaustion in kindergarten. However, some signs of fatigue were observed in prekindergarten. In this context, practitioners might want to apply the two assessment sessions on two separate days, especially for younger children. Additionally, since the TPM does not require a teacher to mediate between a child and their assessment, it cannot capture the learning dimensions that are more readily observable through social interaction with the adult or between children. Concerning criterion validity—that is, the degree of effectiveness with which the TPM detects levels of early mathematical ability—convergent, retrospective, and predictive validity criteria should be studied. For convergent validity, the correlation between the TPM scores and a test of mathematical skills should be estimated, for example, the "TEMA-III battery" (<xref ref-type="bibr" rid="r22">Hoffman &amp; Grialou, 2005</xref>), a widely used international standard of early mathematical skills which has been adapted to Spanish. For retrospective and predictive validity, we suggest a comparison of participants’ TPM scores with their overall school performance in the academic semester immediately prior to (retrospective) and at the end of (predictive) the academic semester in which the TPM assessment is performed. Concerning discriminant validity, it is essential to distinguish whether the TPM measures early mathematical skills’ development or the general cognitive development of boys and girls. 
For this purpose, a picture vocabulary test such as the PPVT-III may be used, expecting higher correlations between the TPM and TEMA-III and lower correlations between the TPM and PPVT-III.</p></sec>
<sec sec-type="conclusions"><title>Conclusions</title>
<p>This paper highlights the difficulties in conducting mathematical learning assessments in early childhood, especially for under-resourced educational communities with large student groups. Considering international curricular standards for early childhood mathematics, this paper presented the Test of Preschool Mathematics (TPM), which uses automation and gamification technologies to deliver a pleasant, effective, and efficient assessment experience. Although there is room for improvement of the TPM, the current version meets the minimum desirable requirements that could provide valuable information as one of the inputs considered for the assessment of 4- and 5-year-old children’s development of mathematical abilities.</p></sec></sec>
</body>
<back><fn-group><fn fn-type="financial-disclosure">
<p>This work was funded by the Chilean Agency of Research and Development (ANID), grant ANID/FONDEF/IT23I0012 (JN). Additionally, JN was supported by internal grant PI2402 from Universidad de O'Higgins. In addition, DMG, LP, and PD were supported by the grants PIA/Basal FB210005 and PIA/Support 2024 AFB240004; DMG was also supported by the grant Milenio/NCS2021_014.</p></fn></fn-group>
<ref-list><?pagebreak-before?><title>References</title>
<ref id="r1"><mixed-citation publication-type="web">Achieve. (2010). <italic>Comparing the Common Core State Standards in Mathematics and NCTM’s “Curriculum Focal Points”. Achieving the Common Core</italic>. <ext-link ext-link-type="uri" xlink:href="https://eric.ed.gov/?id=ED512110">https://eric.ed.gov/?id=ED512110</ext-link></mixed-citation></ref>
<ref id="r2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Alsina</surname>, <given-names>Á.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Estableciendo niveles de adquisición de conocimientos matemáticos importantes de 3 a 6 años: Rúbrica ACMI 3-6.</article-title> <source>Edma 0-6: Educación Matemática en la Infancia</source>, <volume>8</volume>(<issue>2</issue>), <fpage>17</fpage>–<lpage>43</lpage>. <pub-id pub-id-type="doi">10.24197/edmain.2.2019.17-43</pub-id></mixed-citation></ref>
<ref id="r3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Andrews</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Birney</surname>, <given-names>D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Halford</surname>, <given-names>G. S.</given-names></string-name></person-group> (<year>2006</year>). <article-title>Relational processing and working memory capacity in comprehension of relative clause sentences.</article-title> <source>Memory &amp; Cognition</source>, <volume>34</volume>(<issue>6</issue>), <fpage>1325</fpage>–<lpage>1340</lpage>. <pub-id pub-id-type="doi">10.3758/BF03193275</pub-id><pub-id pub-id-type="pmid">17225512</pub-id></mixed-citation></ref>
<ref id="r4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Araya</surname>, <given-names>R.</given-names></string-name>, <string-name name-style="western"><surname>Calfucura</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>Jiménez</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Aguirre</surname>, <given-names>C.</given-names></string-name>, <string-name name-style="western"><surname>Palavicino</surname>, <given-names>M. A.</given-names></string-name>, <string-name name-style="western"><surname>Lacourly</surname>, <given-names>N.</given-names></string-name>, <string-name name-style="western"><surname>Soto-Andrade</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Dartnell</surname>, <given-names>P.</given-names></string-name></person-group> (<year>2010</year>). <article-title>The effect of analogies on learning to solve algebraic equations.</article-title> <source>Pedagogies</source>, <volume>5</volume>(<issue>3</issue>), <fpage>216</fpage>–<lpage>232</lpage>. <pub-id pub-id-type="doi">10.1080/1554480X.2010.486160</pub-id></mixed-citation></ref>
<ref id="r5"><mixed-citation publication-type="book">Bialosiewicz, S., Murphy, K., &amp; Berry, T. (2013). <italic>Do our measures measure up? The critical role of measurement invariance</italic>. Claremont Evaluation Center.</mixed-citation></ref>
<ref id="r6"><mixed-citation publication-type="other">Bynner, J. M., &amp; Parsons, S. (1997). <italic>Does numeracy matter? Evidence from the National Child Development Study on the impact of poor numeracy on adult life</italic>. Basic Skills Agency.</mixed-citation></ref>
<ref id="r7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Casey</surname>, <given-names>B. M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Ganley</surname>, <given-names>C. M.</given-names></string-name></person-group> (<year>2021</year>). <article-title>An examination of gender differences in spatial skills and math attitudes in relation to mathematics success: A bio-psycho-social model.</article-title> <source>Developmental Review</source>, <volume>60</volume>, <elocation-id>100963</elocation-id>. <pub-id pub-id-type="doi">10.1016/j.dr.2021.100963</pub-id></mixed-citation></ref>
<ref id="r8"><mixed-citation publication-type="web">CCSS. (2010). <italic>Common Core State Standards for Mathematics</italic>. CCSSO. <ext-link ext-link-type="uri" xlink:href="https://www.nctm.org/uploadedFiles/Standards_and_Positions/Common_Core_State_Standards/Math_Standards.pdf">https://www.nctm.org/uploadedFiles/Standards_and_Positions/Common_Core_State_Standards/Math_Standards.pdf</ext-link></mixed-citation></ref>
<ref id="r9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Cerda Etchepare</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Pérez Wilson</surname>, <given-names>C.</given-names></string-name>, <string-name name-style="western"><surname>Moreno Araya</surname>, <given-names>C.</given-names></string-name>, <string-name name-style="western"><surname>Núñez Risco</surname>, <given-names>K.</given-names></string-name>, <string-name name-style="western"><surname>Quezada Herrera</surname>, <given-names>E.</given-names></string-name>, <string-name name-style="western"><surname>Rebolledo Rojas</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Sáez Tisnao</surname>, <given-names>S.</given-names></string-name></person-group> (<year>2012</year>). <article-title>Adaptación de la versión española del Test de Evaluación Matemática Temprana de Utrecht en Chile.</article-title> <source>Estudios Pedagógicos (Valdivia)</source>, <volume>38</volume>(<issue>1</issue>), <fpage>235</fpage>–<lpage>253</lpage>. <pub-id pub-id-type="doi">10.4067/S0718-07052012000100014</pub-id></mixed-citation></ref>
<ref id="r10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Clements</surname>, <given-names>D. H.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Sarama</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2011</year>). <article-title>Early childhood mathematics intervention.</article-title> <source>Science</source>, <volume>333</volume>(<issue>6045</issue>), <fpage>968</fpage>–<lpage>970</lpage>. <pub-id pub-id-type="doi">10.1126/science.1204537</pub-id><pub-id pub-id-type="pmid">21852488</pub-id></mixed-citation></ref>
<ref id="r11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Clements</surname>, <given-names>D. H.</given-names></string-name>, <string-name name-style="western"><surname>Sarama</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Liu</surname>, <given-names>X. H.</given-names></string-name></person-group> (<year>2008</year>). <article-title>Development of a measure of early mathematics achievement using the Rasch model: The Research‐Based Early Maths Assessment.</article-title> <source>Educational Psychology</source>, <volume>28</volume>(<issue>4</issue>), <fpage>457</fpage>–<lpage>482</lpage>. <pub-id pub-id-type="doi">10.1080/01443410701777272</pub-id></mixed-citation></ref>
<ref id="r12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Davis‐Kean</surname>, <given-names>P. E.</given-names></string-name>, <string-name name-style="western"><surname>Domina</surname>, <given-names>T.</given-names></string-name>, <string-name name-style="western"><surname>Kuhfeld</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Ellis</surname>, <given-names>A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Gershoff</surname>, <given-names>E. T.</given-names></string-name></person-group> (<year>2022</year>). <article-title>It matters how you start: Early numeracy mastery predicts high school math course-taking and college attendance.</article-title> <source>Infant and Child Development</source>, <volume>31</volume>(<issue>2</issue>), <elocation-id>e2281</elocation-id>. <pub-id pub-id-type="doi">10.1002/icd.2281</pub-id><pub-id pub-id-type="pmid">38406821</pub-id></mixed-citation></ref>
<ref id="r13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Dong</surname>, <given-names>Y.</given-names></string-name>, <string-name name-style="western"><surname>Clements</surname>, <given-names>D. H.</given-names></string-name>, <string-name name-style="western"><surname>Day-Hess</surname>, <given-names>C. A.</given-names></string-name>, <string-name name-style="western"><surname>Sarama</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Dumas</surname>, <given-names>D.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Measuring early childhood mathematical cognition: Validating and equating two forms of the Research-based Early Mathematics Assessment.</article-title> <source>Journal of Psychoeducational Assessment</source>, <volume>39</volume>(<issue>8</issue>), <fpage>983</fpage>–<lpage>998</lpage>. <pub-id pub-id-type="doi">10.1177/07342829211037195</pub-id></mixed-citation></ref>
<ref id="r14"><mixed-citation publication-type="book">Ebel, R. L., &amp; Frisbie, D. A. (1991). <italic>Essentials of educational measurement.</italic> Prentice Hall.</mixed-citation></ref>
<ref id="r15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Gentner</surname>, <given-names>D.</given-names></string-name></person-group> (<year>2010</year>). <article-title>Bootstrapping the mind: Analogical processes and symbol systems.</article-title> <source>Cognitive Science</source>, <volume>34</volume>(<issue>5</issue>), <fpage>752</fpage>–<lpage>775</lpage>. <pub-id pub-id-type="doi">10.1111/j.1551-6709.2010.01114.x</pub-id><pub-id pub-id-type="pmid">21564235</pub-id></mixed-citation></ref>
<ref id="r16"><mixed-citation publication-type="book">Gentner, D., &amp; Colhoun, J. (2010). Analogical processes in human thinking and learning. In B. Glatzeder, V. Goel, &amp; A. Müller (Eds.), <italic>Towards a theory of thinking</italic> (pp. 35–48). Springer Berlin Heidelberg. <pub-id pub-id-type="doi">10.1007/978-3-642-03129-8_3</pub-id></mixed-citation></ref>
<ref id="r17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Ginsburg</surname>, <given-names>H. P.</given-names></string-name>, <string-name name-style="western"><surname>Lee</surname>, <given-names>Y.-S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Pappas</surname>, <given-names>S.</given-names></string-name></person-group> (<year>2016</year>). <article-title>A research-inspired and computer-guided clinical interview for mathematics assessment: Introduction, reliability and validity.</article-title> <source>ZDM</source>, <volume>48</volume>(<issue>7</issue>), <fpage>1003</fpage>–<lpage>1018</lpage>. <pub-id pub-id-type="doi">10.1007/s11858-016-0794-8</pub-id></mixed-citation></ref>
<ref id="r18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Ginsburg</surname>, <given-names>H. P.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Pappas</surname>, <given-names>S.</given-names></string-name></person-group> (<year>2016</year>). <article-title>Invitation to the birthday party: Rationale and description.</article-title> <source>ZDM</source>, <volume>48</volume>(<issue>7</issue>), <fpage>947</fpage>–<lpage>960</lpage>. <pub-id pub-id-type="doi">10.1007/s11858-016-0818-4</pub-id></mixed-citation></ref>
<ref id="r19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Gregorich</surname>, <given-names>S. E.</given-names></string-name></person-group> (<year>2006</year>). <article-title>Do self-report instruments allow meaningful comparisons across diverse population groups? Testing measurement invariance using the Confirmatory Factor Analysis Framework.</article-title> <source>Medical Care</source>, <volume>44</volume>(<issue>11</issue>), <fpage>S78</fpage>–<lpage>S94</lpage>. <pub-id pub-id-type="doi">10.1097/01.mlr.0000245454.12228.8f</pub-id><pub-id pub-id-type="pmid">17060839</pub-id></mixed-citation></ref>
<ref id="r20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Halford</surname>, <given-names>G. S.</given-names></string-name>, <string-name name-style="western"><surname>Wilson</surname>, <given-names>W. H.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Phillips</surname>, <given-names>S.</given-names></string-name></person-group> (<year>2010</year>). <article-title>Relational knowledge: The foundation of higher cognition.</article-title> <source>Trends in Cognitive Sciences</source>, <volume>14</volume>(<issue>11</issue>), <fpage>497</fpage>–<lpage>505</lpage>. <pub-id pub-id-type="doi">10.1016/j.tics.2010.08.005</pub-id><pub-id pub-id-type="pmid">20884275</pub-id></mixed-citation></ref>
<ref id="r21"><mixed-citation publication-type="book">Hirsh-Pasek, K. (Ed.). (2009). <italic>A mandate for playful learning in preschool: Presenting the evidence</italic>. Oxford University Press.</mixed-citation></ref>
<ref id="r22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Hoffman</surname>, <given-names>H.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Grialou</surname>, <given-names>T.</given-names></string-name></person-group> (<year>2005</year>). <article-title>Test of Early Mathematics Ability (3rd ed.) by Ginsburg, H. P., &amp; Baroody, A. J. (2003). Austin, TX: PRO-ED.</article-title> <source>Assessment for Effective Intervention</source>, <volume>30</volume>(<issue>4</issue>), <fpage>57</fpage>–<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1177/073724770503000409</pub-id></mixed-citation></ref>
<ref id="r23"><mixed-citation publication-type="web">Holloway, D., Green, L., &amp; Livingstone, S. (2013). <italic>Zero to eight: Young children and their internet use</italic>. LSE, London: EU Kids Online. <ext-link ext-link-type="uri" xlink:href="https://eprints.lse.ac.uk/52630/1/Zero_to_eight.pdf">https://eprints.lse.ac.uk/52630/1/Zero_to_eight.pdf</ext-link></mixed-citation></ref>
<ref id="r24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Holm</surname>, <given-names>S.</given-names></string-name></person-group> (<year>1979</year>). <article-title>A simple sequentially rejective multiple test procedure.</article-title> <source>Scandinavian Journal of Statistics</source>, <volume>6</volume>(<issue>2</issue>), <fpage>65</fpage>–<lpage>70</lpage>.</mixed-citation></ref>
<ref id="r25"><mixed-citation publication-type="book">Holyoak, K. J. (2012). Analogy and relational reasoning. In K. J. Holyoak &amp; R. G. Morrison (Eds.), <italic>The Oxford handbook of thinking and reasoning</italic> (pp. 234–259). Oxford University Press.</mixed-citation></ref>
<ref id="r26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kalra</surname>, <given-names>P. B.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Richland</surname>, <given-names>L. E.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Relational reasoning: A foundation for higher cognition based on abstraction.</article-title> <source>Mind, Brain and Education</source>, <volume>16</volume>(<issue>2</issue>), <fpage>149</fpage>–<lpage>152</lpage>. <pub-id pub-id-type="doi">10.1111/mbe.12323</pub-id></mixed-citation></ref>
<ref id="r27"><mixed-citation publication-type="web">Kline, R. B. (2023). <italic>Principles and practice of structural equation modeling</italic>. Guilford Press. <ext-link ext-link-type="uri" xlink:href="https://books.google.cl/books?id=t2CvEAAAQBAJ">https://books.google.cl/books?id=t2CvEAAAQBAJ</ext-link></mixed-citation></ref>
<ref id="r28"><mixed-citation publication-type="book">Lakoff, G. (2006). Conceptual metaphor. In D. Geeraerts (Ed.), <italic>Cognitive linguistics: Basic readings</italic> (pp. 185–238). De Gruyter Mouton. <pub-id pub-id-type="doi">10.1515/9783110199901.185</pub-id></mixed-citation></ref>
<ref id="r29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Lin</surname>, <given-names>H.-P.</given-names></string-name>, <string-name name-style="western"><surname>Chen</surname>, <given-names>K.-L.</given-names></string-name>, <string-name name-style="western"><surname>Chou</surname>, <given-names>W.</given-names></string-name>, <string-name name-style="western"><surname>Yuan</surname>, <given-names>K.-S.</given-names></string-name>, <string-name name-style="western"><surname>Yen</surname>, <given-names>S.-Y.</given-names></string-name>, <string-name name-style="western"><surname>Chen</surname>, <given-names>Y.-S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Chow</surname>, <given-names>J. C.</given-names></string-name></person-group> (<year>2020</year>). <article-title>Prolonged touch screen device usage is associated with emotional and behavioral problems, but not language delay, in toddlers.</article-title> <source>Infant Behavior and Development</source>, <volume>58</volume>, <elocation-id>101424</elocation-id>. <pub-id pub-id-type="doi">10.1016/j.infbeh.2020.101424</pub-id><pub-id pub-id-type="pmid">32120178</pub-id></mixed-citation></ref>
<ref id="r30"><mixed-citation publication-type="book">Melhuish, E., &amp; Petrogiannis, K. (2006). <italic>Early childhood care &amp; education: International perspectives</italic>. Routledge.</mixed-citation></ref>
<ref id="r31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Merkley</surname>, <given-names>R.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Ansari</surname>, <given-names>D.</given-names></string-name></person-group> (<year>2016</year>). <article-title>Why numerical symbols count in the development of mathematical skills: Evidence from brain and behavior.</article-title> <source>Current Opinion in Behavioral Sciences</source>, <volume>10</volume>, <fpage>14</fpage>–<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1016/j.cobeha.2016.04.006</pub-id></mixed-citation></ref>
<ref id="r32"><mixed-citation publication-type="book">Milicic, N., &amp; Schmidt, S. (2011). <italic>Manual de la Prueba de Precálculo</italic>. Editorial Universitaria.</mixed-citation></ref>
<ref id="r33"><mixed-citation publication-type="web">MINEDUC. (2018). <italic>Bases Curriculares Educación Parvularia</italic>. Ministerio de Educación de Chile. <ext-link ext-link-type="uri" xlink:href="https://parvularia.mineduc.cl/wp-content/uploads/2019/09/Bases_Curriculares_Ed_Parvularia_2018-1.pdf">https://parvularia.mineduc.cl/wp-content/uploads/2019/09/Bases_Curriculares_Ed_Parvularia_2018-1.pdf</ext-link></mixed-citation></ref>
<ref id="r34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Navarrete-Ulloa</surname>, <given-names>J. A.</given-names></string-name></person-group> (<year>2024</year>). <article-title>Learning rates: A correction of gain scores to assess math learning interventions.</article-title> <source>Journal of Experimental Education</source>. <comment>Advance online publication</comment>. <pub-id pub-id-type="doi">10.1080/00220973.2024.2352768</pub-id></mixed-citation></ref>
<ref id="r35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Navarrete</surname>, <given-names>J. A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Dartnell</surname>, <given-names>P.</given-names></string-name></person-group> (<year>2017</year>). <article-title>Towards a category theory approach to analogy: Analyzing re-representation and acquisition of numerical knowledge.</article-title> <source>PLoS Computational Biology</source>, <volume>13</volume>(<issue>8</issue>), <elocation-id>e1005683</elocation-id>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1005683</pub-id><pub-id pub-id-type="pmid">28841643</pub-id></mixed-citation></ref>
<ref id="r36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Navarrete</surname>, <given-names>J. A.</given-names></string-name>, <string-name name-style="western"><surname>Gómez</surname>, <given-names>D. M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Dartnell</surname>, <given-names>P.</given-names></string-name></person-group> (<year>2018</year>). <article-title>Promoting preschoolers’ numerical knowledge through spatial analogies: Numbers’ spatial alignment influences its learning.</article-title> <source>Contemporary Educational Psychology</source>, <volume>54</volume>, <fpage>112</fpage>–<lpage>124</lpage>. <pub-id pub-id-type="doi">10.1016/j.cedpsych.2018.06.006</pub-id></mixed-citation></ref>
<ref id="r37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Navarrete-Ulloa</surname>, <given-names>J. A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Munoz-Rubke</surname>, <given-names>F.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Playing board games to learn rational numbers: A proof-of-concept.</article-title> <source>Mind, Brain, and Education</source>, <volume>16</volume>(<issue>4</issue>), <fpage>293</fpage>–<lpage>299</lpage>. <pub-id pub-id-type="doi">10.1111/mbe.12335</pub-id></mixed-citation></ref>
<ref id="r38"><mixed-citation publication-type="confproc">Navarrete-Ulloa, J. A., Ponce Pradenas, L., Flores, C. A., &amp; Verschae, R. (2023). An automated assessment of early math abilities based on digital games. In <italic>ICIET 2023: 2023 11th International Conference on Information and Education Technology. Proceedings</italic> (pp. 172–176). <pub-id pub-id-type="doi">10.1109/ICIET56899.2023.10111319</pub-id></mixed-citation></ref>
<ref id="r39"><mixed-citation publication-type="book">NCTM. (2006). <italic>Curriculum focal points for prekindergarten through grade 8 mathematics: A quest for coherence</italic> (2nd print). National Council of Teachers of Mathematics.</mixed-citation></ref>
<ref id="r40"><mixed-citation publication-type="book">Nunnally, J. C., &amp; Bernstein, I. H. (1994). <italic>Psychometric theory</italic> (3rd ed.). McGraw-Hill.</mixed-citation></ref>
<ref id="r41"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Outhwaite</surname>, <given-names>L. A.</given-names></string-name>, <string-name name-style="western"><surname>Aunio</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>Leung</surname>, <given-names>J. K. Y.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Van Herwegen</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2024</year>). <article-title>Measuring mathematical skills in early childhood: A systematic review of the psychometric properties of early maths assessments and screeners.</article-title> <source>Educational Psychology Review</source>, <volume>36</volume>(<issue>4</issue>), <elocation-id>110</elocation-id>. <pub-id pub-id-type="doi">10.1007/s10648-024-09950-6</pub-id></mixed-citation></ref>
<ref id="r42"><mixed-citation publication-type="other">Parsons, S., &amp; Bynner, J. (2005). <italic>Does numeracy matter more?</italic> National Research and Development Centre for Adult Literacy and Numeracy.</mixed-citation></ref>
<ref id="r43"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Perez Mejias</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>McAllister</surname>, <given-names>D. E.</given-names></string-name>, <string-name name-style="western"><surname>Diaz</surname>, <given-names>K. G.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Ravest</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2021</year>). <article-title>A longitudinal study of the gender gap in mathematics achievement: Evidence from Chile.</article-title> <source>Educational Studies in Mathematics</source>, <volume>107</volume>(<issue>3</issue>), <fpage>583</fpage>–<lpage>605</lpage>. <pub-id pub-id-type="doi">10.1007/s10649-021-10052-1</pub-id></mixed-citation></ref>
<ref id="r44"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Platas</surname>, <given-names>L. M.</given-names></string-name>, <string-name name-style="western"><surname>Ketterlin-Geller</surname>, <given-names>L. R.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Sitabkhan</surname>, <given-names>Y.</given-names></string-name></person-group> (<year>2016</year>). <article-title>Using an assessment of early mathematical knowledge and skills to inform policy and practice: Examples from the Early Grade Mathematics Assessment.</article-title> <source>International Journal of Education in Mathematics, Science and Technology</source>, <volume>4</volume>(<issue>3</issue>), <fpage>163</fpage>–<lpage>173</lpage>. <pub-id pub-id-type="doi">10.18404/ijemst.20881</pub-id></mixed-citation></ref>
<ref id="r45"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Ramani</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Daubert</surname>, <given-names>E.</given-names></string-name>, <string-name name-style="western"><surname>Lin</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Kamarsu</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Wodzinski</surname>, <given-names>A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Jaeggi</surname>, <given-names>S. M.</given-names></string-name></person-group> (<year>2020</year>). <article-title>Racing dragons and remembering aliens: Benefits of playing number and working memory games on kindergartners’ numerical knowledge.</article-title> <source>Developmental Science</source>, <volume>23</volume>(<issue>4</issue>), <elocation-id>e12908</elocation-id>. <pub-id pub-id-type="doi">10.1111/desc.12908</pub-id><pub-id pub-id-type="pmid">31587470</pub-id></mixed-citation></ref>
<ref id="r46"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Ramani</surname>, <given-names>G.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Siegler</surname>, <given-names>R. S.</given-names></string-name></person-group> (<year>2008</year>). <article-title>Promoting broad and stable improvements in low-income children’s numerical knowledge through playing number board games.</article-title> <source>Child Development</source>, <volume>79</volume>(<issue>2</issue>), <fpage>375</fpage>–<lpage>394</lpage>. <pub-id pub-id-type="doi">10.1111/j.1467-8624.2007.01131.x</pub-id><pub-id pub-id-type="pmid">18366429</pub-id></mixed-citation></ref>
<ref id="r47"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Ramani</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Siegler</surname>, <given-names>R. S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Hitti</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2012</year>). <article-title>Taking it to the classroom: Number board games as a small group learning activity.</article-title> <source>Journal of Educational Psychology</source>, <volume>104</volume>(<issue>3</issue>), <fpage>661</fpage>–<lpage>672</lpage>. <pub-id pub-id-type="doi">10.1037/a0028995</pub-id></mixed-citation></ref>
<ref id="r48"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Richland</surname>, <given-names>L. E.</given-names></string-name>, <string-name name-style="western"><surname>Holyoak</surname>, <given-names>K. J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Stigler</surname>, <given-names>J. W.</given-names></string-name></person-group> (<year>2004</year>). <article-title>Analogy use in eighth-grade mathematics classrooms.</article-title> <source>Cognition and Instruction</source>, <volume>22</volume>(<issue>1</issue>), <fpage>37</fpage>–<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1207/s1532690Xci2201_2</pub-id></mixed-citation></ref>
<ref id="r49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Richland</surname>, <given-names>L. E.</given-names></string-name>, &amp; <string-name name-style="western"><surname>McDonough</surname>, <given-names>I. M.</given-names></string-name></person-group> (<year>2010</year>). <article-title>Learning by analogy: Discriminating between potential analogs.</article-title> <source>Contemporary Educational Psychology</source>, <volume>35</volume>(<issue>1</issue>), <fpage>28</fpage>–<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1016/j.cedpsych.2009.09.001</pub-id></mixed-citation></ref>
<ref id="r50"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Richland</surname>, <given-names>L. E.</given-names></string-name>, <string-name name-style="western"><surname>Stigler</surname>, <given-names>J. W.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Holyoak</surname>, <given-names>K. J.</given-names></string-name></person-group> (<year>2012</year>). <article-title>Teaching the conceptual structure of mathematics.</article-title> <source>Educational Psychologist</source>, <volume>47</volume>(<issue>3</issue>), <fpage>189</fpage>–<lpage>203</lpage>. <pub-id pub-id-type="doi">10.1080/00461520.2012.667065</pub-id></mixed-citation></ref>
<ref id="r51"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Siegler</surname>, <given-names>R. S.</given-names></string-name>, <string-name name-style="western"><surname>Duncan</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Davis-Kean</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>Duckworth</surname>, <given-names>K.</given-names></string-name>, <string-name name-style="western"><surname>Claessens</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Engel</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Susperreguy</surname>, <given-names>M. I.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Chen</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2012</year>). <article-title>Early predictors of high school mathematics achievement.</article-title> <source>Psychological Science</source>, <volume>23</volume>(<issue>7</issue>), <fpage>691</fpage>–<lpage>697</lpage>. <pub-id pub-id-type="doi">10.1177/0956797612440101</pub-id><pub-id pub-id-type="pmid">22700332</pub-id></mixed-citation></ref>
<ref id="r52"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Siegler</surname>, <given-names>R. S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Ramani</surname>, <given-names>G.</given-names></string-name></person-group> (<year>2009</year>). <article-title>Playing linear number board games—But not circular ones—Improves low-income preschoolers’ numerical understanding.</article-title> <source>Journal of Educational Psychology</source>, <volume>101</volume>(<issue>3</issue>), <fpage>545</fpage>–<lpage>560</lpage>. <pub-id pub-id-type="doi">10.1037/a0014239</pub-id></mixed-citation></ref>
<ref id="r53"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Spelke</surname>, <given-names>E. S.</given-names></string-name></person-group> (<year>2005</year>). <article-title>Sex differences in intrinsic aptitude for mathematics and science? A critical review.</article-title> <source>The American Psychologist</source>, <volume>60</volume>(<issue>9</issue>), <fpage>950</fpage>–<lpage>958</lpage>. <pub-id pub-id-type="doi">10.1037/0003-066X.60.9.950</pub-id><pub-id pub-id-type="pmid">16366817</pub-id></mixed-citation></ref>
<ref id="r54"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Strasser</surname>, <given-names>K.</given-names></string-name>, <string-name name-style="western"><surname>Lissi</surname>, <given-names>M. R.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Silva</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2009</year>). <article-title>Gestión del Tiempo en 12 Salas Chilenas de Kindergarten: Recreo, Colación y Algo de Instrucción.</article-title> <source>Psykhe (Santiago)</source>, <volume>18</volume>(<issue>1</issue>), <fpage>85</fpage>–<lpage>96</lpage>. <pub-id pub-id-type="doi">10.4067/S0718-22282009000100008</pub-id></mixed-citation></ref>
<ref id="r55"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Watts</surname>, <given-names>T. W.</given-names></string-name>, <string-name name-style="western"><surname>Clements</surname>, <given-names>D. H.</given-names></string-name>, <string-name name-style="western"><surname>Sarama</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Wolfe</surname>, <given-names>C. B.</given-names></string-name>, <string-name name-style="western"><surname>Spitler</surname>, <given-names>M. E.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Bailey</surname>, <given-names>D. H.</given-names></string-name></person-group> (<year>2017</year>). <article-title>Does early mathematics intervention change the processes underlying children’s learning?</article-title> <source>Journal of Research on Educational Effectiveness</source>, <volume>10</volume>(<issue>1</issue>), <fpage>96</fpage>–<lpage>115</lpage>. <pub-id pub-id-type="doi">10.1080/19345747.2016.1204640</pub-id><pub-id pub-id-type="pmid">29399243</pub-id></mixed-citation></ref>
<ref id="r56"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Watts</surname>, <given-names>T. W.</given-names></string-name>, <string-name name-style="western"><surname>Duncan</surname>, <given-names>G. J.</given-names></string-name>, <string-name name-style="western"><surname>Clements</surname>, <given-names>D. H.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Sarama</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2018</year>). <article-title>What is the long-run impact of learning mathematics during preschool?</article-title> <source>Child Development</source>, <volume>89</volume>(<issue>2</issue>), <fpage>539</fpage>–<lpage>555</lpage>. <pub-id pub-id-type="doi">10.1111/cdev.12713</pub-id><pub-id pub-id-type="pmid">28105650</pub-id></mixed-citation></ref>
<ref id="r57"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Watts</surname>, <given-names>T. W.</given-names></string-name>, <string-name name-style="western"><surname>Duncan</surname>, <given-names>G. J.</given-names></string-name>, <string-name name-style="western"><surname>Siegler</surname>, <given-names>R. S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Davis-Kean</surname>, <given-names>P. E.</given-names></string-name></person-group> (<year>2014</year>). <article-title>What’s past is prologue: Relations between early mathematics knowledge and high school achievement.</article-title> <source>Educational Researcher</source>, <volume>43</volume>(<issue>7</issue>), <fpage>352</fpage>–<lpage>360</lpage>. <pub-id pub-id-type="doi">10.3102/0013189X14553660</pub-id><pub-id pub-id-type="pmid">26806961</pub-id></mixed-citation></ref>
<ref id="r58"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zainuddin</surname>, <given-names>Z.</given-names></string-name>, <string-name name-style="western"><surname>Chu</surname>, <given-names>S. K. W.</given-names></string-name>, <string-name name-style="western"><surname>Shujahat</surname>, <given-names>M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Perera</surname>, <given-names>C. J.</given-names></string-name></person-group> (<year>2020</year>). <article-title>The impact of gamification on learning and instruction: A systematic review of empirical evidence.</article-title> <source>Educational Research Review</source>, <volume>30</volume>, <elocation-id>100326</elocation-id>. <pub-id pub-id-type="doi">10.1016/j.edurev.2020.100326</pub-id></mixed-citation></ref>
<ref id="r59"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zhu</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Chiu</surname>, <given-names>M. M.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Early home numeracy activities and later mathematics achievement: Early numeracy, interest, and self-efficacy as mediators.</article-title> <source>Educational Studies in Mathematics</source>, <volume>102</volume>(<issue>2</issue>), <fpage>173</fpage>–<lpage>191</lpage>. <pub-id pub-id-type="doi">10.1007/s10649-019-09906-6</pub-id></mixed-citation></ref>
</ref-list>
	<sec sec-type="data-availability" id="das"><title>Data Availability</title>
		<p>The data and analysis script of this study are publicly available (see <xref ref-type="bibr" rid="sp1_r1">Navarrete-Ulloa et al., 2025S-a</xref>).</p>
	</sec>
	<sec sec-type="supplementary-material" id="sp1">
        <title>Supplementary Materials</title>
		<p>The Supplementary Materials contain the following items:</p>
<list id="L1" list-type="bullet">
<list-item><p>The data and analysis script (<xref ref-type="bibr" rid="sp1_r1">Navarrete-Ulloa et al., 2025S-a</xref>)</p></list-item>
<list-item><p>Additional materials (<xref ref-type="bibr" rid="sp1_r2">Navarrete-Ulloa et al., 2025S-b</xref>): These materials provide additional details and analyses related to the TPM (Test of Preschool Mathematics) assessment, which is aligned with the Chilean curriculum and other curricula (see the paper’s main text). The supplementary materials enhance the transparency and comprehensiveness of the TPM assessment by providing detailed mappings, transcripts, and additional analyses. The inclusion of the T1 data analysis complements the T2 findings, offering a more complete picture of the research outcomes. The percentile scales and correlation data help educators and researchers interpret the assessment results in a meaningful way.</p></list-item>
</list>
		<ref-list content-type="supplementary-material" id="suppl-ref-list">
			<ref id="sp1_r1">
				<mixed-citation publication-type="supplementary-material">
					<person-group person-group-type="author">
							<name name-style="western">
								<surname>Navarrete-Ulloa</surname>
								<given-names>J. A.</given-names>
							</name>
							<name name-style="western">
								<surname>Gómez</surname>
								<given-names>D. M.</given-names>
							</name>
							<name name-style="western">
								<surname>Ponce</surname>
								<given-names>L.</given-names>
							</name>
							<name name-style="western">
								<surname>Munoz-Rubke</surname>
								<given-names>F.</given-names>
							</name>
							<name name-style="western">
								<surname>Dartnell</surname>
								<given-names>P. R.</given-names>
							</name>
					</person-group> (<year>2025</year><comment>S-a</comment>). <source>Supplementary materials to "Assessing early math skills in preschoolers by using digital games"</source> <comment>[Research data and code]</comment>. <publisher-name>OSF</publisher-name>. <pub-id pub-id-type="doi" xlink:href="https://doi.org/10.17605/OSF.IO/87XVQ">10.17605/OSF.IO/87XVQ</pub-id>		
				</mixed-citation>
			</ref>
<ref id="sp1_r2">
				<mixed-citation publication-type="supplementary-material">
					<person-group person-group-type="author">
							<name name-style="western">
								<surname>Navarrete-Ulloa</surname>
								<given-names>J. A.</given-names>
							</name>
							<name name-style="western">
								<surname>Gómez</surname>
								<given-names>D. M.</given-names>
							</name>
							<name name-style="western">
								<surname>Ponce</surname>
								<given-names>L.</given-names>
							</name>
							<name name-style="western">
								<surname>Munoz-Rubke</surname>
								<given-names>F.</given-names>
							</name>
							<name name-style="western">
								<surname>Dartnell</surname>
								<given-names>P. R.</given-names>
							</name>
					</person-group> (<year>2025</year><comment>S-b</comment>). <source>Supplementary materials to "Assessing early math skills in preschoolers by using digital games"</source> <comment>[Additional materials]</comment>. <publisher-name>PsychOpen GOLD</publisher-name>. <pub-id pub-id-type="doi" xlink:href="https://doi.org/10.23668/psycharchives.16191">10.23668/psycharchives.16191</pub-id>		
				</mixed-citation>
			</ref>
		</ref-list>
	</sec>
<fn-group>
<fn fn-type="conflict"><p>The authors have declared that no competing interests exist.</p></fn>
</fn-group>
<notes>
   <title>Related Versions</title>
   <p>The present work is related to a prior proceedings paper presented at the 11<sup>th</sup> International Conference on Information and Education Technology (<xref ref-type="bibr" rid="r38">Navarrete-Ulloa et al., 2023</xref>). The proceedings paper focused on the software architecture of the TPM and an exploratory analysis of the data associated with the numerical reasoning dimension.</p>
</notes>
<ack>
<p>The authors have no additional (i.e., non-financial) support to report.</p>
</ack>
</back>
</article>