diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a2be9e2..401c293 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,8 @@ +--- default_language_version: python: python repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: - id: check-merge-conflict @@ -15,16 +16,20 @@ repos: exclude: ChangeLog-spell-corrected.diff - id: check-json exclude: mathics_scanner/data/character-tables.json -- repo: https://github.com/pycqa/isort + - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: - id: isort stages: [pre-commit] args: ["--profile", "black"] -- repo: https://github.com/psf/black + - repo: https://github.com/psf/black rev: 25.11.0 hooks: - id: black language_version: python3 stages: [pre-commit] exclude: version.py + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.31.0 # Use the latest version + hooks: + - id: check-github-workflows diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 903be57..13a597d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -13,7 +13,7 @@ build: # Build documentation in the docs/ directory with Sphinx sphinx: - configuration: docs/build/conf.py + configuration: docs/source/conf.py # We recommend specifying your dependencies to enable reproducible builds: # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html diff --git a/.yamllint b/.yamllint new file mode 100644 index 0000000..1052c4f --- /dev/null +++ b/.yamllint @@ -0,0 +1,4 @@ +# -*- conf -*- +# .yamllint configuration file +rules: + line-length: disable diff --git a/CHANGES.rst b/CHANGES.md similarity index 67% rename from CHANGES.rst rename to CHANGES.md index 8e823f6..0ba158a 100644 --- a/CHANGES.rst +++ b/CHANGES.md @@ -1,66 +1,60 @@ -CHANGES -======= +# CHANGES -10.0.0 ------- +## 10.0.1 + +May 6, 2026 + +Revise due to packaging missing a yaml file. 
Some small changes since release have been added too. + +## 10.0.0 April 18, 2026 -Supports Python 3.14. Python 3.10 support has been dropped. While Python 3.10 may still work, it is not supported. +Supports Python 3.14. Python 3.10 support has been dropped. While Python 3.10 may still work, it is not supported. -More YAML/JSON tables added. mathics-core now imports values via ``mathics.characters``. Previously it -created JSON files on its own. +More YAML/JSON tables added. mathics-core now imports values via `mathics.characters`. Previously, it created JSON files on its own. -#. `PR #173 `_ Python 3.14 supported. Python 3.10 support has been dropped. (It may still work, but is not supported.) -#. `PR #158 `_ Name Pattern token introduced to handle ``?`` and ``??`` operands. -#. `PR #156 `_ Token names align more closely with ``CodeParser`Tokenize``. -#. `PR #149 `_ Add YAML/JSON table for box grouping characters -#. `PR #147 `_ Disambiguate Unicode for \[Rule] and \[DirectedEdge]. -#. `PR #145 `_ Add Unicode to ASCII conversion table to named characters +1. [PR \#158](https://github.com/Mathics3/mathics-core/pull/158) Name Pattern token introduced to handle `?` and `??` operands. +2. [PR \#156](https://github.com/Mathics3/mathics-core/pull/156) Token names align more closely with `` CodeParser`Tokenize ``. +3. [PR \#149](https://github.com/Mathics3/mathics-core/pull/149) Add YAML/JSON table for box grouping characters +4. [PR \#147](https://github.com/Mathics3/mathics-core/pull/147) Disambiguate Unicode for \[Rule\] and \[DirectedEdge\]. +5. [PR \#145](https://github.com/Mathics3/mathics-core/pull/145) Add Unicode to ASCII conversion table to named characters -Corrections to YAML table entries, e.g. "latex" field additional/corrections in named characters. +Corrections to YAML table entries, e.g., "latex" field; additional/corrections in named characters. Numerous spelling corrections were performed over the code. -The name ``Mathics3`` replaces ``Mathics``. 
``Mathics`` was the monolithic -Python 2-ish code. Mathics3 has rewritten a number of major +The name `Mathics3` replaces `Mathics`. `Mathics` was the monolithic Python 2-ish code. Mathics3 has rewritten several major subcomponents, and the scanner and the character and operators tables in YAML are now in this repository. + 2.0.0 ----- +August 28, 2025 + Supports Python 3.13. Python 3.8 and 3.9 support has been dropped. Note: There are incompatible changes. Use with Mathics-core 9.0.0 or greater. -Support for saving token position information was started. This is in -module ``mathics_scanner.location``. Additional location information is saved when -``mathics_scanner.location.TRACK_LOCATIONS`` is set to ``True``. +Support for saving token position information was started. This is in module ``mathics_scanner.location``. Additional location information is saved when ``mathics_scanner.location.TRACK_LOCATIONS`` is set to ``True``. -Scanning was revised to handle more kinds of escape sequences and to -be sensitive to their placement inside and outside a string. This is in module ``mathics_scanner.escape_sequences``. +Scanning was revised to handle more kinds of escape sequences and to be sensitive to their placement inside and outside a string. This is in module ``mathics_scanner.escape_sequences``. Tokenization support for Box Input operator ``\*`` was added. -Small YML changes were made to reflect a better understanding of boxing -operators. In particular, ``\*`` is not a (prefix) operator. - -More AMSLaTeX translations added for named characters. - -Handle escape sequences in string literals. +Small YML changes were made to reflect a better understanding of boxing operators. In particular, ``\*`` is not a (prefix) operator. Internals -+++++++++ +--------- -Mathics3 scanner exceptions of class TranslateError are incompatible -with previous versions, and now store error parameters, "name", "tag", and -"args". 
+Mathics3 scanner exceptions of class TranslateError are incompatible with previous versions, and now store error parameters, "name", "tag", and "args". Bugs Fixed -++++++++++ +---------- -#. #125 Hex escape sequence in string literal doesn't work. -#. #136 Is YAML file valid? +* #125 Hex escape sequence in string literal doesn't work. +* #136 Is the YAML file valid? 1.4.1 ----- @@ -68,7 +62,7 @@ Bugs Fixed Jan 26, 2025 -Re-release to include ``operators.yml`` into the tarball/wheel. +Re-release to include `operators.yml` into the tarball/wheel. 1.4.0 @@ -106,17 +100,19 @@ Operator precedence values have been gone over. Aug 9, 2024 -Python 3.8 is now the minimum Python supported. Python 3.12 is supported. +Python 3.8 is now the minimum Python supported. Python 3.12 is supported. Various dependencies elsewhere force 3.8 or newer. * Packaging was redone to be able to support Python 3.12. -* Files now follow current Python black formatting and ``isort`` import ordering -* Some Python code linting +* Files now follow current Python black formatting and isort import ordering +* Some Python code linting. 1.3.0 ------ +Feb 25, 2025 + * Add escape-code sequence for 32-bit Unicode. Issue #48. * Correct ``Infix`` and ``Tilde`` character symbols * Support double backslash (``\\``) as a single backslash character (``\``). @@ -135,10 +131,11 @@ Various dependencies elsewhere force 3.8 or newer. ----- * Start adding AMSLateX names. -* Add ``ApplyTo``, and ``Factorial2``. -* Adjust ``Tilde``, and ``Factorial``. +* Add `ApplyTo` +* Add `Factorial2`. PR #30 +* Adjust `Tilde`, and `Factorial`. * Regularize Unicode equivalents. -* Add named-characters.yml to distribution packages; Issue #32. +* Add `named-characters.yml` to distribution packages; Issue #32. * Use SPDX identifier in license; PR #31. 1.2.2-1.2.3 @@ -152,10 +149,10 @@ Many thanks to Victor the packager for AUR for pointing this out. ----- * Add tables for operator precedence. -* Start to add AMSLaTeX symbols. 
(A future release will finish this) -* Revise ``README.rst``. -* Some small corrections: ``Implies`` -* Make ``ujson`` optional +* Start to add AMSLaTeX symbols. (A future release will finish this.) +* Revise `README.rst`. +* Some small corrections: `Implies`. +* Make `ujson` optional. 1.2.0 diff --git a/ChangeLog-spell-corrected.diff b/ChangeLog-spell-corrected.diff index ff5c7c6..d3266cd 100644 --- a/ChangeLog-spell-corrected.diff +++ b/ChangeLog-spell-corrected.diff @@ -1,6 +1,15 @@ ---- ChangeLog 2026-04-08 21:05:39.299635995 -0400 -+++ ChangeLog-spell-corrected 2026-04-08 21:08:32.699729697 -0400 -@@ -63,7 +63,7 @@ +--- ChangeLog 2026-05-06 20:24:55.127918087 -0400 ++++ ChangeLog-spell-corrected 2026-05-06 20:16:36.177300260 -0400 +@@ -24,7 +24,7 @@ + 2026-05-04 rocky + + * .pre-commit-config.yaml, .yamllint, mathics_scanner/location.py, +- pyproject.toml: Mostly administative stuff location.py: guard against no filename (happens in an interactive ++ pyproject.toml: Mostly administrative stuff location.py: guard against no filename (happens in an interactive + session) Add YAML lint checking, especially for GitHub CI work YAML. + + 2026-04-19 R. Bernstein +@@ -159,7 +159,7 @@ 2026-03-21 R. Bernstein @@ -9,7 +18,7 @@ Remove mention of Slot* as operators (#164) Right now, this interacts badly with ToString and Boxing in mathics-core When that's addressed, we should reinstate these. -@@ -99,7 +99,7 @@ +@@ -195,7 +195,7 @@ 2026-03-15 rocky @@ -18,7 +27,7 @@ Revert associativity of Pattern!? 2026-03-15 rocky -@@ -166,7 +166,7 @@ +@@ -262,7 +262,7 @@ * .github/workflows/mathics3-doctest.yml, mathics_scanner/tokeniser.py, test/test_tokeniser.py: Follow CodeTokenize Token names more closely (#156) Use `CodeTokenize` token names more often. In particular: * Raw{Left,Right}Parenthesis->{Open,Close}Paren * @@ -27,7 +36,7 @@ 2026-03-08 R. Bernstein -@@ -254,7 +254,7 @@ +@@ -350,7 +350,7 @@ 2026-02-07 R. 
Bernstein * mathics_scanner/data/grouping-characters.yml, @@ -36,7 +45,7 @@ First cut at grouping information (#149) 2026-02-06 R. Bernstein -@@ -271,7 +271,7 @@ +@@ -367,7 +367,7 @@ * mathics_scanner/characters.py, mathics_scanner/data/named-characters.yml, @@ -45,7 +54,7 @@ Reassign Unicode for `\[Rule]` and `\[DirectedEdge]` (#147) Reassign Unicode assignments for `\[Rule]` and `\[DirectedEdge]` which shared the Unicode for `\[RightArrow]`. `\[Rule]`, `\[RightArrow]`, and `\[DirectedEdge]` need distinct Unicode and glyph symbols. Use a Unicode glyph that matches the -@@ -445,7 +445,7 @@ +@@ -541,7 +541,7 @@ Tokenizer.get_more_input * Bang more on mathics3-tokens Start to show syntax errors. * Start going over error messages... In particular errors with octal digits and incomplete named errors. Go over docstrings in escape_sequences.py * Improve error handling... and add more tests. * Improve scanner... named-characters.yml: \[Mu] is letterlike tokeniser.py: Correct identifier or pattern for those having letterlike escape sequences * Handle EscapSequence errors better * Handle embedded escape sequences in Symbols... and also add Theta to the list of letterlike symbols * WIP - bang on Symbol tokenization with backslash Replace .format() with f-strings. Add comments around Symbol @@ -54,7 +63,7 @@ smaller chunks. * Small bugs related to escape-character handling NamedChracterSyntax should be a new-style TranslateError self.code -> self.source_text misc sntx_message() fixes. Document better. * Use git branch for testing Mathics * Revise Scanner error exception class TranslateError, TranslateErrorNew, ScanError now become ScannerError * Let's use 3.12 in CI testing it should be just a little bit faster (and it is more modern) * Small tidying changes to comments * ScannerError -> SyntaxError Use more direct and simpler error class name that is is more like its other subclassed errors. 
* More tests * One more escape test * Allow escape space "\ " + more string tests * Start unit test for comments * Fix a doc spelling typo + minor doc tweak * invalid escape sequences inside strings... An invalid escape sequence inside a string, like "\(a \+\)" is not -@@ -463,7 +463,7 @@ +@@ -559,7 +559,7 @@ mathics_scanner/characters.py, mathics_scanner/data/named-characters.yml, mathics_scanner/errors.py, mathics_scanner/mathics3_tokens.py, @@ -63,7 +72,7 @@ Preparatory changes for prescanner removal (#127) To handle escape sequences better, such as ignoring them in comments, branch revise-escape-sequence-scanning was started as more major refactor. However, that has become too large and is too hard to get right. -@@ -550,7 +550,7 @@ +@@ -646,7 +646,7 @@ 2025-01-18 R. Bernstein * mathics_scanner/mathics3_tokens.py, mathics_scanner/tokeniser.py, @@ -72,7 +81,7 @@ Add mathics tokens cli (#115) * command-line routine showing Mathics3 tokenization * Add --CodeTokenize option . Option --CodeTokenize (-C) on mathics3-tokens shows tokens more like it is shown in CodeParser's CodeTokenize. * Remove line number in In[] and Out[]. We don't have a % retrieval -@@ -558,7 +558,7 @@ +@@ -654,7 +654,7 @@ 2025-01-15 R. Bernstein @@ -81,7 +90,7 @@ \smallmid. more unicod refs.. (#118) Remove note 5 which is was wrong, and probably comes from the days when I was combining operators with character symbols which is wrong. Possibly in the future, this will be reworked so Operators (like -@@ -580,7 +580,7 @@ +@@ -676,7 +676,7 @@ 2025-01-08 rocky @@ -90,7 +99,7 @@ operator-precedence -> operator-precedences Note: Mathics-core will be broken until this is synchronized. 2025-01-08 rocky -@@ -599,12 +599,12 @@ +@@ -695,12 +695,12 @@ test_character_table_consistency.py}, test/test_general_yaml_sanity.py, test/test_operators.py: Add operator-to-amslatex and builtin-constants... 
(#114) operator-to-amslatex gives the AMSLaTeX string for unicode operator @@ -105,7 +114,7 @@ Correct FractionBox and other small changes (#113) 2025-01-03 R. Bernstein -@@ -647,7 +647,7 @@ +@@ -743,7 +743,7 @@ 2024-12-03 R. Bernstein * mathics_scanner/data/named-characters.yml, @@ -114,7 +123,7 @@ operator tests yet again; update YAML comments... (#106) * Test_mathics_precedence needs not to fail work when mathics is installed. Tweak failure messages * Update comments in character and operators YAML files. * Remove Parse field in operators YML. 2024-12-03 Juan Mauricio Matera -@@ -713,10 +713,10 @@ +@@ -809,10 +809,10 @@ mathics_scanner/data/operators.yml, mathics_scanner/generate/operator_csv_to_yml.py, mathics_scanner/tokeniser.py: tokenizer gets no-meaning infix ops @@ -127,7 +136,7 @@ intoseparate variables for each type they can hold. * Scanner uses operators json mm (#88, #89) * Change initialization of the tokenizer --------- Co-authored-by: Juan Mauricio Matera 2024-11-23 rocky -@@ -745,7 +745,7 @@ +@@ -841,7 +841,7 @@ mathics_scanner/generate/build_operator_tables.py, test/test_operators.py: Include no-meaning operators in JSON op extraction (#85) Include no-meaning operators in JSON op extraction, separated by @@ -136,7 +145,7 @@ 2024-11-18 R. Bernstein -@@ -787,8 +787,8 @@ +@@ -883,8 +883,8 @@ 2024-09-17 R. Bernstein * mathics_scanner/data/named-characters.yml, @@ -147,7 +156,7 @@ 2024-09-09 R. Bernstein -@@ -972,7 +972,7 @@ +@@ -1068,7 +1068,7 @@ 2023-12-17 rocky * admin-tools/pyenv-versions, @@ -156,7 +165,7 @@ Administrivia: typos and bump versions 2023-08-23 rocky -@@ -1075,7 +1075,7 @@ +@@ -1171,7 +1171,7 @@ test/test_general_yaml_sanity.py, test/test_has_unicode_inverse_sanity.py, test/test_table_consistency.py, test/test_unicode_equivalent.py: Go @@ -165,7 +174,7 @@ "unicode-equivalent" there is no unicode-equivalent, but has-unicode-inverse is set. 
* Add test that unicode-equivalent isn't the same things as ascii -@@ -1201,7 +1201,7 @@ +@@ -1297,7 +1297,7 @@ * mathics_scanner/data/named-characters.yml, mathics_scanner/tokeniser.py: Change the precedence of `|->` @@ -174,7 +183,7 @@ by `->` 2021-09-27 rocky -@@ -1268,7 +1268,7 @@ +@@ -1364,7 +1364,7 @@ 2021-07-31 R. Bernstein @@ -183,7 +192,7 @@ 2021-07-10 rocky -@@ -1300,18 +1300,18 @@ +@@ -1396,18 +1396,18 @@ 2021-06-29 rocky * mathics_scanner/data/named-characters.yml, @@ -194,7 +203,7 @@ 2021-06-29 rocky - * mathics_scanner/data/named-characters.yml: Noe one more source -+ * mathics_scanner/data/named-characters.yml: We now have one more source ++ * mathics_scanner/data/named-characters.yml: We now have one more source. 2021-06-29 rocky @@ -205,7 +214,7 @@ 2021-06-28 rocky -@@ -1332,7 +1332,7 @@ +@@ -1428,7 +1428,7 @@ 2021-06-27 rocky @@ -214,7 +223,7 @@ Get ready for release 1.2.3 2021-06-26 rocky -@@ -1422,7 +1422,7 @@ +@@ -1518,7 +1518,7 @@ mathics_scanner/tokeniser.py, mathics_scanner/version.py, setup.py, test/test_has_unicode_inverse_sanity.py, test/test_letterlikes_sanity.py, test/test_table_consistency.py, @@ -223,7 +232,7 @@ Blacken files 2021-06-21 rocky -@@ -1463,7 +1463,7 @@ +@@ -1559,7 +1559,7 @@ * .github/workflows/osx.yaml, .github/workflows/ubuntu.yaml, .github/workflows/windows.yml, mathics_scanner/characters.py, @@ -232,7 +241,7 @@ Administrivia & make ujson optional * It's not clear that pyston-2.2 support ujson so make it optional * In github workflows use requirements-dev.txt * revise requirement-dev to remove click (in setup.py) and add ujson * setup.py make ujson optional * version.py: bump to dev 2021-05-14 rocky -@@ -1589,7 +1589,7 @@ +@@ -1685,7 +1685,7 @@ 2021-04-03 rocky * mathics_scanner/data/named-characters.yml, @@ -241,7 +250,7 @@ 2021-04-03 rocky -@@ -1663,7 +1663,7 @@ +@@ -1759,7 +1759,7 @@ * .github/workflows/osx.yaml, Makefile, mathics_scanner/data/named-characters.yml, setup.py, @@ -250,7 +259,7 @@ Start tagging 
operators Note: use WL name when there is no conflict for name of symbol. A number of other small corrections: - Mark some symbols which are invertable when they are. However this should be redone and invertability should be detected automatically. - small correction in macos CI - AUTHORS.txt reduced to those who worked in this, rather than Mathics in general - in tests prefer top-level tests when that is possible (i.e. test is not parameterized) - use relative import as "test" can be ambiguous and lead to -@@ -1764,8 +1764,8 @@ +@@ -1860,8 +1860,8 @@ 2021-01-31 Pablo Emilio Escobar Gaviria @@ -261,7 +270,7 @@ 2021-01-31 Pablo Emilio Escobar Gaviria -@@ -1797,7 +1797,7 @@ +@@ -1893,7 +1893,7 @@ 2021-01-31 Pablo Emilio Escobar Gaviria @@ -270,7 +279,7 @@ Fixed another errors in the YAML table 2021-02-02 Pablo Emilio Escobar Gaviria -@@ -1820,7 +1820,7 @@ +@@ -1916,7 +1916,7 @@ 2021-02-01 Pablo Emilio Escobar Gaviria @@ -279,7 +288,7 @@ Documented stuff used by Mathics 2021-01-31 Pablo Emilio Escobar Gaviria -@@ -1840,7 +1840,7 @@ +@@ -1936,7 +1936,7 @@ 2021-01-31 Pablo Emilio Escobar Gaviria @@ -288,7 +297,7 @@ Improved the general tests for YAML 2021-01-31 Pablo Emilio Escobar Gaviria -@@ -1869,22 +1869,22 @@ +@@ -1965,22 +1965,22 @@ 2021-01-31 Pablo Emilio Escobar Gaviria @@ -315,7 +324,7 @@ Added sanity check for has-unicode-inverse and fixed the reound trip tests -@@ -1936,7 +1936,7 @@ +@@ -2032,7 +2032,7 @@ 2021-01-27 Pablo Emilio Escobar Gaviria @@ -324,7 +333,7 @@ resolves the appropriate plain-text representation of a given character -@@ -2092,15 +2092,15 @@ +@@ -2188,15 +2188,15 @@ 2021-01-19 Pablo Emilio Escobar Gaviria @@ -343,7 +352,7 @@ 2021-01-19 Pablo Emilio Escobar Gaviria -@@ -2278,5 +2278,4 @@ +@@ -2374,5 +2374,4 @@ 2021-01-18 Pablo Emilio Escobar Gaviria diff --git a/MANIFEST.in b/MANIFEST.in index 793d322..fe6ab05 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,7 +4,7 @@ include ChangeLog include COPYING.txt include FUTURE.rst include Makefile 
-include mathics_scanner/data/named-characters.yml +include mathics_scanner/data/*.yml include requirements-full.txt include requirements-dev.txt recursive-include mathics_scanner *.py diff --git a/Makefile b/Makefile index 13f449d..c441642 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ RM ?= rm PIP_INSTALL_OPTS ?= .PHONY: all build \ + ChangeLog-without-corrections \ check check-full check-mathics3 clean \ develop dist doc \ inputrc-no-unicode \ diff --git a/README.rst b/README.rst index 1eedaac..343138d 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ Uses The scanner and character tables are used inside `Mathics3 `_. However information can also be used by other programs for tokenizing and formatting Wolfram Language code. -For example, tables are used in `mathics-pygments `_, a Pygments-based +For example, tables are used in `mathics-pygments `_, a Pygments-based lexer and highlighter for Mathematica/Wolfram Language source code. This library may be useful if you need to work with the Wolfram Language @@ -64,7 +64,8 @@ Implementation -------------- For notes on the implementation of the packages or details on the conversion -scheme, please read `Scanning `_. +scheme, please read `Mathics3 scanner's documentation `_ or +`Scanning section `_ of the Mathics3 User and Developers Guide. Contributing ------------ @@ -77,7 +78,7 @@ License Mathics3 is released under the GNU General Public License Version 3 (GPL3). -.. |Workflows| image:: https://github.com/Mathics3/mathics-scanner/actions/workflows/ubuntu.yml/badge.svg +.. |Workflows| image:: https://github.com/Mathics3/Mathics3-scanner/actions/workflows/ubuntu.yml/badge.svg .. |Packaging status| image:: https://repology.org/badge/vertical-allrepos/mathics-scanner.svg :target: https://repology.org/project/mathics-scanner/versions .. 
|Latest Version| image:: https://badge.fury.io/py/Mathics-Scanner.svg diff --git a/admin-tools/make-dist.sh b/admin-tools/make-dist.sh index dae2b54..2af9a9e 100755 --- a/admin-tools/make-dist.sh +++ b/admin-tools/make-dist.sh @@ -18,6 +18,6 @@ cd .. source mathics_scanner/version.py echo $__version__ -python -m build --wheel -python ./setup.py sdist +python -m build --wheel --no-isolation +python -m build --sdist --no-isolation finish diff --git a/codespell-ignore.txt b/codespell-ignore.txt new file mode 100644 index 0000000..bcd2c6b --- /dev/null +++ b/codespell-ignore.txt @@ -0,0 +1 @@ +inout diff --git a/mathics_scanner/__init__.py b/mathics_scanner/__init__.py index a82aba7..f803370 100644 --- a/mathics_scanner/__init__.py +++ b/mathics_scanner/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -This is the tokeniser or scanner portion for the Wolfram Language. +This is the tokenizer or scanner portion for the Wolfram Language. As such, it also contains a full set of translation between Wolfram Language named characters, their Unicode/ASCII equivalents and code-points. 
@@ -9,6 +9,7 @@ from mathics_scanner.characters import ( ALIASED_CHARACTERS, NAMED_CHARACTERS, + replace_box_unicode_with_ascii, replace_unicode_with_wl, replace_wl_with_plain_text, ) @@ -42,6 +43,7 @@ # "Tokeniser", "__version__", # "is_symbol_name", + "replace_box_unicode_with_ascii", "replace_unicode_with_wl", "replace_wl_with_plain_text", ] diff --git a/mathics_scanner/generate/boxing_characters.py b/mathics_scanner/generate/boxing_characters.py index 6083a33..7cf2c17 100644 --- a/mathics_scanner/generate/boxing_characters.py +++ b/mathics_scanner/generate/boxing_characters.py @@ -13,6 +13,18 @@ # Silence warnings about JSON tables not existing os.environ["MATHICS3_TABLE_GENERATION"] = "true" +try: + from mathics_scanner.version import __version__ # noqa +except ImportError: + import ast + from pathlib import Path + + version_file = Path(__file__).parent.parent / "version.py" + version_content = version_file.read_text() + version_line = [ + line for line in version_content.split("\n") if line.startswith("__version__") + ][0] + __version__ = ast.literal_eval(version_line.split("=")[1].strip().split("#")[0]) from mathics_scanner.version import __version__ # noqa diff --git a/mathics_scanner/generate/named_characters.py b/mathics_scanner/generate/named_characters.py index 5d257a4..913dde6 100755 --- a/mathics_scanner/generate/named_characters.py +++ b/mathics_scanner/generate/named_characters.py @@ -1,7 +1,8 @@ #!/usr/bin/env python -# This scripts reads the data from named-characters and converts it to the -# format used by the library internally +# This script reads the data from named-characters and converts it to the +# format used by the library internally. +import ast import json import os import re @@ -11,8 +12,20 @@ import click import yaml -# Silence warnings about JSON tables not existing +# Silence warnings about JSON tables not existing. 
os.environ["MATHICS3_TABLE_GENERATION"] = "true" + +try: + from mathics_scanner.version import __version__ # noqa +except ImportError: + import ast + + version_file = Path(__file__).parent.parent / "version.py" + version_content = version_file.read_text() + version_line = [ + line for line in version_content.split("\n") if line.startswith("__version__") + ][0] + __version__ = ast.literal_eval(version_line.split("=")[1].strip().split("#")[0]) from mathics_scanner.version import __version__ # noqa @@ -23,10 +36,10 @@ def build_unicode_to_ascii_table(data): """ result = {} for key, entry in data.items(): - # First, look at the ascii entry + # First, look at the "ascii" entry. ascii_equiv = entry.get("ascii", None) - # If there is no ascii entry, try with - # esc-alias + # If there is no "ascii" entry, try with + # "esc-alias" if ascii_equiv is None: ascii_equiv = entry.get("esc-alias", None) # otherwise, use the NameCharacter form: @@ -35,14 +48,14 @@ def build_unicode_to_ascii_table(data): unicode_equivalent = entry.get("unicode-equivalent", None) if unicode_equivalent is not None: - # not already an ascii character + # Not already an ASCII character. if unicode_equivalent != ascii_equiv and ( len(unicode_equivalent) != 1 or ord(unicode_equivalent) > 127 ): result[unicode_equivalent] = ascii_equiv wl_unicode = entry.get("wl-unicode", None) if wl_unicode is not None and wl_unicode not in result: - # not ascii + # not ASCII. 
if wl_unicode != ascii_equiv and ( len(wl_unicode) != 1 or ord(wl_unicode) > 127 ): diff --git a/mathics_scanner/generate/operators.py b/mathics_scanner/generate/operators.py index 549247b..d62f9c5 100755 --- a/mathics_scanner/generate/operators.py +++ b/mathics_scanner/generate/operators.py @@ -15,6 +15,19 @@ # Silence warnings about JSON tables not existing os.environ["MATHICS3_TABLE_GENERATION"] = "true" + +try: + from mathics_scanner.version import __version__ # noqa +except ImportError: + import ast + + version_file = Path(__file__).parent.parent / "version.py" + version_content = version_file.read_text() + version_line = [ + line for line in version_content.split("\n") if line.startswith("__version__") + ][0] + __version__ = ast.literal_eval(version_line.split("=")[1].strip().split("#")[0]) + from mathics_scanner.version import __version__ # noqa OPERATOR_FIELDS = [ diff --git a/mathics_scanner/generate/rl_inputrc.py b/mathics_scanner/generate/rl_inputrc.py index 6f8edab..dad47d4 100755 --- a/mathics_scanner/generate/rl_inputrc.py +++ b/mathics_scanner/generate/rl_inputrc.py @@ -8,8 +8,8 @@ """ import sys +from mathics_scanner.characters import ALIASED_CHARACTERS from mathics_scanner.characters import replace_wl_with_plain_text as r -from mathics_scanner.characters import aliased_characters def _escape(s: str) -> str: @@ -23,7 +23,7 @@ def _format(c: str, use_unicode: bool) -> str: if key == "nl": val = "\\n" else: - val = _escape(r(aliased_characters[c], use_unicode=use_unicode)) + val = _escape(r(ALIASED_CHARACTERS[c], use_unicode=use_unicode)) return f'"\\e{key}\\e": "{val}"\n' @@ -34,7 +34,7 @@ def generate_inputrc(fd=sys.stdout, use_unicode=True) -> None: their corresponding plain-text representation (full Unicode or strick ASCII) """ - for alias in aliased_characters: + for alias in ALIASED_CHARACTERS: try: fd.write(_format(alias, use_unicode)) except UnicodeEncodeError: diff --git a/mathics_scanner/location.py b/mathics_scanner/location.py index 
1f309bb..1153692 100644 --- a/mathics_scanner/location.py +++ b/mathics_scanner/location.py @@ -75,6 +75,9 @@ def get_location_file_line( filename = code.co_filename line_number = code.co_firstlineno else: - filename = MATHICS3_PATHS[loc.container] + try: + filename = MATHICS3_PATHS[loc.container] + except IndexError: + filename = "???" line_number = loc.start_line return filename, line_number diff --git a/mathics_scanner/version.py b/mathics_scanner/version.py index 34a121a..6085df9 100644 --- a/mathics_scanner/version.py +++ b/mathics_scanner/version.py @@ -4,4 +4,4 @@ # well as importing into Python. That's why there is no # space around "=" below. # fmt: off -__version__="10.0.0" # noqa +__version__="10.0.1" # noqa diff --git a/pyproject.toml b/pyproject.toml index 89e8da8..b2f1114 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ classifiers = [ "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Programming Language :: Python", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", @@ -45,6 +44,8 @@ Homepage = "https://mathics.org/" [project.optional-dependencies] dev = [ + "check-jsonschema", + "pre-commit", "pytest", ] full = [ @@ -55,7 +56,14 @@ full = [ mathics3-make-boxing-character-json = "mathics_scanner.generate.boxing_characters:main" mathics3-make-named-character-json = "mathics_scanner.generate.named_characters:main" mathics3-make-operator-json = "mathics_scanner.generate.operators:main" -mathics3-code-tokenize = "mathics_scanner.mathics3_code_tokenize:main" +mathics3-codeparser-tokenize = "mathics_scanner.mathics3_code_tokenize:main" + +[tool.codespell] +# Point to your custom ignore file +ignore-words = "codespell-ignore.txt" + +# Exclude directories or files that often trigger false positives +skip = "*.pyd,*.pyc,./.git,./.venv,./build" [tool.setuptools] packages = [ @@ -68,6 +76,7 @@ 
packages = [ "mathics_scanner" = [ "data/boxing-characters.json", "data/boxing-characters.yml", + "data/grouping-characters.yml", "data/named-characters.json", "data/named-characters.yml", "data/operators.yml", diff --git a/setup.py b/setup.py index 966bbea..fc22270 100644 --- a/setup.py +++ b/setup.py @@ -1,28 +1,12 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -"""Setuptools based setup script for Mathics. +"""Setuptools based setup script for Mathics3 Scanner. For the easiest installation just type the following command (you'll probably need root privileges): - python setup.py install - -This will install the library in the default location. For instructions on -how to customize the install procedure read the output of: - - python setup.py --help install - -In addition, there are some other commands: - - python setup.py clean -> will clean all trash (*.pyc and stuff) - -To get a full list of available commands, read the output of: - - python setup.py --help-commands - -Or, if all else fails, feel free to write to the mathics users list at -mathics-users@googlegroups.com and ask for help. + pip install -e . """ import os