mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 01:17:28 +08:00
custom data with lora
This commit is contained in:
parent
a3ecda22fe
commit
985f413f99
1
lora/.gitignore
vendored
Normal file
1
lora/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
adapters.npz
|
@ -6,7 +6,19 @@ task.
|
||||
|
||||
In this example we'll use the WikiSQL[^wikisql] dataset to train the LLM to
|
||||
generate SQL queries from natural language. However, the example is intended to
|
||||
be general should you wish to modify the task.
|
||||
be general should you wish to use a custom dataset.
|
||||
|
||||
## Contents
|
||||
|
||||
* [Setup](#setup)
|
||||
* [Run](#run)
|
||||
* [Fine-tune](#fine-tune)
|
||||
* [Evaluate](#evaluate)
|
||||
* [Generate](#generate)
|
||||
* [Results](#results)
|
||||
* [Custom Data](#custom-data)
|
||||
* [Memory Issues](#memory-issues)
|
||||
|
||||
|
||||
## Setup
|
||||
|
||||
@ -57,6 +69,9 @@ Note, the model path should have the MLX weights, the tokenizer, and the
|
||||
By default, the adapter weights are saved in `adapters.npz`. You can specify
|
||||
the output location with `--adapter_file`.
|
||||
|
||||
You can resume fine-tuning with an existing adapter with
|
||||
`--resume_adapter_file`, which specifies the location of the adapter weights.
|
||||
|
||||
#### Evaluate
|
||||
|
||||
To compute test set perplexity use
|
||||
@ -98,6 +113,41 @@ training and validation loss at a few points over the course of training.
|
||||
|
||||
The model trains at around 475 tokens per second on an M2 Ultra.
|
||||
|
||||
## Custom Data
|
||||
|
||||
You can make your own dataset for fine-tuning with LoRA. You can specify the
|
||||
dataset with `--data=<my_data_directory>`. Check the subdirectory `data/` to
|
||||
see the expected format.
|
||||
|
||||
For fine-tuning, the data loader expects a `train.jsonl` and a `valid.jsonl` to
|
||||
be in the data directory. For evaluation (`--test`), the data loader expects a
|
||||
`test.jsonl` in the directory. Each line in the `*.jsonl` file should look
|
||||
like:
|
||||
|
||||
```
|
||||
{"text": "This is an example for the model."}
|
||||
```
|
||||
|
||||
Note that other keys will be ignored by the loader.
|
||||
|
||||
## Memory Issues
|
||||
|
||||
Fine-tuning a large model with LoRA requires a machine with a decent amount
|
||||
of memory. Here are some tips to reduce memory use should you need to do so:
|
||||
|
||||
1. Try using a smaller batch size with `--batch-size`. The default is `4` so
|
||||
setting this to `2` or `1` will reduce memory consumption. This may slow
|
||||
things down a little.
|
||||
|
||||
2. Reduce the number of layers to fine-tune with `--lora-layers`. The default
|
||||
is `16`, so you can try `8` or `4`. This reduces the amount of memory
|
||||
needed for back propagation. It may also reduce the quality of the
|
||||
fine-tuned model if you are fine-tuning with a lot of data.
|
||||
|
||||
3. Longer examples require more memory. If it makes sense for your data, one thing
|
||||
you can do is break your examples into smaller
|
||||
sequences when making the `{train, valid, test}.jsonl` files.
|
||||
|
||||
[^lora]: Refer to the [arXiv paper](https://arxiv.org/abs/2106.09685) for more details on LoRA.
|
||||
[^llama]: Refer to the [arXiv paper](https://arxiv.org/abs/2302.13971) and [blog post](https://ai.meta.com/blog/large-language-model-llama-meta-ai/) for more details.
|
||||
[^mistral]: Refer to the [blog post](https://mistral.ai/news/announcing-mistral-7b/) and [github repository](https://github.com/mistralai/mistral-src) for more details.
|
||||
|
100
lora/data/test.jsonl
Normal file
100
lora/data/test.jsonl
Normal file
@ -0,0 +1,100 @@
|
||||
{"text": "table: 1-10015132-16\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What is terrence ross' nationality\nA: SELECT Nationality FROM 1-10015132-16 WHERE Player = 'Terrence Ross'"}
|
||||
{"text": "table: 1-10015132-16\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What clu was in toronto 1995-96\nA: SELECT School/Club Team FROM 1-10015132-16 WHERE Years in Toronto = '1995-96'"}
|
||||
{"text": "table: 1-10015132-16\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: which club was in toronto 2003-06\nA: SELECT School/Club Team FROM 1-10015132-16 WHERE Years in Toronto = '2003-06'"}
|
||||
{"text": "table: 1-10015132-16\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: how many schools or teams had jalen rose\nA: SELECT COUNT School/Club Team FROM 1-10015132-16 WHERE Player = 'Jalen Rose'"}
|
||||
{"text": "table: 1-10083598-1\ncolumns: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report\nQ: Where was Assen held?\nA: SELECT Round FROM 1-10083598-1 WHERE Circuit = 'Assen'"}
|
||||
{"text": "table: 1-10083598-1\ncolumns: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report\nQ: What was the number of race that Kevin Curtain won?\nA: SELECT COUNT No FROM 1-10083598-1 WHERE Pole Position = 'Kevin Curtain'"}
|
||||
{"text": "table: 1-10083598-1\ncolumns: No, Date, Round, Circuit, Pole Position, Fastest Lap, Race winner, Report\nQ: What was the date of the race in Misano?\nA: SELECT Date FROM 1-10083598-1 WHERE Circuit = 'Misano'"}
|
||||
{"text": "table: 1-1013129-2\ncolumns: Pick, Player, Position, Nationality, NHL team, College/junior/club team\nQ: How many different positions did Sherbrooke Faucons (qmjhl) provide in the draft?\nA: SELECT COUNT Position FROM 1-1013129-2 WHERE College/junior/club team = 'Sherbrooke Faucons (QMJHL)'"}
|
||||
{"text": "table: 1-1013129-2\ncolumns: Pick, Player, Position, Nationality, NHL team, College/junior/club team\nQ: What are the nationalities of the player picked from Thunder Bay Flyers (ushl)\nA: SELECT Nationality FROM 1-1013129-2 WHERE College/junior/club team = 'Thunder Bay Flyers (USHL)'"}
|
||||
{"text": "table: 1-1013129-2\ncolumns: Pick, Player, Position, Nationality, NHL team, College/junior/club team\nQ: How many different college/junior/club teams provided a player to the Washington Capitals NHL Team?\nA: SELECT COUNT College/junior/club team FROM 1-1013129-2 WHERE NHL team = 'Washington Capitals'"}
|
||||
{"text": "table: 1-1013129-3\ncolumns: Pick, Player, Position, Nationality, NHL team, College/junior/club team\nQ: How many different nationalities do the players of New Jersey Devils come from?\nA: SELECT COUNT Nationality FROM 1-1013129-3 WHERE NHL team = 'New Jersey Devils'"}
|
||||
{"text": "table: 1-1013129-3\ncolumns: Pick, Player, Position, Nationality, NHL team, College/junior/club team\nQ: What's Dorain Anneck's pick number?\nA: SELECT Pick FROM 1-1013129-3 WHERE Player = 'Dorain Anneck'"}
|
||||
{"text": "table: 1-1013129-3\ncolumns: Pick, Player, Position, Nationality, NHL team, College/junior/club team\nQ: What is the nationality of the player from Vancouver Canucks?\nA: SELECT Nationality FROM 1-1013129-3 WHERE NHL team = 'Vancouver Canucks'"}
|
||||
{"text": "table: 1-1013129-3\ncolumns: Pick, Player, Position, Nationality, NHL team, College/junior/club team\nQ: What's the pick number of the player from Springfield Olympics (Nejhl)?\nA: SELECT Pick FROM 1-1013129-3 WHERE College/junior/club team = 'Springfield Olympics (NEJHL)'"}
|
||||
{"text": "table: 1-1014206-2\ncolumns: #, Shipyard, Laid down, Launched, Commissioned, Fleet, Status\nQ: When were the ships launched that were laid down on september 1, 1964?\nA: SELECT Launched FROM 1-1014206-2 WHERE Laid down = 'September 1, 1964'"}
|
||||
{"text": "table: 1-1014206-2\ncolumns: #, Shipyard, Laid down, Launched, Commissioned, Fleet, Status\nQ: List the # for ships commissioned on december 18, 1965.\nA: SELECT # FROM 1-1014206-2 WHERE Commissioned = 'December 18, 1965'"}
|
||||
{"text": "table: 1-1014206-2\ncolumns: #, Shipyard, Laid down, Launched, Commissioned, Fleet, Status\nQ: List the # for ships commissioned on september 30, 1967.\nA: SELECT # FROM 1-1014206-2 WHERE Commissioned = 'September 30, 1967'"}
|
||||
{"text": "table: 1-1014206-2\ncolumns: #, Shipyard, Laid down, Launched, Commissioned, Fleet, Status\nQ: When were ships laid down that were commissioned on october 29, 1965?\nA: SELECT Laid down FROM 1-1014206-2 WHERE Commissioned = 'October 29, 1965'"}
|
||||
{"text": "table: 1-1015521-2\ncolumns: Equivalent NATO Rank Code, Rank in Spanish, Rank in English, Commonwealth equivalent, US Air Force equivalent\nQ: What could a spanish coronel be addressed as in the commonwealth military?\nA: SELECT Commonwealth equivalent FROM 1-1015521-2 WHERE Rank in Spanish = 'Coronel'"}
|
||||
{"text": "table: 1-1015521-2\ncolumns: Equivalent NATO Rank Code, Rank in Spanish, Rank in English, Commonwealth equivalent, US Air Force equivalent\nQ: Give me a list of all spanish officer titles that could receive recognition as group captain in english\nA: SELECT Rank in English FROM 1-1015521-2 WHERE Commonwealth equivalent = 'Group Captain'"}
|
||||
{"text": "table: 1-1015521-2\ncolumns: Equivalent NATO Rank Code, Rank in Spanish, Rank in English, Commonwealth equivalent, US Air Force equivalent\nQ: If you are a pilot officer in the commonwealth then what will you called as in the US air force?\nA: SELECT US Air Force equivalent FROM 1-1015521-2 WHERE Commonwealth equivalent = 'Pilot Officer'"}
|
||||
{"text": "table: 1-1015521-2\ncolumns: Equivalent NATO Rank Code, Rank in Spanish, Rank in English, Commonwealth equivalent, US Air Force equivalent\nQ: If you're a major general in the US air force then what ranking will you receive in the commonwealth's air force?\nA: SELECT Commonwealth equivalent FROM 1-1015521-2 WHERE US Air Force equivalent = 'Major General'"}
|
||||
{"text": "table: 1-1015521-2\ncolumns: Equivalent NATO Rank Code, Rank in Spanish, Rank in English, Commonwealth equivalent, US Air Force equivalent\nQ: If you get a ranking as major in the english military then what would the spanish military address you as? \nA: SELECT Rank in Spanish FROM 1-1015521-2 WHERE Rank in English = 'Major'"}
|
||||
{"text": "table: 1-10182508-5\ncolumns: Rank Each wrestlers total number of days as champion are ranked highest to lowest; wrestlers with the same number mean that they are tied for that certain rank., Wrestler, # of reigns, Combined defenses, Combined days\nQ: Which wrestlers have had 2 reigns?\nA: SELECT Wrestler FROM 1-10182508-5 WHERE # of reigns = 2"}
|
||||
{"text": "table: 1-10182508-5\ncolumns: Rank Each wrestlers total number of days as champion are ranked highest to lowest; wrestlers with the same number mean that they are tied for that certain rank., Wrestler, # of reigns, Combined defenses, Combined days\nQ: In terms of reigns, what is the lowest number listed?\nA: SELECT MIN # of reigns FROM 1-10182508-5"}
|
||||
{"text": "table: 1-10182508-5\ncolumns: Rank Each wrestlers total number of days as champion are ranked highest to lowest; wrestlers with the same number mean that they are tied for that certain rank., Wrestler, # of reigns, Combined defenses, Combined days\nQ: What rank was Bryan Danielson in this chart?\nA: SELECT Rank Each wrestlers total number of days as champion are ranked highest to lowest; wrestlers with the same number mean that they are tied for that certain rank. FROM 1-10182508-5 WHERE Wrestler = 'Bryan Danielson'"}
|
||||
{"text": "table: 1-10182508-5\ncolumns: Rank Each wrestlers total number of days as champion are ranked highest to lowest; wrestlers with the same number mean that they are tied for that certain rank., Wrestler, # of reigns, Combined defenses, Combined days\nQ: How many combined days did Go Shiozaki have?\nA: SELECT Combined days FROM 1-10182508-5 WHERE Wrestler = 'Go Shiozaki'"}
|
||||
{"text": "table: 1-10182508-5\ncolumns: Rank Each wrestlers total number of days as champion are ranked highest to lowest; wrestlers with the same number mean that they are tied for that certain rank., Wrestler, # of reigns, Combined defenses, Combined days\nQ: What was Go Shiozaki's rank?\nA: SELECT MIN Rank Each wrestlers total number of days as champion are ranked highest to lowest; wrestlers with the same number mean that they are tied for that certain rank. FROM 1-10182508-5 WHERE Wrestler = 'Go Shiozaki'"}
|
||||
{"text": "table: 1-1024710-2\ncolumns: Member, Electorate, Province, MPs term, Election date\nQ: Which province is grey and bell electorate in\nA: SELECT Province FROM 1-1024710-2 WHERE Electorate = 'Grey and Bell'"}
|
||||
{"text": "table: 1-1024710-2\ncolumns: Member, Electorate, Province, MPs term, Election date\nQ: Which province is bay of islands in\nA: SELECT Province FROM 1-1024710-2 WHERE Electorate = 'Bay of Islands'"}
|
||||
{"text": "table: 1-10294071-1\ncolumns: Player, Total W\u2013L, Singles W\u2013L, Doubles W\u2013L, Ties played, Debut, Years played\nQ: what is the total number of\u00a0total w\u2013l\u00a0where\u00a0doubles w\u2013l\u00a0is 11\u201311\nA: SELECT COUNT Total W\u2013L FROM 1-10294071-1 WHERE Doubles W\u2013L = '11\u201311'"}
|
||||
{"text": "table: 1-10294071-1\ncolumns: Player, Total W\u2013L, Singles W\u2013L, Doubles W\u2013L, Ties played, Debut, Years played\nQ: what is the total number of\u00a0singles w\u2013l\u00a0where\u00a0doubles w\u2013l\u00a0is 11\u201314\nA: SELECT COUNT Singles W\u2013L FROM 1-10294071-1 WHERE Doubles W\u2013L = '11\u201314'"}
|
||||
{"text": "table: 1-10294071-1\ncolumns: Player, Total W\u2013L, Singles W\u2013L, Doubles W\u2013L, Ties played, Debut, Years played\nQ: what's the\u00a0total w\u2013l\u00a0where\u00a0player\u00a0is boro jovanovi\u0107 category:articles with hcards\nA: SELECT Total W\u2013L FROM 1-10294071-1 WHERE Player = 'Boro Jovanovi\u0107 Category:Articles with hCards'"}
|
||||
{"text": "table: 1-10294071-1\ncolumns: Player, Total W\u2013L, Singles W\u2013L, Doubles W\u2013L, Ties played, Debut, Years played\nQ: what is the maximum\u00a0ties played\u00a0where\u00a0player\u00a0is josip palada category:articles with hcards\nA: SELECT MAX Ties played FROM 1-10294071-1 WHERE Player = 'Josip Palada Category:Articles with hCards'"}
|
||||
{"text": "table: 1-10294071-1\ncolumns: Player, Total W\u2013L, Singles W\u2013L, Doubles W\u2013L, Ties played, Debut, Years played\nQ: what is the total number of\u00a0ties played\u00a0where\u00a0total w\u2013l\u00a0is 38\u201324\nA: SELECT COUNT Ties played FROM 1-10294071-1 WHERE Total W\u2013L = '38\u201324'"}
|
||||
{"text": "table: 1-10333757-1\ncolumns: Calls, Frequency, Branding, Format, Market/Rank, Timeslot, Group owner\nQ: What is the Frequency at the Market/Rank of Burlington - Plattsburgh , Vermont - New York /143?\nA: SELECT COUNT Frequency FROM 1-10333757-1 WHERE Market/Rank = 'Burlington - Plattsburgh , Vermont - New York /143'"}
|
||||
{"text": "table: 1-10333757-1\ncolumns: Calls, Frequency, Branding, Format, Market/Rank, Timeslot, Group owner\nQ: What is the Branding for Group Owner Qantam of Cape Cod, LLC?\nA: SELECT Branding FROM 1-10333757-1 WHERE Group owner = 'Qantam of Cape Cod, LLC'"}
|
||||
{"text": "table: 1-10333757-1\ncolumns: Calls, Frequency, Branding, Format, Market/Rank, Timeslot, Group owner\nQ: What Branding does WRKO calls use?\nA: SELECT Branding FROM 1-10333757-1 WHERE Calls = 'WRKO'"}
|
||||
{"text": "table: 1-10333757-1\ncolumns: Calls, Frequency, Branding, Format, Market/Rank, Timeslot, Group owner\nQ: What is the Format for Branding of 1290 wkbk w281au 104.1?\nA: SELECT Format FROM 1-10333757-1 WHERE Branding = '1290 WKBK W281AU 104.1'"}
|
||||
{"text": "table: 1-10333757-1\ncolumns: Calls, Frequency, Branding, Format, Market/Rank, Timeslot, Group owner\nQ: Which Market/Rank is associated with WCRN calls?\nA: SELECT Market/Rank FROM 1-10333757-1 WHERE Calls = 'WCRN'"}
|
||||
{"text": "table: 1-10333757-1\ncolumns: Calls, Frequency, Branding, Format, Market/Rank, Timeslot, Group owner\nQ: Which Frequency is used for WEGP calls?\nA: SELECT Frequency FROM 1-10333757-1 WHERE Calls = 'WEGP'"}
|
||||
{"text": "table: 1-10408617-5\ncolumns: Scheme, Tariff code, BTs retail price (regulated), Approx premium, Prefixes\nQ: What is the regulated retail price for the tariff code ff0 prs?\nA: SELECT BTs retail price (regulated) FROM 1-10408617-5 WHERE Tariff code = 'ff0 PRS'"}
|
||||
{"text": "table: 1-10408617-5\ncolumns: Scheme, Tariff code, BTs retail price (regulated), Approx premium, Prefixes\nQ: What is the premium associated with tariff code g9?\nA: SELECT Approx premium FROM 1-10408617-5 WHERE Tariff code = 'g9'"}
|
||||
{"text": "table: 1-10408617-5\ncolumns: Scheme, Tariff code, BTs retail price (regulated), Approx premium, Prefixes\nQ: How many tariff codes have a bts retail price of 2p/min or inclusive?\nA: SELECT COUNT Tariff code FROM 1-10408617-5 WHERE BTs retail price (regulated) = '2p/min or inclusive'"}
|
||||
{"text": "table: 1-10408617-5\ncolumns: Scheme, Tariff code, BTs retail price (regulated), Approx premium, Prefixes\nQ: How many tariff codes have a bts retail price of 2.553p/min?\nA: SELECT COUNT Tariff code FROM 1-10408617-5 WHERE BTs retail price (regulated) = '2.553p/min'"}
|
||||
{"text": "table: 1-10408617-5\ncolumns: Scheme, Tariff code, BTs retail price (regulated), Approx premium, Prefixes\nQ: What prefixes are priced at pence per minute, fixed at all times with a premium of 3p/min?\nA: SELECT Prefixes FROM 1-10408617-5 WHERE Scheme = 'Pence per minute, fixed at all times' AND Approx premium = '3p/min'"}
|
||||
{"text": "table: 1-10408617-5\ncolumns: Scheme, Tariff code, BTs retail price (regulated), Approx premium, Prefixes\nQ: What is the bts retail price (regulated) for tariff code g10?\nA: SELECT BTs retail price (regulated) FROM 1-10408617-5 WHERE Tariff code = 'g10'"}
|
||||
{"text": "table: 1-10409754-5\ncolumns: Nominative, Old orthography, New orthography, /e/ or /\u00e6/ (IPA), Tone (Latvian notation: /~/ - level, /^/ - broken), Translation\nQ: What is the tone for gen.sing. plague?\nA: SELECT Tone (Latvian notation: /~/ - level, /^/ - broken) FROM 1-10409754-5 WHERE Translation = 'Gen.Sing. plague'"}
|
||||
{"text": "table: 1-10432351-1\ncolumns: Star (Pismis24-#), Spectral type, Magnitude (M bol ), Temperature (K), Radius (R \u2609 ), Mass (M \u2609 )\nQ: What is the smallest possible radius?\nA: SELECT MIN Radius (R \u2609 ) FROM 1-10432351-1"}
|
||||
{"text": "table: 1-10432351-1\ncolumns: Star (Pismis24-#), Spectral type, Magnitude (M bol ), Temperature (K), Radius (R \u2609 ), Mass (M \u2609 )\nQ: What are all the spectral types for star mismis24-# is 1sw?\nA: SELECT Spectral type FROM 1-10432351-1 WHERE Star (Pismis24-#) = '1SW'"}
|
||||
{"text": "table: 1-10432351-1\ncolumns: Star (Pismis24-#), Spectral type, Magnitude (M bol ), Temperature (K), Radius (R \u2609 ), Mass (M \u2609 )\nQ: If a radius is 10, what is the lowest possible mass?\nA: SELECT MIN Mass (M \u2609 ) FROM 1-10432351-1 WHERE Radius (R \u2609 ) = 10"}
|
||||
{"text": "table: 1-105344-2\ncolumns: Year, Aircraft kilometers, Departures, Flying hours, Passengers, Seat factor, Employees, Profit/loss\nQ: What percentage of seats were filled in 2006?\nA: SELECT Seat factor FROM 1-105344-2 WHERE Year = 2006"}
|
||||
{"text": "table: 1-105344-2\ncolumns: Year, Aircraft kilometers, Departures, Flying hours, Passengers, Seat factor, Employees, Profit/loss\nQ: How many hours were flown in each of the years where more than 64379058.0 kilometers were flown?\nA: SELECT Flying hours FROM 1-105344-2 WHERE Aircraft kilometers > 64379058.0"}
|
||||
{"text": "table: 1-105344-2\ncolumns: Year, Aircraft kilometers, Departures, Flying hours, Passengers, Seat factor, Employees, Profit/loss\nQ: Of the years that had exactly 17096 departures, what is the greatest number of aircraft kilometers flown?\nA: SELECT MAX Aircraft kilometers FROM 1-105344-2 WHERE Departures = 17096"}
|
||||
{"text": "table: 1-10548224-1\ncolumns: Year, Game or event, Date contested, League or governing body, Sport, Winning team, Losing team, Final score\nQ: Which winning team beat the New York Yankees?\nA: SELECT Winning team FROM 1-10548224-1 WHERE Losing team = 'New York Yankees'"}
|
||||
{"text": "table: 1-10548224-1\ncolumns: Year, Game or event, Date contested, League or governing body, Sport, Winning team, Losing team, Final score\nQ: What was the final score for the game that was contested on February 1, 2009?\nA: SELECT Final score FROM 1-10548224-1 WHERE Date contested = 'February 1, 2009'"}
|
||||
{"text": "table: 1-10548224-1\ncolumns: Year, Game or event, Date contested, League or governing body, Sport, Winning team, Losing team, Final score\nQ: What sport had a final score of 3-2?\nA: SELECT Sport FROM 1-10548224-1 WHERE Final score = '3-2'"}
|
||||
{"text": "table: 1-10548224-1\ncolumns: Year, Game or event, Date contested, League or governing body, Sport, Winning team, Losing team, Final score\nQ: Who was the winning team of the game that was contested on February 1, 2009?\nA: SELECT Winning team FROM 1-10548224-1 WHERE Date contested = 'February 1, 2009'"}
|
||||
{"text": "table: 1-10548224-1\ncolumns: Year, Game or event, Date contested, League or governing body, Sport, Winning team, Losing team, Final score\nQ: Who was the losing team of the game that was contested on February 1, 2004?\nA: SELECT Losing team FROM 1-10548224-1 WHERE Date contested = 'February 1, 2004'"}
|
||||
{"text": "table: 1-1057262-2\ncolumns: Crop (kilotonnes), New South Wales, Victoria, Queensland, Western Australia, South Australia, Tasmania, Total\nQ: what's the minimum\u00a0total\u00a0with\u00a0crop (kilotonnes)\u00a0being s lupin\nA: SELECT MIN Total FROM 1-1057262-2 WHERE Crop (kilotonnes) = 's Lupin'"}
|
||||
{"text": "table: 1-1057262-2\ncolumns: Crop (kilotonnes), New South Wales, Victoria, Queensland, Western Australia, South Australia, Tasmania, Total\nQ: what's the\u00a0new south wales\u00a0with\u00a0crop (kilotonnes)\u00a0being canola\nA: SELECT New South Wales FROM 1-1057262-2 WHERE Crop (kilotonnes) = 'Canola'"}
|
||||
{"text": "table: 1-1057262-2\ncolumns: Crop (kilotonnes), New South Wales, Victoria, Queensland, Western Australia, South Australia, Tasmania, Total\nQ: what's the total number of\u00a0south australia\u00a0with\u00a0victoria\u00a0value of 2173\nA: SELECT COUNT South Australia FROM 1-1057262-2 WHERE Victoria = 2173"}
|
||||
{"text": "table: 1-1057262-2\ncolumns: Crop (kilotonnes), New South Wales, Victoria, Queensland, Western Australia, South Australia, Tasmania, Total\nQ: what's the minimum\u00a0tasmania value\nA: SELECT MIN Tasmania FROM 1-1057262-2"}
|
||||
{"text": "table: 1-1057262-2\ncolumns: Crop (kilotonnes), New South Wales, Victoria, Queensland, Western Australia, South Australia, Tasmania, Total\nQ: what's the total number of\u00a0tasmania\u00a0with\u00a0new south wales\u00a0crop of 190 kilotonnes\nA: SELECT COUNT Tasmania FROM 1-1057262-2 WHERE New South Wales = 190"}
|
||||
{"text": "table: 1-1058787-1\ncolumns: Approximate Age, Virtues, Psycho Social Crisis, Significant Relationship, Existential Question [ not in citation given ], Examples\nQ: How many significant relationships list Will as a virtue?\nA: SELECT COUNT Significant Relationship FROM 1-1058787-1 WHERE Virtues = 'Will'"}
|
||||
{"text": "table: 1-1058787-1\ncolumns: Approximate Age, Virtues, Psycho Social Crisis, Significant Relationship, Existential Question [ not in citation given ], Examples\nQ: Which examples ask the existential question \"Can I Love?\"\nA: SELECT Examples FROM 1-1058787-1 WHERE Existential Question [ not in citation given ] = 'Can I Love?'"}
|
||||
{"text": "table: 1-1059743-2\ncolumns: Rank, Member Association, Points, Group stage, Play-off, AFC Cup\nQ: How many countries got 796.7 points?\nA: SELECT COUNT Rank FROM 1-1059743-2 WHERE Points = '796.7'"}
|
||||
{"text": "table: 1-1059743-2\ncolumns: Rank, Member Association, Points, Group stage, Play-off, AFC Cup\nQ: In what group stage were 177.2 points awarded?\nA: SELECT COUNT Group stage FROM 1-1059743-2 WHERE Points = '177.2'"}
|
||||
{"text": "table: 1-1059743-2\ncolumns: Rank, Member Association, Points, Group stage, Play-off, AFC Cup\nQ: What is the lowest group to earn 886.6 points?\nA: SELECT MIN Group stage FROM 1-1059743-2 WHERE Points = '886.6'"}
|
||||
{"text": "table: 1-1059743-2\ncolumns: Rank, Member Association, Points, Group stage, Play-off, AFC Cup\nQ: How many countries earned 177.2 points?\nA: SELECT COUNT Member Association FROM 1-1059743-2 WHERE Points = '177.2'"}
|
||||
{"text": "table: 1-10586064-2\ncolumns: County, Precincts, Lunsford, % Lunsford, McConnell, % McConnell, Total\nQ: If % lunsford is 51.82% what is the % mcconnell in Letcher?\nA: SELECT % McConnell FROM 1-10586064-2 WHERE % Lunsford = '51.82%'"}
|
||||
{"text": "table: 1-10586064-2\ncolumns: County, Precincts, Lunsford, % Lunsford, McConnell, % McConnell, Total\nQ: What country had the total 18,900 (r)?\nA: SELECT County FROM 1-10586064-2 WHERE Total = '18,900 (R)'"}
|
||||
{"text": "table: 1-10586064-2\ncolumns: County, Precincts, Lunsford, % Lunsford, McConnell, % McConnell, Total\nQ: When % mcconnell is 44.54% what are the total number of counties?\nA: SELECT COUNT County FROM 1-10586064-2 WHERE % McConnell = '44.54%'"}
|
||||
{"text": "table: 1-10586064-2\ncolumns: County, Precincts, Lunsford, % Lunsford, McConnell, % McConnell, Total\nQ: If % mcconnell is 47.17% what is the total number of mcconnell ?\nA: SELECT COUNT McConnell FROM 1-10586064-2 WHERE % McConnell = '47.17%'"}
|
||||
{"text": "table: 1-10586064-2\ncolumns: County, Precincts, Lunsford, % Lunsford, McConnell, % McConnell, Total\nQ: What is the county of precints 515?\nA: SELECT County FROM 1-10586064-2 WHERE Precincts = 515"}
|
||||
{"text": "table: 1-10601843-2\ncolumns: Stadium, Capacity, City, Country, Tenant, Opening\nQ: Which city has a capacity of 41903?\nA: SELECT City FROM 1-10601843-2 WHERE Capacity = 41903"}
|
||||
{"text": "table: 1-10601843-2\ncolumns: Stadium, Capacity, City, Country, Tenant, Opening\nQ: What is the maximum capacity of the Otkrytie Arena stadium?\nA: SELECT MAX Capacity FROM 1-10601843-2 WHERE Stadium = 'Otkrytie Arena'"}
|
||||
{"text": "table: 1-10601843-2\ncolumns: Stadium, Capacity, City, Country, Tenant, Opening\nQ: When did the stadium where Bursaspor is the tenant open?\nA: SELECT MIN Opening FROM 1-10601843-2 WHERE Tenant = 'Bursaspor'"}
|
||||
{"text": "table: 1-10601843-2\ncolumns: Stadium, Capacity, City, Country, Tenant, Opening\nQ: How many tenants are there in the city of Samsun?\nA: SELECT COUNT Tenant FROM 1-10601843-2 WHERE City = 'Samsun'"}
|
||||
{"text": "table: 1-10610087-5\ncolumns: No. in series, No. in season, Title, Directed by, Written by, Original air date\nQ: what's the\u00a0original air date\u00a0with\u00a0title\u00a0 \"hell\"\nA: SELECT Original air date FROM 1-10610087-5 WHERE Title = '\"Hell\"'"}
|
||||
{"text": "table: 1-10638523-1\ncolumns: Particulars and Characteristics, Shivalik Zone, Mid-Hill Zone, High hill zone, Trance- n Himalaya Zone\nQ: What is the percentage of the Shivalik Zone where the percentage of the Mid-Hill Zone is 10%?\nA: SELECT Shivalik Zone FROM 1-10638523-1 WHERE Mid-Hill Zone = '10%'"}
|
||||
{"text": "table: 1-10638523-1\ncolumns: Particulars and Characteristics, Shivalik Zone, Mid-Hill Zone, High hill zone, Trance- n Himalaya Zone\nQ: For mid-hill zone what is the altitude?\nA: SELECT Mid-Hill Zone FROM 1-10638523-1 WHERE Particulars and Characteristics = 'Altitude'"}
|
||||
{"text": "table: 1-10638523-1\ncolumns: Particulars and Characteristics, Shivalik Zone, Mid-Hill Zone, High hill zone, Trance- n Himalaya Zone\nQ: What are the climatic conditions for the trance- n himalaya zone?\nA: SELECT Trance- n Himalaya Zone FROM 1-10638523-1 WHERE Particulars and Characteristics = 'Climatic conditions'"}
|
||||
{"text": "table: 1-10638523-1\ncolumns: Particulars and Characteristics, Shivalik Zone, Mid-Hill Zone, High hill zone, Trance- n Himalaya Zone\nQ: What is the percentage of the trance- n himalaya zone that corresponds with the high hill zone is 25%?\nA: SELECT Trance- n Himalaya Zone FROM 1-10638523-1 WHERE High hill zone = '25%'"}
|
||||
{"text": "table: 1-10644188-3\ncolumns: Total tenure rank, Uninterrupted rank, Name, State represented, Dates of service, Total tenure time, Uninterrupted time\nQ: What is the state of Ted Stevens?\nA: SELECT State represented FROM 1-10644188-3 WHERE Name = 'Ted Stevens'"}
|
||||
{"text": "table: 1-10682862-68\ncolumns: Country, Players, Standard, Minor, First title, Last title\nQ: What's the standard of the country who won its first title in 1992?\nA: SELECT MAX Standard FROM 1-10682862-68 WHERE First title = 1992"}
|
||||
{"text": "table: 1-10682862-68\ncolumns: Country, Players, Standard, Minor, First title, Last title\nQ: What's the smallest number of players?\nA: SELECT MIN Players FROM 1-10682862-68"}
|
||||
{"text": "table: 1-10682862-68\ncolumns: Country, Players, Standard, Minor, First title, Last title\nQ: In what year was the last last title received, by any of the countries?\nA: SELECT MAX Last title FROM 1-10682862-68"}
|
||||
{"text": "table: 1-10710364-1\ncolumns: Religious group, Population % 1961, Population % 1971, Population % 1981, Population % 1991, Population % 2001\nQ: What religious groups made up 0.72% of the Indian population in 2001?\nA: SELECT Religious group FROM 1-10710364-1 WHERE Population % 2001 = '0.72%'"}
|
||||
{"text": "table: 1-10718868-2\ncolumns: No. in series, No. in season, Title, Directed by, Written by, Original air date, U.S. viewers (millions)\nQ: What is the original air date for episode 15 of season 6?\nA: SELECT Original air date FROM 1-10718868-2 WHERE No. in season = 15"}
|
||||
{"text": "table: 1-10718868-2\ncolumns: No. in series, No. in season, Title, Directed by, Written by, Original air date, U.S. viewers (millions)\nQ: How many episodes in season 6 titles \"Poppin' Tags\"?\nA: SELECT COUNT No. in season FROM 1-10718868-2 WHERE Title = '\"Poppin' Tags\"'"}
|
||||
{"text": "table: 1-10753917-1\ncolumns: Season, Driver, Team, Engine, Poles, Wins, Podiums, Points, Margin of defeat\nQ: Which podiums did the Williams team have with a margin of defeat of 2?\nA: SELECT Podiums FROM 1-10753917-1 WHERE Team = 'Williams' AND Margin of defeat = '2'"}
|
||||
{"text": "table: 1-10753917-1\ncolumns: Season, Driver, Team, Engine, Poles, Wins, Podiums, Points, Margin of defeat\nQ: How many drivers on the williams team had a margin of defeat of 2?\nA: SELECT COUNT Driver FROM 1-10753917-1 WHERE Team = 'Williams' AND Margin of defeat = '2'"}
|
||||
{"text": "table: 1-10753917-1\ncolumns: Season, Driver, Team, Engine, Poles, Wins, Podiums, Points, Margin of defeat\nQ: How many seasons was clay regazzoni the driver?\nA: SELECT COUNT Season FROM 1-10753917-1 WHERE Driver = 'Clay Regazzoni'"}
|
||||
{"text": "table: 1-10753917-1\ncolumns: Season, Driver, Team, Engine, Poles, Wins, Podiums, Points, Margin of defeat\nQ: Which margin of defeats had points of 30?\nA: SELECT Margin of defeat FROM 1-10753917-1 WHERE Points = '30'"}
|
||||
{"text": "table: 1-10753917-1\ncolumns: Season, Driver, Team, Engine, Poles, Wins, Podiums, Points, Margin of defeat\nQ: Which podiums did the alfa romeo team have?\nA: SELECT Podiums FROM 1-10753917-1 WHERE Team = 'Alfa Romeo'"}
|
||||
{"text": "table: 1-10797636-1\ncolumns: Village (German), Village (Slovene), Number of people 1991, Percent of Slovenes 1991, Percent of Slovenes 1951\nQ: What was the percent of slovenes 1951 for bach?\nA: SELECT Percent of Slovenes 1951 FROM 1-10797636-1 WHERE Village (German) = 'Bach'"}
|
||||
{"text": "table: 1-10812403-4\ncolumns: Pick #, CFL Team, Player, Position, College\nQ: What college's team is the Saskatchewan Roughriders?\nA: SELECT College FROM 1-10812403-4 WHERE CFL Team = 'Saskatchewan Roughriders'"}
|
||||
{"text": "table: 1-10812403-4\ncolumns: Pick #, CFL Team, Player, Position, College\nQ: What position did Calvin Mccarty play?\nA: SELECT Position FROM 1-10812403-4 WHERE Player = 'Calvin McCarty'"}
|
||||
{"text": "table: 1-10812403-4\ncolumns: Pick #, CFL Team, Player, Position, College\nQ: How many people were pick #30?\nA: SELECT COUNT Position FROM 1-10812403-4 WHERE Pick # = 30"}
|
1000
lora/data/train.jsonl
Normal file
1000
lora/data/train.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
100
lora/data/valid.jsonl
Normal file
100
lora/data/valid.jsonl
Normal file
@ -0,0 +1,100 @@
|
||||
{"text": "table: 1-10015132-11\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What position does the player who played for butler cc (ks) play?\nA: SELECT Position FROM 1-10015132-11 WHERE School/Club Team = 'Butler CC (KS)'"}
|
||||
{"text": "table: 1-10015132-11\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: How many schools did player number 3 play at?\nA: SELECT COUNT School/Club Team FROM 1-10015132-11 WHERE No. = '3'"}
|
||||
{"text": "table: 1-10015132-11\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What school did player number 21 play for?\nA: SELECT School/Club Team FROM 1-10015132-11 WHERE No. = '21'"}
|
||||
{"text": "table: 1-10015132-11\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: Who is the player that wears number 42?\nA: SELECT Player FROM 1-10015132-11 WHERE No. = '42'"}
|
||||
{"text": "table: 1-10015132-11\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What player played guard for toronto in 1996-97?\nA: SELECT Player FROM 1-10015132-11 WHERE Position = 'Guard' AND Years in Toronto = '1996-97'"}
|
||||
{"text": "table: 1-10015132-9\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: Who are all of the players on the Westchester High School club team?\nA: SELECT Player FROM 1-10015132-9 WHERE School/Club Team = 'Westchester High School'"}
|
||||
{"text": "table: 1-10015132-9\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What school/club team is Amir Johnson on?\nA: SELECT School/Club Team FROM 1-10015132-9 WHERE Player = 'Amir Johnson'"}
|
||||
{"text": "table: 1-10015132-9\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What are the total amount of numbers on the Toronto team in 2005-06?\nA: SELECT COUNT No. FROM 1-10015132-9 WHERE Years in Toronto = '2005-06'"}
|
||||
{"text": "table: 1-10015132-9\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What are the total number of positions on the Toronto team in 2006-07?\nA: SELECT COUNT Position FROM 1-10015132-9 WHERE Years in Toronto = '2006-07'"}
|
||||
{"text": "table: 1-10015132-9\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What are the nationality of the players on the Fresno State school/club team?\nA: SELECT Nationality FROM 1-10015132-9 WHERE School/Club Team = 'Fresno State'"}
|
||||
{"text": "table: 1-10015132-9\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What school/club team is Trey Johnson on?\nA: SELECT School/Club Team FROM 1-10015132-9 WHERE Player = 'Trey Johnson'"}
|
||||
{"text": "table: 1-10026563-1\ncolumns: Entered office as Head of State or Government, Began time as senior G8 leader, Ended time as senior G8 leader, Person, Office\nQ: When did Jacques Chirac stop being a G8 leader?\nA: SELECT Ended time as senior G8 leader FROM 1-10026563-1 WHERE Person = 'Jacques Chirac'"}
|
||||
{"text": "table: 1-10026563-1\ncolumns: Entered office as Head of State or Government, Began time as senior G8 leader, Ended time as senior G8 leader, Person, Office\nQ: When did the Prime Minister of Italy take office?\nA: SELECT Entered office as Head of State or Government FROM 1-10026563-1 WHERE Office = 'Prime Minister of Italy'"}
|
||||
{"text": "table: 1-1008653-1\ncolumns: Country ( exonym ), Capital ( exonym ), Country ( endonym ), Capital ( endonym ), Official or native language(s) (alphabet/script)\nQ: What is the English name of the country whose official native language is Dutch Papiamento?\nA: SELECT Country ( exonym ) FROM 1-1008653-1 WHERE Official or native language(s) (alphabet/script) = 'Dutch Papiamento'"}
|
||||
{"text": "table: 1-1008653-1\ncolumns: Country ( exonym ), Capital ( exonym ), Country ( endonym ), Capital ( endonym ), Official or native language(s) (alphabet/script)\nQ: What official or native languages are spoken in the country whose capital city is Canberra?\nA: SELECT Official or native language(s) (alphabet/script) FROM 1-1008653-1 WHERE Capital ( exonym ) = 'Canberra'"}
|
||||
{"text": "table: 1-1008653-1\ncolumns: Country ( exonym ), Capital ( exonym ), Country ( endonym ), Capital ( endonym ), Official or native language(s) (alphabet/script)\nQ: What is the local name given to the city of Canberra?\nA: SELECT Capital ( endonym ) FROM 1-1008653-1 WHERE Capital ( exonym ) = 'Canberra'"}
|
||||
{"text": "table: 1-1008653-1\ncolumns: Country ( exonym ), Capital ( exonym ), Country ( endonym ), Capital ( endonym ), Official or native language(s) (alphabet/script)\nQ: What is the local name given to the capital of Anguilla?\nA: SELECT Capital ( endonym ) FROM 1-1008653-1 WHERE Country ( endonym ) = 'Anguilla'"}
|
||||
{"text": "table: 1-1008653-1\ncolumns: Country ( exonym ), Capital ( exonym ), Country ( endonym ), Capital ( endonym ), Official or native language(s) (alphabet/script)\nQ: What is the English name given to the city of St. John's?\nA: SELECT Capital ( exonym ) FROM 1-1008653-1 WHERE Capital ( endonym ) = 'St. John's'"}
|
||||
{"text": "table: 1-1008653-1\ncolumns: Country ( exonym ), Capital ( exonym ), Country ( endonym ), Capital ( endonym ), Official or native language(s) (alphabet/script)\nQ: How many capital cities does Australia have?\nA: SELECT COUNT Capital ( endonym ) FROM 1-1008653-1 WHERE Country ( endonym ) = 'Australia'"}
|
||||
{"text": "table: 1-10088101-1\ncolumns: No. in set, No. in series, Title, Directed by, Written by, Original air date, Production code\nQ: The episode with production code 9abx02 was originally aired on what date?\nA: SELECT Original air date FROM 1-10088101-1 WHERE Production code = '9ABX02'"}
|
||||
{"text": "table: 1-10088101-1\ncolumns: No. in set, No. in series, Title, Directed by, Written by, Original air date, Production code\nQ: What is the episode number that has production code 8abx15?\nA: SELECT MIN No. in series FROM 1-10088101-1 WHERE Production code = '8ABX15'"}
|
||||
{"text": "table: 1-10295819-2\ncolumns: Player, Highest singles ranking, Highest doubles ranking, First year played, Years played, Ties played, Total W\u2013L, Singles W\u2013L, Doubles W\u2013L\nQ: Name the minimum tiesplayed for 6 years\nA: SELECT MIN Ties played FROM 1-10295819-2 WHERE Years played = 6"}
|
||||
{"text": "table: 1-10342194-3\ncolumns: District, Total amount of trees, Prevailing types, %, Amount of old trees, Amount of trees, that require replacement\nQ: What is the amount of trees, that require replacement when prevailing types, % is pine \u2014 29.37 poplar \u2014 26.12 acer negundo \u2014 13.2?\nA: SELECT Amount of trees, that require replacement FROM 1-10342194-3 WHERE Prevailing types, % = 'Pine \u2014 29.37 Poplar \u2014 26.12 Acer negundo \u2014 13.2'"}
|
||||
{"text": "table: 1-10342194-3\ncolumns: District, Total amount of trees, Prevailing types, %, Amount of old trees, Amount of trees, that require replacement\nQ: What is the amount of trees, that require replacement when district is leninsky?\nA: SELECT Amount of trees, that require replacement FROM 1-10342194-3 WHERE District = 'Leninsky'"}
|
||||
{"text": "table: 1-10342194-3\ncolumns: District, Total amount of trees, Prevailing types, %, Amount of old trees, Amount of trees, that require replacement\nQ: What is the district when the total amount of trees is smaller than 150817.6878461314 and amount of old trees is 1,928 (1.89%)?\nA: SELECT District FROM 1-10342194-3 WHERE Total amount of trees < 150817.6878461314 AND Amount of old trees = '1,928 (1.89%)'"}
|
||||
{"text": "table: 1-10342194-3\ncolumns: District, Total amount of trees, Prevailing types, %, Amount of old trees, Amount of trees, that require replacement\nQ: What is the amount of trees, that require replacement when the district is motovilikhinsky?\nA: SELECT Amount of trees, that require replacement FROM 1-10342194-3 WHERE District = 'Motovilikhinsky'"}
|
||||
{"text": "table: 1-10342194-3\ncolumns: District, Total amount of trees, Prevailing types, %, Amount of old trees, Amount of trees, that require replacement\nQ: What is the total amount of trees when district is leninsky?\nA: SELECT MAX Total amount of trees FROM 1-10342194-3 WHERE District = 'Leninsky'"}
|
||||
{"text": "table: 1-10342194-3\ncolumns: District, Total amount of trees, Prevailing types, %, Amount of old trees, Amount of trees, that require replacement\nQ: What is the district when prevailing types, % is acer negundo \u2014 30.22 tilia \u2014 18.6 poplar \u2014 15.23?\nA: SELECT District FROM 1-10342194-3 WHERE Prevailing types, % = 'Acer negundo \u2014 30.22 Tilia \u2014 18.6 Poplar \u2014 15.23'"}
|
||||
{"text": "table: 1-10429820-13\ncolumns: Iowa State vs., Overall Record, in Ames, at Opponents Venue, at Neutral Site, Last 5 Meetings, Last 10 Meetings, Current Streak, Since Beginning of Big 12\nQ: When the value of \"since beginning of big 12\" is synonymous with its' category, what are the in Ames values?\nA: SELECT in Ames FROM 1-10429820-13 WHERE Since Beginning of Big 12 = 'Since Beginning of Big 12'"}
|
||||
{"text": "table: 1-1046170-5\ncolumns: Year, Division, League, Regular Season, Playoffs, U.S. Open Cup\nQ: what's the\u00a0u.s. open cup status\u00a0for regular season\u00a0of 4th, atlantic division \nA: SELECT U.S. Open Cup FROM 1-1046170-5 WHERE Regular Season = '4th, Atlantic Division'"}
|
||||
{"text": "table: 1-1046170-5\ncolumns: Year, Division, League, Regular Season, Playoffs, U.S. Open Cup\nQ: how many division did not qualify for u.s. open cup in 2003\nA: SELECT Division FROM 1-1046170-5 WHERE U.S. Open Cup = 'Did Not Qualify' AND Year = 2003"}
|
||||
{"text": "table: 1-1046170-5\ncolumns: Year, Division, League, Regular Season, Playoffs, U.S. Open Cup\nQ: which round is u.s. open cup division semifinals\nA: SELECT U.S. Open Cup FROM 1-1046170-5 WHERE Playoffs = 'Division Semifinals'"}
|
||||
{"text": "table: 1-1046170-5\ncolumns: Year, Division, League, Regular Season, Playoffs, U.S. Open Cup\nQ: what are all the playoffs for regular season is 1st, atlantic division\nA: SELECT Playoffs FROM 1-1046170-5 WHERE Regular Season = '1st, Atlantic Division'"}
|
||||
{"text": "table: 1-1046170-5\ncolumns: Year, Division, League, Regular Season, Playoffs, U.S. Open Cup\nQ: what are all the playoffs for u.s. open cup in 1st round\nA: SELECT Playoffs FROM 1-1046170-5 WHERE U.S. Open Cup = '1st Round'"}
|
||||
{"text": "table: 1-1061075-1\ncolumns: Season, Competition, Round, Opponents, 1st leg, 2nd leg, Aggregate\nQ: what is the total number of\u00a02nd leg\u00a0where\u00a0aggregate\u00a0is 7-2\nA: SELECT COUNT 2nd leg FROM 1-1061075-1 WHERE Aggregate = '7-2'"}
|
||||
{"text": "table: 1-1061075-1\ncolumns: Season, Competition, Round, Opponents, 1st leg, 2nd leg, Aggregate\nQ: what's the\u00a0aggregate\u00a0where\u00a01st leg\u00a0is 3\u20132\nA: SELECT Aggregate FROM 1-1061075-1 WHERE 1st leg = '3\u20132'"}
|
||||
{"text": "table: 1-1061075-1\ncolumns: Season, Competition, Round, Opponents, 1st leg, 2nd leg, Aggregate\nQ: what's the\u00a0competition\u00a0where\u00a0aggregate\u00a0is 4\u20137\nA: SELECT Competition FROM 1-1061075-1 WHERE Aggregate = '4\u20137'"}
|
||||
{"text": "table: 1-1061075-1\ncolumns: Season, Competition, Round, Opponents, 1st leg, 2nd leg, Aggregate\nQ: what's the\u00a0competition\u00a0where\u00a01st leg\u00a0is 4-1 (h)\nA: SELECT Competition FROM 1-1061075-1 WHERE 1st leg = '4-1 (h)'"}
|
||||
{"text": "table: 1-1061075-1\ncolumns: Season, Competition, Round, Opponents, 1st leg, 2nd leg, Aggregate\nQ: what is the total number of\u00a0round\u00a0where\u00a0opponents\u00a0is haugar\nA: SELECT COUNT Round FROM 1-1061075-1 WHERE Opponents = 'Haugar'"}
|
||||
{"text": "table: 1-1061075-1\ncolumns: Season, Competition, Round, Opponents, 1st leg, 2nd leg, Aggregate\nQ: what's the\u00a01st leg\u00a0where\u00a0opponents\u00a0is galatasaray\nA: SELECT 1st leg FROM 1-1061075-1 WHERE Opponents = 'Galatasaray'"}
|
||||
{"text": "table: 1-10706961-2\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: What is the highest Rd that Tom Sneva had the pole position in?\nA: SELECT MAX Rd FROM 1-10706961-2 WHERE Pole Position = 'Tom Sneva'"}
|
||||
{"text": "table: 1-10706961-2\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: How many winning drivers were there in the race that had a fastest lap time of 56.920?\nA: SELECT COUNT Winning driver FROM 1-10706961-2 WHERE Fastest Lap = '56.920'"}
|
||||
{"text": "table: 1-10706961-2\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: How many reports are there in the race that Forsythe Racing won and Teo Fabi had the pole position in?\nA: SELECT COUNT Report FROM 1-10706961-2 WHERE Winning team = 'Forsythe Racing' AND Pole Position = 'Teo Fabi'"}
|
||||
{"text": "table: 1-10706961-2\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: Which Rd took place at the Indianapolis 500?\nA: SELECT Rd FROM 1-10706961-2 WHERE Name = 'Indianapolis 500'"}
|
||||
{"text": "table: 1-10706961-2\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: Which teams won when Bobby Rahal was their winning driver?\nA: SELECT Winning team FROM 1-10706961-2 WHERE Winning driver = 'Bobby Rahal'"}
|
||||
{"text": "table: 1-10706961-2\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: What was the fastest lap time in the Escort Radar Warning 200?\nA: SELECT Fastest Lap FROM 1-10706961-2 WHERE Name = 'Escort Radar Warning 200'"}
|
||||
{"text": "table: 1-10707176-2\ncolumns: Rnd, Race Name, Circuit, City/Location, Date, Pole position, Winning driver, Winning team, Report\nQ: What report was there for the porsche north america?\nA: SELECT Report FROM 1-10707176-2 WHERE Winning team = 'Porsche North America'"}
|
||||
{"text": "table: 1-10707176-2\ncolumns: Rnd, Race Name, Circuit, City/Location, Date, Pole position, Winning driver, Winning team, Report\nQ: What rnds were there for the phoenix international raceway?\nA: SELECT Rnd FROM 1-10707176-2 WHERE Circuit = 'Phoenix International Raceway'"}
|
||||
{"text": "table: 1-10707176-2\ncolumns: Rnd, Race Name, Circuit, City/Location, Date, Pole position, Winning driver, Winning team, Report\nQ: Who was the pole position for the rnd equalling 12?\nA: SELECT Pole position FROM 1-10707176-2 WHERE Rnd = '12'"}
|
||||
{"text": "table: 1-10707176-2\ncolumns: Rnd, Race Name, Circuit, City/Location, Date, Pole position, Winning driver, Winning team, Report\nQ: How many reports were the for the cleveland burke lakefront airport circut?\nA: SELECT COUNT Report FROM 1-10707176-2 WHERE Circuit = 'Cleveland Burke Lakefront Airport'"}
|
||||
{"text": "table: 1-10707176-2\ncolumns: Rnd, Race Name, Circuit, City/Location, Date, Pole position, Winning driver, Winning team, Report\nQ: How many winning drivers were the for the rnd equalling 5?\nA: SELECT COUNT Winning driver FROM 1-10707176-2 WHERE Rnd = '5'"}
|
||||
{"text": "table: 1-10706879-3\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: The race tony bettenhausen 200 has what smallest rd?\nA: SELECT MIN Rd FROM 1-10706879-3 WHERE Name = 'Tony Bettenhausen 200'"}
|
||||
{"text": "table: 1-10706879-3\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: The winning team of the race, los angeles times 500 is who?\nA: SELECT Winning team FROM 1-10706879-3 WHERE Name = 'Los Angeles Times 500'"}
|
||||
{"text": "table: 1-10706879-3\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: How many winning drivers in the kraco twin 125 (r2) race were there?\nA: SELECT COUNT Winning driver FROM 1-10706879-3 WHERE Name = 'Kraco Twin 125 (R2)'"}
|
||||
{"text": "table: 1-10706879-3\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: What are the races that johnny rutherford has won?\nA: SELECT Name FROM 1-10706879-3 WHERE Winning driver = 'Johnny Rutherford'"}
|
||||
{"text": "table: 1-10706879-3\ncolumns: Rd, Name, Pole Position, Fastest Lap, Winning driver, Winning team, Report\nQ: How many fastest laps were there for a rd that equals 10?\nA: SELECT COUNT Fastest Lap FROM 1-10706879-3 WHERE Rd = 10"}
|
||||
{"text": "table: 1-10712301-5\ncolumns: Region, Operator, Licence award date, On air date, Closure date\nQ: What is the license award date for North East England?\nA: SELECT Licence award date FROM 1-10712301-5 WHERE Region = 'North East England'"}
|
||||
{"text": "table: 1-10733530-3\ncolumns: Nation, Population (thousands), Internet subscriptions (2000) (thousands of users), Internet subscriptions (2008) (thousands of users), % growth (2000\u20132008), % Internet users\nQ: What is the percentage of growth in 2000-2008 in ethiopia?\nA: SELECT % growth (2000\u20132008) FROM 1-10733530-3 WHERE Nation = 'Ethiopia'"}
|
||||
{"text": "table: 1-10733530-3\ncolumns: Nation, Population (thousands), Internet subscriptions (2000) (thousands of users), Internet subscriptions (2008) (thousands of users), % growth (2000\u20132008), % Internet users\nQ: Name the total number of percentage growth 2000-2008 of uganda?\nA: SELECT COUNT % growth (2000\u20132008) FROM 1-10733530-3 WHERE Nation = 'Uganda'"}
|
||||
{"text": "table: 1-10733530-3\ncolumns: Nation, Population (thousands), Internet subscriptions (2000) (thousands of users), Internet subscriptions (2008) (thousands of users), % growth (2000\u20132008), % Internet users\nQ: What is the maximum percentage grown 2000-2008 in burundi\nA: SELECT MAX % growth (2000\u20132008) FROM 1-10733530-3 WHERE Nation = 'Burundi'"}
|
||||
{"text": "table: 1-10798421-1\ncolumns: Village (German), Village (Slovenian), Number of people 1991, Percent of Slovenes 1991, Percent of Slovenes 1951\nQ: Provide me with the names of all the villages (German) that has 76.3% of Slovenes in 1951.\nA: SELECT Village (German) FROM 1-10798421-1 WHERE Percent of Slovenes 1951 = '76.3%'"}
|
||||
{"text": "table: 1-10798421-1\ncolumns: Village (German), Village (Slovenian), Number of people 1991, Percent of Slovenes 1991, Percent of Slovenes 1951\nQ: Give me the minimum number of people in 1991 with 92.5% of Slovenes in 1991.\nA: SELECT MIN Number of people 1991 FROM 1-10798421-1 WHERE Percent of Slovenes 1991 = '92.5%'"}
|
||||
{"text": "table: 1-10798421-1\ncolumns: Village (German), Village (Slovenian), Number of people 1991, Percent of Slovenes 1991, Percent of Slovenes 1951\nQ: Provide me with the name of all the village (German) that are part of the village (Slovenian) with sele srednji kot. \nA: SELECT Village (German) FROM 1-10798421-1 WHERE Village (Slovenian) = 'Sele Srednji Kot'"}
|
||||
{"text": "table: 1-10798421-1\ncolumns: Village (German), Village (Slovenian), Number of people 1991, Percent of Slovenes 1991, Percent of Slovenes 1951\nQ: Provide me with the name of all the village (German) that are part of the village (Slovenian) with sele borovnica.\nA: SELECT Village (German) FROM 1-10798421-1 WHERE Village (Slovenian) = 'Sele Borovnica'"}
|
||||
{"text": "table: 1-10798421-1\ncolumns: Village (German), Village (Slovenian), Number of people 1991, Percent of Slovenes 1991, Percent of Slovenes 1951\nQ: Provide me with the name of the village (German) where there is 96.9% Slovenes in 1951. \nA: SELECT Village (German) FROM 1-10798421-1 WHERE Percent of Slovenes 1951 = '96.9%'"}
|
||||
{"text": "table: 1-10798421-1\ncolumns: Village (German), Village (Slovenian), Number of people 1991, Percent of Slovenes 1991, Percent of Slovenes 1951\nQ: Provide with the names of the village (German) that is part of village (Slovenian) with sele srednji kot.\nA: SELECT Village (German) FROM 1-10798421-1 WHERE Village (Slovenian) = 'Sele Srednji Kot'"}
|
||||
{"text": "table: 1-10812293-3\ncolumns: Game, Date, Team, Score, High points, High rebounds, High assists, Location Attendance, Record\nQ: What was the score of the game on November 12?\nA: SELECT Score FROM 1-10812293-3 WHERE Date = 'November 12'"}
|
||||
{"text": "table: 1-10812293-3\ncolumns: Game, Date, Team, Score, High points, High rebounds, High assists, Location Attendance, Record\nQ: Who had high assists when they played against San Antonio?\nA: SELECT High assists FROM 1-10812293-3 WHERE Team = 'San Antonio'"}
|
||||
{"text": "table: 1-10812293-3\ncolumns: Game, Date, Team, Score, High points, High rebounds, High assists, Location Attendance, Record\nQ: Who scored the most points in game 4?\nA: SELECT High points FROM 1-10812293-3 WHERE Game = 4"}
|
||||
{"text": "table: 1-10812293-3\ncolumns: Game, Date, Team, Score, High points, High rebounds, High assists, Location Attendance, Record\nQ: Where was the game on November 20?\nA: SELECT Location Attendance FROM 1-10812293-3 WHERE Date = 'November 20'"}
|
||||
{"text": "table: 1-10935205-1\ncolumns: No. in season, No. in series, Title, Canadian airdate, US airdate, Production code\nQ: The canadian airdate of 11 february 2008 applied to what series number?\nA: SELECT COUNT No. in series FROM 1-10935205-1 WHERE Canadian airdate = '11 February 2008'"}
|
||||
{"text": "table: 1-10935205-1\ncolumns: No. in season, No. in series, Title, Canadian airdate, US airdate, Production code\nQ: The U.S. airdate of 4 april 2008 had a production code of what?\nA: SELECT MAX Production code FROM 1-10935205-1 WHERE US airdate = '4 April 2008'"}
|
||||
{"text": "table: 1-10935205-1\ncolumns: No. in season, No. in series, Title, Canadian airdate, US airdate, Production code\nQ: The episode titled \"don't stop believin'\" was what highest number of the season?\nA: SELECT MAX No. in season FROM 1-10935205-1 WHERE Title = '\"Don't Stop Believin'\"'"}
|
||||
{"text": "table: 1-10935205-1\ncolumns: No. in season, No. in series, Title, Canadian airdate, US airdate, Production code\nQ: The U.S. airdate of 8 august 2008 also had canadian airdates of what?\nA: SELECT Canadian airdate FROM 1-10935205-1 WHERE US airdate = '8 August 2008'"}
|
||||
{"text": "table: 1-10935205-1\ncolumns: No. in season, No. in series, Title, Canadian airdate, US airdate, Production code\nQ: The canadian airdate of 17 march 2008 had how many numbers in the season?\nA: SELECT COUNT No. in season FROM 1-10935205-1 WHERE Canadian airdate = '17 March 2008'"}
|
||||
{"text": "table: 1-10935205-1\ncolumns: No. in season, No. in series, Title, Canadian airdate, US airdate, Production code\nQ: For the episode(s) aired in the U.S. on 4 april 2008, what were the names?\nA: SELECT Title FROM 1-10935205-1 WHERE US airdate = '4 April 2008'"}
|
||||
{"text": "table: 1-10953197-5\ncolumns: No. in series, No. in season, Title, Director, Writer(s), Original air date, Production code\nQ: Who directed the episode \"Great Sexpectations (2)\"?\nA: SELECT Director FROM 1-10953197-5 WHERE Title = '\"Great Sexpectations (2)\"'"}
|
||||
{"text": "table: 1-10975034-2\ncolumns: Pick #, CFL Team, Player, Position, College\nQ: Which player from the 2004 CFL draft attended Wilfrid Laurier?\nA: SELECT Player FROM 1-10975034-2 WHERE College = 'Wilfrid Laurier'"}
|
||||
{"text": "table: 1-10975034-2\ncolumns: Pick #, CFL Team, Player, Position, College\nQ: What position does Christian Leibl-Cote play?\nA: SELECT Position FROM 1-10975034-2 WHERE Player = 'Christian Leibl-Cote'"}
|
||||
{"text": "table: 1-10975034-2\ncolumns: Pick #, CFL Team, Player, Position, College\nQ: What is the pick number for Northwestern college?\nA: SELECT MAX Pick # FROM 1-10975034-2 WHERE College = 'Northwestern'"}
|
||||
{"text": "table: 1-10992-3\ncolumns: No, City district (Stadtteil), Area in km\u00b2, Population, Foreign nationals, Foreign nationals in %, Area district (Ortsbezirk)\nQ: How many foreigners in percentage terms had a population of 4.911?\nA: SELECT COUNT Foreign nationals in % FROM 1-10992-3 WHERE Population = '4.911'"}
|
||||
{"text": "table: 1-10992-3\ncolumns: No, City district (Stadtteil), Area in km\u00b2, Population, Foreign nationals, Foreign nationals in %, Area district (Ortsbezirk)\nQ: What is the number of the city district of stadtteil where foreigners are 5.162?\nA: SELECT COUNT City district (Stadtteil) FROM 1-10992-3 WHERE Foreign nationals = '5.162'"}
|
||||
{"text": "table: 1-10992-3\ncolumns: No, City district (Stadtteil), Area in km\u00b2, Population, Foreign nationals, Foreign nationals in %, Area district (Ortsbezirk)\nQ: What is the city where the number is 47?\nA: SELECT City district (Stadtteil) FROM 1-10992-3 WHERE No = '47'"}
|
||||
{"text": "table: 1-11044765-1\ncolumns: School, Mascot, Location, League, Enrollment\nQ: Which leagues have Raiders as their mascot?\nA: SELECT League FROM 1-11044765-1 WHERE Mascot = 'Raiders'"}
|
||||
{"text": "table: 1-11044765-1\ncolumns: School, Mascot, Location, League, Enrollment\nQ: Which leagues is the Galena school in?\nA: SELECT League FROM 1-11044765-1 WHERE School = 'Galena'"}
|
||||
{"text": "table: 1-11044765-1\ncolumns: School, Mascot, Location, League, Enrollment\nQ: What city and state is the Lancers mascot located?\nA: SELECT Location FROM 1-11044765-1 WHERE Mascot = 'Lancers'"}
|
||||
{"text": "table: 1-11044765-1\ncolumns: School, Mascot, Location, League, Enrollment\nQ: What city and state are the miners located in?\nA: SELECT Location FROM 1-11044765-1 WHERE Mascot = 'Miners'"}
|
||||
{"text": "table: 1-11044765-1\ncolumns: School, Mascot, Location, League, Enrollment\nQ: Which school has the Raiders as their mascot?\nA: SELECT School FROM 1-11044765-1 WHERE Mascot = 'Raiders'"}
|
||||
{"text": "table: 1-1121352-2\ncolumns: No., Date, Tournament, Winning score, To par, Margin of victory, Runner(s)-up\nQ: Where was the tournament dated nov 3, 2002?\nA: SELECT Tournament FROM 1-1121352-2 WHERE Date = 'Nov 3, 2002'"}
|
||||
{"text": "table: 1-1121352-2\ncolumns: No., Date, Tournament, Winning score, To par, Margin of victory, Runner(s)-up\nQ: Where is the margin of victory dated mar 28, 2004?\nA: SELECT Margin of victory FROM 1-1121352-2 WHERE Date = 'Mar 28, 2004'"}
|
||||
{"text": "table: 1-1121352-2\ncolumns: No., Date, Tournament, Winning score, To par, Margin of victory, Runner(s)-up\nQ: What is the to par dated may 4, 2003?\nA: SELECT To par FROM 1-1121352-2 WHERE Date = 'May 4, 2003'"}
|
||||
{"text": "table: 1-1121352-2\ncolumns: No., Date, Tournament, Winning score, To par, Margin of victory, Runner(s)-up\nQ: What date were the runner ups pat hurst juli inkster?\nA: SELECT Date FROM 1-1121352-2 WHERE Runner(s)-up = 'Pat Hurst Juli Inkster'"}
|
||||
{"text": "table: 1-11210576-4\ncolumns: Character, Fate, Actor, First Episode, Final Episode, Duration, Final Episode Count\nQ: what's the total number of\u00a0final epbeingode count\u00a0with\u00a0character\u00a0being rick stetler\nA: SELECT COUNT Final Episode Count FROM 1-11210576-4 WHERE Character = 'Rick Stetler'"}
|
||||
{"text": "table: 1-11210576-4\ncolumns: Character, Fate, Actor, First Episode, Final Episode, Duration, Final Episode Count\nQ: what are all the actor where first episode is \"ambush\"\nA: SELECT Actor FROM 1-11210576-4 WHERE First Episode = '\"Ambush\"'"}
|
||||
{"text": "table: 1-11210576-4\ncolumns: Character, Fate, Actor, First Episode, Final Episode, Duration, Final Episode Count\nQ: what's the\u00a0character\u00a0with\u00a0fate\u00a0being deceased: knife wound\nA: SELECT Character FROM 1-11210576-4 WHERE Fate = 'Deceased: Knife Wound'"}
|
||||
{"text": "table: 1-11210576-4\ncolumns: Character, Fate, Actor, First Episode, Final Episode, Duration, Final Episode Count\nQ: what's the total number of\u00a0final epbeingode count\u00a0with\u00a0first epbeingode\u00a0being \"l.a.\"\nA: SELECT COUNT Final Episode Count FROM 1-11210576-4 WHERE First Episode = '\"L.A.\"'"}
|
||||
{"text": "table: 1-11210576-4\ncolumns: Character, Fate, Actor, First Episode, Final Episode, Duration, Final Episode Count\nQ: what's the\u00a0actor\u00a0with\u00a0character\u00a0being judge joseph ratner\nA: SELECT Actor FROM 1-11210576-4 WHERE Character = 'Judge Joseph Ratner'"}
|
||||
{"text": "table: 1-11210576-4\ncolumns: Character, Fate, Actor, First Episode, Final Episode, Duration, Final Episode Count\nQ: what's the\u00a0first epbeingode\u00a0with\u00a0final epbeingode\u00a0being \"rio\"\nA: SELECT First Episode FROM 1-11210576-4 WHERE Final Episode = '\"Rio\"'"}
|
||||
{"text": "table: 1-11214772-2\ncolumns: Year, Champion, Score, Runner-Up, Location, Semi-Finalist #1, Semi-Finalist #2\nQ: Which team was the second semi finalist in 2007?\nA: SELECT Semi-Finalist #2 FROM 1-11214772-2 WHERE Year = 2007"}
|
||||
{"text": "table: 1-11214772-2\ncolumns: Year, Champion, Score, Runner-Up, Location, Semi-Finalist #1, Semi-Finalist #2\nQ: How many teams were listed as runner up in 2005 and there the first semi finalist was Western Carolina?\nA: SELECT COUNT Runner-Up FROM 1-11214772-2 WHERE Semi-Finalist #1 = 'Western Carolina' AND Year = 2005"}
|
@ -100,4 +100,20 @@ if __name__ == "__main__":
|
||||
# Sanity-check that every WikiSQL split parses to its expected number of
# examples before exporting anything.
datanames = ["train", "dev", "test"]
sizes = [56355, 8421, 15878]
for dataname, size in zip(datanames, sizes):
    # The `assert` keyword is required here: without it the statement is just
    # a tuple expression `(comparison, message)` that is evaluated and
    # discarded, so a wrong split size would go unnoticed.
    assert len(WikiSQL(dataname)) == size, f"Wrong {dataname} set size."

# Write the sets to jsonl
import json

train, dev, test = load()
# (dataset, output file stem, maximum number of examples to export)
datasets = [
    (train, "train", 1000),
    (dev, "valid", 100),
    (test, "test", 100),
]
for dataset, name, size in datasets:
    with open(f"data/{name}.jsonl", "w") as fid:
        # zip with range(size) caps the export at `size` examples; the
        # counter itself is not needed.
        for _, t in zip(range(size), dataset):
            # Strip the <s>, </s> since the tokenizer adds them
            json.dump({"text": t[3:-4]}, fid)
            fid.write("\n")
|
110
lora/lora.py
110
lora/lora.py
@ -16,7 +16,6 @@ from mlx.utils import tree_map, tree_flatten, tree_unflatten
|
||||
|
||||
|
||||
from models import ModelArgs, Model, LoRALinear
|
||||
import wikisql
|
||||
|
||||
|
||||
def build_parser():
|
||||
@ -52,6 +51,18 @@ def build_parser():
|
||||
action="store_true",
|
||||
help="Do training",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--data",
|
||||
type=str,
|
||||
default="data/",
|
||||
help="Directory with {train, valid, test}.jsonl files",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora_layers",
|
||||
type=int,
|
||||
default=16,
|
||||
help="Number of layers to fine-tune",
|
||||
)
|
||||
parser.add_argument("--batch_size", type=int, default=4, help="Minibatch size.")
|
||||
parser.add_argument(
|
||||
"--iters", type=int, default=1000, help="Iterations to train for."
|
||||
@ -59,7 +70,7 @@ def build_parser():
|
||||
parser.add_argument(
|
||||
"--val_batches",
|
||||
type=int,
|
||||
default=100,
|
||||
default=25,
|
||||
help="Number of validation batches, -1 uses the entire validation set.",
|
||||
)
|
||||
parser.add_argument(
|
||||
@ -111,8 +122,15 @@ class Tokenizer:
|
||||
self._sep = "▁"
|
||||
assert self._model.vocab_size() == self._model.get_piece_size()
|
||||
|
||||
def encode(self, s: str) -> List[int]:
|
||||
return [self._model.bos_id(), *self._model.encode(s)]
|
||||
def encode(self, s: str, eos: bool = False) -> List[int]:
    """
    Convert a string to a list of token ids.

    The sequence always starts with the model's beginning-of-sequence id,
    followed by the ids the underlying model produces for ``s``. When
    ``eos`` is true, the end-of-sequence id is appended as the last element.
    """
    ids = [self._model.bos_id()]
    ids.extend(self._model.encode(s))
    if not eos:
        return ids
    ids.append(self.eos_id)
    return ids
|
||||
|
||||
@property
def eos_id(self) -> int:
    """Return the end-of-sequence token id reported by the underlying model."""
    end_token = self._model.eos_id()
    return end_token
|
||||
|
||||
def decode(self, t: List[int]) -> str:
|
||||
out = self._model.decode(t)
|
||||
@ -125,6 +143,44 @@ class Tokenizer:
|
||||
return self._model.vocab_size()
|
||||
|
||||
|
||||
class Dataset:
    """
    Light-weight wrapper to hold lines from a jsonl file.

    Every non-blank line of the file is parsed as one JSON object. Indexing
    returns the value stored under ``key`` for the selected record.

    A path that does not exist produces an empty dataset (``len() == 0``)
    instead of failing, so callers can decide which splits are required and
    report a meaningful error themselves.
    """

    def __init__(self, path: Path, key: str = "text"):
        if not path.exists():
            # None marks "file not found"; __len__ and __getitem__ handle it.
            self._data = None
        else:
            with open(path, "r") as fid:
                # Skip blank lines (e.g. stray empty lines in hand-written
                # data), which json.loads would reject.
                self._data = [json.loads(l) for l in fid if l.strip()]
        self._key = key

    def __getitem__(self, idx: int):
        if self._data is None:
            # Behave like an empty sequence rather than failing with a
            # TypeError from subscripting None.
            raise IndexError("Dataset is empty: the data file was not found.")
        return self._data[idx][self._key]

    def __len__(self):
        # A missing file counts as zero examples, so `len(dset) == 0` checks
        # work for both "not found" and "empty".
        if self._data is None:
            return 0
        return len(self._data)
|
||||
|
||||
|
||||
def load(args):
    """Build the train, validation and test datasets from ``args.data``.

    The directory ``args.data`` is expected to hold ``train.jsonl``,
    ``valid.jsonl`` and ``test.jsonl``. Only the splits needed by the
    requested mode are checked for emptiness.

    Args:
        args: Parsed command-line arguments; reads ``data``, ``train`` and
            ``test``.

    Returns:
        A ``(train, valid, test)`` tuple of ``Dataset`` objects.

    Raises:
        ValueError: If ``args.train`` is set and the training or validation
            set has length 0, or if ``args.test`` is set and the test set
            has length 0.
    """
    train = Dataset(Path(args.data) / "train.jsonl")
    valid = Dataset(Path(args.data) / "valid.jsonl")
    test = Dataset(Path(args.data) / "test.jsonl")

    if args.train:
        # Fine-tuning needs both the training and the validation split.
        if len(train) == 0:
            raise ValueError(
                "Training set not found or empty. Must provide training set for fine-tuning."
            )
        if len(valid) == 0:
            raise ValueError(
                "Validation set not found or empty. Must provide validation set for fine-tuning."
            )

    if args.test and len(test) == 0:
        raise ValueError(
            "Test set not found or empty. Must provide test set for evaluation."
        )

    return train, valid, test
|
||||
|
||||
|
||||
def loss(model, inputs, targets, lengths):
|
||||
# Run model on inputs
|
||||
logits, _ = model(inputs)
|
||||
@ -139,24 +195,31 @@ def loss(model, inputs, targets, lengths):
|
||||
return ce, ntoks
|
||||
|
||||
|
||||
def iterate_batches(dset, tokenizer, batch_size, train=False):
    """Yield padded ``(inputs, targets, lengths)`` batches from ``dset``.

    Each example is encoded with ``tokenizer.encode(..., eos=True)`` and the
    batch is right-padded with zeros to the longest sequence in the batch.
    ``inputs`` is the batch without its last column, ``targets`` is the
    batch without its first column, and ``lengths`` holds the unpadded
    encoded length of every example. Trailing examples that do not fill a
    complete batch are dropped.

    Args:
        dset: Indexable dataset of strings that supports ``len``.
        tokenizer: Object with an ``encode(text, eos=True)`` method.
        batch_size: Number of examples per batch.
        train: When true, examples are reshuffled on every pass and the
            generator never terminates. When false, a single in-order pass
            is made.

    Raises:
        ValueError: If ``train`` is true and ``dset`` has fewer than
            ``batch_size`` examples. No batch could ever be produced, so the
            endless training loop would otherwise spin forever.
    """
    if train and len(dset) < batch_size:
        raise ValueError(
            f"Dataset must have at least batch_size={batch_size} examples "
            f"but only has {len(dset)}."
        )

    while True:
        # Shuffle indices
        indices = np.arange(len(dset))
        if train:
            indices = np.random.permutation(indices)

        # Collect batches from dataset
        for i in range(0, len(indices) - batch_size + 1, batch_size):
            # Encode batch
            batch = [
                tokenizer.encode(dset[idx], eos=True)
                for idx in indices[i : i + batch_size]
            ]
            lengths = [len(x) for x in batch]

            # Pad to the max length
            batch_arr = np.zeros((batch_size, max(lengths)), np.int32)
            for row, toks in zip(batch_arr, batch):
                row[: len(toks)] = toks
            batch = mx.array(batch_arr)
            yield batch[:, :-1], batch[:, 1:], mx.array(lengths)

        if not train:
            break
|
||||
|
||||
|
||||
def evaluate(model, dataset, loss, tokenizer, batch_size, num_batches):
|
||||
@ -164,7 +227,7 @@ def evaluate(model, dataset, loss, tokenizer, batch_size, num_batches):
|
||||
ntokens = 0
|
||||
for it, batch in zip(
|
||||
range(num_batches),
|
||||
iterate_batches(dataset, tokenizer, batch_size, shuffle=False),
|
||||
iterate_batches(dataset, tokenizer, batch_size),
|
||||
):
|
||||
losses, toks = loss(model, *batch)
|
||||
all_losses.append((losses * toks).item())
|
||||
@ -183,7 +246,8 @@ def train(model, train_set, val_set, optimizer, loss, tokenizer, args):
|
||||
# Main training loop
|
||||
start = time.perf_counter()
|
||||
for it, batch in zip(
|
||||
range(args.iters), iterate_batches(train_set, tokenizer, args.batch_size)
|
||||
range(args.iters),
|
||||
iterate_batches(train_set, tokenizer, args.batch_size, train=True),
|
||||
):
|
||||
# Forward and backward pass
|
||||
(lvalue, toks), grad = loss_value_and_grad(model, *batch)
|
||||
@ -289,7 +353,7 @@ if __name__ == "__main__":
|
||||
|
||||
# Freeze all layers other than LORA linears
|
||||
model.freeze()
|
||||
for l in model.layers[16:32]:
|
||||
for l in model.layers[-args.lora_layers :]:
|
||||
l.attention.wq = LoRALinear.from_linear(l.attention.wq)
|
||||
l.attention.wv = LoRALinear.from_linear(l.attention.wv)
|
||||
|
||||
@ -299,7 +363,7 @@ if __name__ == "__main__":
|
||||
print(f"Trainable parameters {p:.3f}M")
|
||||
|
||||
print("Loading datasets")
|
||||
train_set, valid_set, test_set = wikisql.load()
|
||||
train_set, valid_set, test_set = load(args)
|
||||
|
||||
# Resume training the given adapters.
|
||||
if args.resume_adapter_file is not None:
|
||||
|
Loading…
Reference in New Issue
Block a user